Compare commits

..

4 Commits

Author SHA1 Message Date
Cullen Watson
f395597fdd fix(indeed): offset 2024-10-22 19:25:07 -05:00
Cullen Watson
6372e41bd9 chore:version 2024-10-20 00:19:31 -05:00
Olzhas Arystanov
6c869decb8 build(deps): bump markdownify to 0.13.1 (#211) 2024-10-20 00:18:44 -05:00
Cullen Watson
9f4083380d indeed:remove tpe (#210) 2024-10-19 18:01:59 -05:00
3 changed files with 991 additions and 930 deletions

1901
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.70"
+version = "1.1.72"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/JobSpy"
@@ -19,7 +19,7 @@ pandas = "^2.1.0"
 NUMPY = "1.26.3"
 pydantic = "^2.3.0"
 tls-client = "^1.0.1"
-markdownify = "^0.11.6"
+markdownify = "^0.13.1"
 regex = "^2024.4.28"

View File

@@ -69,15 +69,8 @@ class IndeedScraper(Scraper):
         page = 1
         cursor = None
-        offset_pages = math.ceil(self.scraper_input.offset / 100)
-        for _ in range(offset_pages):
-            logger.info(f"skipping search page: {page}")
-            __, cursor = self._scrape_page(cursor)
-            if not __:
-                logger.info(f"found no jobs on page: {page}")
-                break
-        while len(self.seen_urls) < scraper_input.results_wanted:
+        while len(self.seen_urls) < scraper_input.results_wanted + scraper_input.offset:
             logger.info(
                 f"search page: {page} / {math.ceil(scraper_input.results_wanted / 100)}"
             )
@@ -87,7 +80,12 @@ class IndeedScraper(Scraper):
                 break
             job_list += jobs
             page += 1
-        return JobResponse(jobs=job_list[: scraper_input.results_wanted])
+        return JobResponse(
+            jobs=job_list[
+                scraper_input.offset : scraper_input.offset
+                + scraper_input.results_wanted
+            ]
+        )

     def _scrape_page(self, cursor: str | None) -> Tuple[list[JobPost], str | None]:
         """