mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-05 12:04:33 -08:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9c43f82fb1 | ||
|
|
6ba571f5e4 | ||
|
|
b43289fa38 |
1901
poetry.lock
generated
1901
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "python-jobspy"
|
name = "python-jobspy"
|
||||||
version = "1.1.72"
|
version = "1.1.70"
|
||||||
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
|
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
|
||||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||||
homepage = "https://github.com/Bunsly/JobSpy"
|
homepage = "https://github.com/Bunsly/JobSpy"
|
||||||
@@ -19,7 +19,7 @@ pandas = "^2.1.0"
|
|||||||
NUMPY = "1.26.3"
|
NUMPY = "1.26.3"
|
||||||
pydantic = "^2.3.0"
|
pydantic = "^2.3.0"
|
||||||
tls-client = "^1.0.1"
|
tls-client = "^1.0.1"
|
||||||
markdownify = "^0.13.1"
|
markdownify = "^0.11.6"
|
||||||
regex = "^2024.4.28"
|
regex = "^2024.4.28"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -69,8 +69,15 @@ class IndeedScraper(Scraper):
|
|||||||
page = 1
|
page = 1
|
||||||
|
|
||||||
cursor = None
|
cursor = None
|
||||||
|
offset_pages = math.ceil(self.scraper_input.offset / 100)
|
||||||
|
for _ in range(offset_pages):
|
||||||
|
logger.info(f"skipping search page: {page}")
|
||||||
|
__, cursor = self._scrape_page(cursor)
|
||||||
|
if not __:
|
||||||
|
logger.info(f"found no jobs on page: {page}")
|
||||||
|
break
|
||||||
|
|
||||||
while len(self.seen_urls) < scraper_input.results_wanted + scraper_input.offset:
|
while len(self.seen_urls) < scraper_input.results_wanted:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"search page: {page} / {math.ceil(scraper_input.results_wanted / 100)}"
|
f"search page: {page} / {math.ceil(scraper_input.results_wanted / 100)}"
|
||||||
)
|
)
|
||||||
@@ -80,12 +87,7 @@ class IndeedScraper(Scraper):
|
|||||||
break
|
break
|
||||||
job_list += jobs
|
job_list += jobs
|
||||||
page += 1
|
page += 1
|
||||||
return JobResponse(
|
return JobResponse(jobs=job_list[: scraper_input.results_wanted])
|
||||||
jobs=job_list[
|
|
||||||
scraper_input.offset : scraper_input.offset
|
|
||||||
+ scraper_input.results_wanted
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
def _scrape_page(self, cursor: str | None) -> Tuple[list[JobPost], str | None]:
|
def _scrape_page(self, cursor: str | None) -> Tuple[list[JobPost], str | None]:
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user