From f395597fdddfe97384beca26a6690cf0b54c4697 Mon Sep 17 00:00:00 2001 From: Cullen Watson Date: Tue, 22 Oct 2024 19:25:07 -0500 Subject: [PATCH] fix(indeed): offset --- pyproject.toml | 2 +- src/jobspy/scrapers/indeed/__init__.py | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 87c9d10..d443829 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "python-jobspy" -version = "1.1.71" +version = "1.1.72" description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/Bunsly/JobSpy" diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py index 0aa9f44..f3f679c 100644 --- a/src/jobspy/scrapers/indeed/__init__.py +++ b/src/jobspy/scrapers/indeed/__init__.py @@ -69,15 +69,8 @@ class IndeedScraper(Scraper): page = 1 cursor = None - offset_pages = math.ceil(self.scraper_input.offset / 100) - for _ in range(offset_pages): - logger.info(f"skipping search page: {page}") - __, cursor = self._scrape_page(cursor) - if not __: - logger.info(f"found no jobs on page: {page}") - break - while len(self.seen_urls) < scraper_input.results_wanted: + while len(self.seen_urls) < scraper_input.results_wanted + scraper_input.offset: logger.info( f"search page: {page} / {math.ceil(scraper_input.results_wanted / 100)}" ) @@ -87,7 +80,12 @@ class IndeedScraper(Scraper): break job_list += jobs page += 1 - return JobResponse(jobs=job_list[: scraper_input.results_wanted]) + return JobResponse( + jobs=job_list[ + scraper_input.offset : scraper_input.offset + + scraper_input.results_wanted + ] + ) def _scrape_page(self, cursor: str | None) -> Tuple[list[JobPost], str | None]: """