From 7f6271b2e01f5c9d22d9d421b52f534d36c978d5 Mon Sep 17 00:00:00 2001
From: adamagassi
Date: Tue, 28 May 2024 23:38:13 +0300
Subject: [PATCH] LinkedIn scraper fixes: (#159)

Correct initial page offset calculation
Separate page variable from request counter
Fix job offset starting value
Increment offset by number of jobs returned instead of expected value
---
 src/jobspy/scrapers/linkedin/__init__.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/jobspy/scrapers/linkedin/__init__.py
index 840b2fb..f3bda94 100644
--- a/src/jobspy/scrapers/linkedin/__init__.py
+++ b/src/jobspy/scrapers/linkedin/__init__.py
@@ -72,7 +72,8 @@ class LinkedInScraper(Scraper):
         job_list: list[JobPost] = []
         seen_urls = set()
         url_lock = Lock()
-        page = scraper_input.offset // 25 + 25 if scraper_input.offset else 0
+        page = scraper_input.offset // 25 * 25 if scraper_input.offset else 0
+        request_count = 0
         seconds_old = (
             scraper_input.hours_old * 3600 if scraper_input.hours_old else None
         )
@@ -80,7 +81,8 @@ class LinkedInScraper(Scraper):
             lambda: len(job_list) < scraper_input.results_wanted and page < 1000
         )
         while continue_search():
-            logger.info(f"LinkedIn search page: {page // 25 + 1}")
+            request_count += 1
+            logger.info(f"LinkedIn search page: {request_count}")
             params = {
                 "keywords": scraper_input.search_term,
                 "location": scraper_input.location,
@@ -92,7 +94,7 @@ class LinkedInScraper(Scraper):
                     else None
                 ),
                 "pageNum": 0,
-                "start": page + scraper_input.offset,
+                "start": page,
                 "f_AL": "true" if scraper_input.easy_apply else None,
                 "f_C": (
                     ",".join(map(str, scraper_input.linkedin_company_ids))
@@ -156,7 +158,7 @@ class LinkedInScraper(Scraper):
 
             if continue_search():
                 time.sleep(random.uniform(self.delay, self.delay + self.band_delay))
-                page += self.jobs_per_page
+                page += len(job_list)
 
         job_list = job_list[: scraper_input.results_wanted]
         return JobResponse(jobs=job_list)