mirror of https://github.com/Bunsly/JobSpy
LinkedIn scraper fixes: (#159)
Correct initial page offset calculation; separate page variable from request counter; fix job offset starting value; increment offset by number of jobs returned instead of expected value. (pull/160/head)
parent
5cb7ffe5fd
commit
7f6271b2e0
|
@ -72,7 +72,8 @@ class LinkedInScraper(Scraper):
|
||||||
job_list: list[JobPost] = []
|
job_list: list[JobPost] = []
|
||||||
seen_urls = set()
|
seen_urls = set()
|
||||||
url_lock = Lock()
|
url_lock = Lock()
|
||||||
page = scraper_input.offset // 25 + 25 if scraper_input.offset else 0
|
page = scraper_input.offset // 25 * 25 if scraper_input.offset else 0
|
||||||
|
request_count = 0
|
||||||
seconds_old = (
|
seconds_old = (
|
||||||
scraper_input.hours_old * 3600 if scraper_input.hours_old else None
|
scraper_input.hours_old * 3600 if scraper_input.hours_old else None
|
||||||
)
|
)
|
||||||
|
@ -80,7 +81,8 @@ class LinkedInScraper(Scraper):
|
||||||
lambda: len(job_list) < scraper_input.results_wanted and page < 1000
|
lambda: len(job_list) < scraper_input.results_wanted and page < 1000
|
||||||
)
|
)
|
||||||
while continue_search():
|
while continue_search():
|
||||||
logger.info(f"LinkedIn search page: {page // 25 + 1}")
|
request_count += 1
|
||||||
|
logger.info(f"LinkedIn search page: {request_count}")
|
||||||
params = {
|
params = {
|
||||||
"keywords": scraper_input.search_term,
|
"keywords": scraper_input.search_term,
|
||||||
"location": scraper_input.location,
|
"location": scraper_input.location,
|
||||||
|
@ -92,7 +94,7 @@ class LinkedInScraper(Scraper):
|
||||||
else None
|
else None
|
||||||
),
|
),
|
||||||
"pageNum": 0,
|
"pageNum": 0,
|
||||||
"start": page + scraper_input.offset,
|
"start": page,
|
||||||
"f_AL": "true" if scraper_input.easy_apply else None,
|
"f_AL": "true" if scraper_input.easy_apply else None,
|
||||||
"f_C": (
|
"f_C": (
|
||||||
",".join(map(str, scraper_input.linkedin_company_ids))
|
",".join(map(str, scraper_input.linkedin_company_ids))
|
||||||
|
@ -156,7 +158,7 @@ class LinkedInScraper(Scraper):
|
||||||
|
|
||||||
if continue_search():
|
if continue_search():
|
||||||
time.sleep(random.uniform(self.delay, self.delay + self.band_delay))
|
time.sleep(random.uniform(self.delay, self.delay + self.band_delay))
|
||||||
page += self.jobs_per_page
|
page += len(job_list)
|
||||||
|
|
||||||
job_list = job_list[: scraper_input.results_wanted]
|
job_list = job_list[: scraper_input.results_wanted]
|
||||||
return JobResponse(jobs=job_list)
|
return JobResponse(jobs=job_list)
|
||||||
|
|
Loading…
Reference in New Issue