mirror of https://github.com/Bunsly/JobSpy
fix(indeed): return no jobs instead of error
parent
2b723819f2
commit
ccd18595fe
|
@ -1,7 +1,6 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
import concurrent.futures
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
|
||||||
|
|
||||||
from .jobs import JobType, Location
|
from .jobs import JobType, Location
|
||||||
from .scrapers.indeed import IndeedScraper
|
from .scrapers.indeed import IndeedScraper
|
||||||
|
@ -119,7 +118,7 @@ def scrape_jobs(
|
||||||
executor.submit(worker, site): site for site in scraper_input.site_type
|
executor.submit(worker, site): site for site in scraper_input.site_type
|
||||||
}
|
}
|
||||||
|
|
||||||
for future in concurrent.futures.as_completed(future_to_site):
|
for future in as_completed(future_to_site):
|
||||||
site_value, scraped_data = future.result()
|
site_value, scraped_data = future.result()
|
||||||
site_to_jobs_dict[site_value] = scraped_data
|
site_to_jobs_dict[site_value] = scraped_data
|
||||||
|
|
||||||
|
|
|
@ -80,13 +80,14 @@ class IndeedScraper(Scraper):
|
||||||
raise IndeedException(str(e))
|
raise IndeedException(str(e))
|
||||||
|
|
||||||
soup = BeautifulSoup(response.content, "html.parser")
|
soup = BeautifulSoup(response.content, "html.parser")
|
||||||
|
job_list = []
|
||||||
|
total_num_jobs = IndeedScraper.total_jobs(soup)
|
||||||
if "did not match any jobs" in response.text:
|
if "did not match any jobs" in response.text:
|
||||||
raise IndeedException("Parsing exception: Search did not match any jobs")
|
return job_list, total_num_jobs
|
||||||
|
|
||||||
jobs = IndeedScraper.parse_jobs(
|
jobs = IndeedScraper.parse_jobs(
|
||||||
soup
|
soup
|
||||||
) #: can raise exception, handled by main scrape function
|
) #: can raise exception, handled by main scrape function
|
||||||
total_num_jobs = IndeedScraper.total_jobs(soup)
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
not jobs.get("metaData", {})
|
not jobs.get("metaData", {})
|
||||||
|
@ -152,26 +153,34 @@ class IndeedScraper(Scraper):
|
||||||
:param scraper_input:
|
:param scraper_input:
|
||||||
:return: job_response
|
:return: job_response
|
||||||
"""
|
"""
|
||||||
pages_to_process = (
|
|
||||||
math.ceil(scraper_input.results_wanted / self.jobs_per_page) - 1
|
|
||||||
)
|
|
||||||
|
|
||||||
#: get first page to initialize session
|
|
||||||
job_list, total_results = self.scrape_page(scraper_input, 0)
|
job_list, total_results = self.scrape_page(scraper_input, 0)
|
||||||
|
pages_processed = 1
|
||||||
|
|
||||||
|
while len(self.seen_urls) < scraper_input.results_wanted:
|
||||||
|
pages_to_process = math.ceil((scraper_input.results_wanted - len(self.seen_urls)) / self.jobs_per_page)
|
||||||
|
new_jobs = False
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||||
futures: list[Future] = [
|
futures: list[Future] = [
|
||||||
executor.submit(self.scrape_page, scraper_input, page)
|
executor.submit(self.scrape_page, scraper_input, page + pages_processed)
|
||||||
for page in range(1, pages_to_process + 1)
|
for page in range(pages_to_process)
|
||||||
]
|
]
|
||||||
|
|
||||||
for future in futures:
|
for future in futures:
|
||||||
jobs, _ = future.result()
|
jobs, _ = future.result()
|
||||||
|
if jobs:
|
||||||
job_list += jobs
|
job_list += jobs
|
||||||
|
new_jobs = True
|
||||||
|
if len(self.seen_urls) >= scraper_input.results_wanted:
|
||||||
|
break
|
||||||
|
|
||||||
if len(job_list) > scraper_input.results_wanted:
|
pages_processed += pages_to_process
|
||||||
job_list = job_list[: scraper_input.results_wanted]
|
if not new_jobs:
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
if len(self.seen_urls) > scraper_input.results_wanted:
|
||||||
|
job_list = job_list[:scraper_input.results_wanted]
|
||||||
|
|
||||||
job_response = JobResponse(
|
job_response = JobResponse(
|
||||||
jobs=job_list,
|
jobs=job_list,
|
||||||
|
|
Loading…
Reference in New Issue