diff --git a/api/core/jobs/__init__.py b/api/core/jobs/__init__.py index 622f110..cbf15e8 100644 --- a/api/core/jobs/__init__.py +++ b/api/core/jobs/__init__.py @@ -57,15 +57,13 @@ class JobResponse(BaseModel): success: bool error: str = None - jobs: list[JobPost] = [] - total_results: int = None returned_results: int = None + jobs: list[JobPost] = [] + @validator("returned_results") def set_returned_results(cls, v, values): if v is None and values.get("jobs"): return len(values["jobs"]) return v - - diff --git a/api/core/scrapers/indeed/__init__.py b/api/core/scrapers/indeed/__init__.py index 9751bc7..40d4601 100644 --- a/api/core/scrapers/indeed/__init__.py +++ b/api/core/scrapers/indeed/__init__.py @@ -33,12 +33,8 @@ class IndeedScraper(Scraper): self.seen_urls = set() def scrape_page( - self, - scraper_input: ScraperInput, - page: int, - session: tls_client.Session + self, scraper_input: ScraperInput, page: int, session: tls_client.Session ) -> tuple[list[JobPost], int]: - """ Scrapes a page of Indeed for jobs with scraper_input criteria :param scraper_input: @@ -67,22 +63,24 @@ class IndeedScraper(Scraper): response = session.get(self.url, params=params) if ( - response.status_code != status.HTTP_200_OK - and response.status_code != status.HTTP_307_TEMPORARY_REDIRECT + response.status_code != status.HTTP_200_OK + and response.status_code != status.HTTP_307_TEMPORARY_REDIRECT ): raise StatusException(response.status_code) soup = BeautifulSoup(response.content, "html.parser") - jobs = IndeedScraper.parse_jobs(soup) #: can raise exception, handled by main scrape function + jobs = IndeedScraper.parse_jobs( + soup + ) #: can raise exception, handled by main scrape function total_num_jobs = IndeedScraper.total_jobs(soup) if ( - not jobs.get("metaData", {}) - .get("mosaicProviderJobCardsModel", {}) - .get("results") + not jobs.get("metaData", {}) + .get("mosaicProviderJobCardsModel", {}) + .get("results") ): - raise Exception('No jobs found.') + raise Exception("No jobs found.") for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]: job_url = f'{self.job_url}{job["jobkey"]}' @@ -95,9 +93,7 @@ class IndeedScraper(Scraper): compensation = None if extracted_salary: salary_snippet = job.get("salarySnippet") - currency = ( - salary_snippet.get("currency") if salary_snippet else None - ) + currency = salary_snippet.get("currency") if salary_snippet else None interval = (extracted_salary.get("type"),) if isinstance(interval, tuple): interval = interval[0] @@ -145,7 +141,9 @@ class IndeedScraper(Scraper): client_identifier="chrome112", random_tls_extension_order=True ) - pages_to_process = math.ceil(scraper_input.results_wanted / self.jobs_per_page) - 1 + pages_to_process = ( + math.ceil(scraper_input.results_wanted / self.jobs_per_page) - 1 + ) try: #: get first page to initialize session @@ -153,9 +151,8 @@ class IndeedScraper(Scraper): with ThreadPoolExecutor(max_workers=10) as executor: futures: list[Future] = [ - executor.submit( - self.scrape_page, scraper_input, page, session - ) for page in range(1, pages_to_process + 1) + executor.submit(self.scrape_page, scraper_input, page, session) + for page in range(1, pages_to_process + 1) ] for future in futures: @@ -180,7 +177,7 @@ class IndeedScraper(Scraper): ) if len(job_list) > scraper_input.results_wanted: - job_list = job_list[:scraper_input.results_wanted] + job_list = job_list[: scraper_input.results_wanted] job_response = JobResponse( success=True, @@ -224,9 +221,9 @@ class IndeedScraper(Scraper): script_tags = soup.find_all("script") for tag in script_tags: if ( - tag.string - and "mosaic.providerData" in tag.string - and "mosaic-provider-jobcards" in tag.string + tag.string + and "mosaic.providerData" in tag.string + and "mosaic-provider-jobcards" in tag.string ): return tag return None diff --git a/api/core/scrapers/ziprecruiter/__init__.py b/api/core/scrapers/ziprecruiter/__init__.py index 712cd96..837d237 100644 --- a/api/core/scrapers/ziprecruiter/__init__.py +++ b/api/core/scrapers/ziprecruiter/__init__.py @@ -26,12 +26,8 @@ class ZipRecruiterScraper(Scraper): self.seen_urls = set() def scrape_page( - self, - scraper_input: ScraperInput, - page: int, - session: tls_client.Session + self, scraper_input: ScraperInput, page: int, session: tls_client.Session ) -> tuple[list[JobPost], int | None]: - """ Scrapes a page of ZipRecruiter for jobs with scraper_input criteria :param scraper_input: @@ -140,9 +136,8 @@ class ZipRecruiterScraper(Scraper): with ThreadPoolExecutor(max_workers=10) as executor: futures: list[Future] = [ - executor.submit( - self.scrape_page, scraper_input, page, session - ) for page in range(2, pages_to_process + 1) + executor.submit(self.scrape_page, scraper_input, page, session) + for page in range(2, pages_to_process + 1) ] for future in futures: @@ -159,7 +154,7 @@ class ZipRecruiterScraper(Scraper): #: note: this does not handle if the results are more or less than the results_wanted if len(job_list) > scraper_input.results_wanted: - job_list = job_list[:scraper_input.results_wanted] + job_list = job_list[: scraper_input.results_wanted] job_response = JobResponse( success=True,