diff --git a/api/core/jobs/__init__.py b/api/core/jobs/__init__.py
index 476922b..2026422 100644
--- a/api/core/jobs/__init__.py
+++ b/api/core/jobs/__init__.py
@@ -48,12 +48,12 @@ class Delivery(BaseModel):
 
 
 class JobPost(BaseModel):
     title: str
-    description: str = None
     company_name: str
     location: Location
+    description: str = None
     job_type: JobType = None
     compensation: Compensation = None
-    date_posted: datetime
+    date_posted: datetime = None
     delivery: Delivery = None
 
diff --git a/api/core/scrapers/indeed/__init__.py b/api/core/scrapers/indeed/__init__.py
index eb200c3..f89d8a0 100644
--- a/api/core/scrapers/indeed/__init__.py
+++ b/api/core/scrapers/indeed/__init__.py
@@ -1,13 +1,13 @@
-import json
 import re
+import json
 from math import ceil
 
 import tls_client
 from bs4 import BeautifulSoup
+from fastapi import HTTPException, status
 
-from api.core.scrapers import Scraper, ScraperInput, Site
 from api.core.jobs import *
-from api.core.utils import handle_response
+from api.core.scrapers import Scraper, ScraperInput, Site
 
 
 class IndeedScraper(Scraper):
@@ -29,9 +29,11 @@ class IndeedScraper(Scraper):
         }
 
         response = session.get(self.url, params=params)
-        success, result = handle_response(response)
-        if not success:
-            return result
+        if response.status_code != status.HTTP_200_OK:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=f"Response returned {response.status_code} {response.reason}",
+            )
 
         soup = BeautifulSoup(response.content, "html.parser")
 
diff --git a/api/core/scrapers/linkedin/__init__.py b/api/core/scrapers/linkedin/__init__.py
index c840272..198b0dc 100644
--- a/api/core/scrapers/linkedin/__init__.py
+++ b/api/core/scrapers/linkedin/__init__.py
@@ -2,10 +2,10 @@ from math import ceil
 
 import requests
 from bs4 import BeautifulSoup
+from fastapi import HTTPException, status
 
 from api.core.scrapers import Scraper, ScraperInput, Site
 from api.core.jobs import *
-from api.core.utils import handle_response
 
 
 class LinkedInScraper(Scraper):
@@ -20,9 +20,11 @@ class LinkedInScraper(Scraper):
         self.url = f"{self.url}/{scraper_input.search_term}-jobs"
 
         response = requests.get(self.url, params=params)
-        success, result = handle_response(response)
-        if not success:
-            return result
+        if response.status_code != status.HTTP_200_OK:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=f"Response returned {response.status_code} {response.reason}",
+            )
 
         soup = BeautifulSoup(response.text, "html.parser")
 
@@ -35,24 +37,23 @@ class LinkedInScraper(Scraper):
             job_url = job_url_tag["href"] if job_url_tag else "N/A"
 
             job_info = job_card.find("div", class_="base-search-card__info")
-            if job_info is not None:
-                title_tag = job_info.find("h3", class_="base-search-card__title")
-                title = title_tag.text.strip() if title_tag else "N/A"
+            if job_info is None:
+                continue
+            title_tag = job_info.find("h3", class_="base-search-card__title")
+            title = title_tag.text.strip() if title_tag else "N/A"
 
-                company_tag = job_info.find("a", class_="hidden-nested-link")
-                company = company_tag.text.strip() if company_tag else "N/A"
+            company_tag = job_info.find("a", class_="hidden-nested-link")
+            company = company_tag.text.strip() if company_tag else "N/A"
 
-                metadata_card = job_info.find(
-                    "div", class_="base-search-card__metadata"
-                )
-                location: Location = LinkedInScraper.get_location(metadata_card)
+            metadata_card = job_info.find("div", class_="base-search-card__metadata")
+            location: Location = LinkedInScraper.get_location(metadata_card)
 
-                datetime_tag = metadata_card.find(
-                    "time", class_="job-search-card__listdate"
-                )
-                if datetime_tag:
-                    datetime_str = datetime_tag["datetime"]
-                    date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
+            datetime_tag = metadata_card.find(
+                "time", class_="job-search-card__listdate"
+            )
+            if datetime_tag:
+                datetime_str = datetime_tag["datetime"]
+                date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
 
             job_post = JobPost(
                 title=title,
diff --git a/api/core/scrapers/ziprecruiter/__init__.py b/api/core/scrapers/ziprecruiter/__init__.py
index 65634eb..89fd0ec 100644
--- a/api/core/scrapers/ziprecruiter/__init__.py
+++ b/api/core/scrapers/ziprecruiter/__init__.py
@@ -2,11 +2,11 @@ import json
 from urllib.parse import urlparse, parse_qs
 
 import tls_client
+from fastapi import HTTPException, status
 from bs4 import BeautifulSoup
 
 from api.core.scrapers import Scraper, ScraperInput, Site
 from api.core.jobs import *
-from api.core.utils import handle_response
 
 
 class ZipRecruiterScraper(Scraper):
@@ -30,9 +30,11 @@ class ZipRecruiterScraper(Scraper):
         response = session.get(
             self.url, headers=ZipRecruiterScraper.headers(), params=params
         )
-        success, result = handle_response(response)
-        if not success:
-            return result
+        if response.status_code != status.HTTP_200_OK:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=f"Response returned {response.status_code} {response.reason}",
+            )
 
         html_string = response.content
         soup = BeautifulSoup(html_string, "html.parser")
diff --git a/api/core/utils.py b/api/core/utils.py
deleted file mode 100644
index f4a3ec2..0000000
--- a/api/core/utils.py
+++ /dev/null
@@ -1,20 +0,0 @@
-def handle_response(response):
-    if response.status_code == 200:
-        try:
-            return True, response.json()
-        except ValueError:
-            return True, response.text
-
-    try:
-        error_msg = response.json().get("message", "No detailed message provided.")
-    except ValueError:
-        error_msg = "No detailed message provided."
-
-    error = {
-        "message": "An error occurred during the request.",
-        "status_code": response.status_code,
-        "url": response.url,
-        "details": error_msg,
-    }
-
-    return False, error