refactor(jobs): switch bad response handling to use fastapi.HTTPException

pull/12/head
Cullen Watson 2023-07-09 07:37:18 -05:00
parent 9ef41979fd
commit dd0047a5bb
5 changed files with 36 additions and 51 deletions

View File

@@ -48,12 +48,12 @@ class Delivery(BaseModel):
class JobPost(BaseModel):
title: str
description: str = None
company_name: str
location: Location
description: str = None
job_type: JobType = None
compensation: Compensation = None
date_posted: datetime
date_posted: datetime = None
delivery: Delivery = None

View File

@@ -1,13 +1,13 @@
import json
import re
import json
from math import ceil
import tls_client
from bs4 import BeautifulSoup
from fastapi import HTTPException, status
from api.core.scrapers import Scraper, ScraperInput, Site
from api.core.jobs import *
from api.core.utils import handle_response
from api.core.scrapers import Scraper, ScraperInput, Site
class IndeedScraper(Scraper):
@@ -29,9 +29,11 @@ class IndeedScraper(Scraper):
}
response = session.get(self.url, params=params)
success, result = handle_response(response)
if not success:
return result
if response.status_code != status.HTTP_200_OK:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Response returned {response.status_code} {response.reason}",
)
soup = BeautifulSoup(response.content, "html.parser")

View File

@@ -2,10 +2,10 @@ from math import ceil
import requests
from bs4 import BeautifulSoup
from fastapi import HTTPException, status
from api.core.scrapers import Scraper, ScraperInput, Site
from api.core.jobs import *
from api.core.utils import handle_response
class LinkedInScraper(Scraper):
@@ -20,9 +20,11 @@ class LinkedInScraper(Scraper):
self.url = f"{self.url}/{scraper_input.search_term}-jobs"
response = requests.get(self.url, params=params)
success, result = handle_response(response)
if not success:
return result
if response.status_code != status.HTTP_200_OK:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Response returned {response.status_code} {response.reason}",
)
soup = BeautifulSoup(response.text, "html.parser")
@@ -35,24 +37,23 @@ class LinkedInScraper(Scraper):
job_url = job_url_tag["href"] if job_url_tag else "N/A"
job_info = job_card.find("div", class_="base-search-card__info")
if job_info is not None:
title_tag = job_info.find("h3", class_="base-search-card__title")
title = title_tag.text.strip() if title_tag else "N/A"
if job_info is None:
continue
title_tag = job_info.find("h3", class_="base-search-card__title")
title = title_tag.text.strip() if title_tag else "N/A"
company_tag = job_info.find("a", class_="hidden-nested-link")
company = company_tag.text.strip() if company_tag else "N/A"
company_tag = job_info.find("a", class_="hidden-nested-link")
company = company_tag.text.strip() if company_tag else "N/A"
metadata_card = job_info.find(
"div", class_="base-search-card__metadata"
)
location: Location = LinkedInScraper.get_location(metadata_card)
metadata_card = job_info.find("div", class_="base-search-card__metadata")
location: Location = LinkedInScraper.get_location(metadata_card)
datetime_tag = metadata_card.find(
"time", class_="job-search-card__listdate"
)
if datetime_tag:
datetime_str = datetime_tag["datetime"]
date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
datetime_tag = metadata_card.find(
"time", class_="job-search-card__listdate"
)
if datetime_tag:
datetime_str = datetime_tag["datetime"]
date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
job_post = JobPost(
title=title,

View File

@@ -2,11 +2,11 @@ import json
from urllib.parse import urlparse, parse_qs
import tls_client
from fastapi import HTTPException, status
from bs4 import BeautifulSoup
from api.core.scrapers import Scraper, ScraperInput, Site
from api.core.jobs import *
from api.core.utils import handle_response
class ZipRecruiterScraper(Scraper):
@@ -30,9 +30,11 @@ class ZipRecruiterScraper(Scraper):
response = session.get(
self.url, headers=ZipRecruiterScraper.headers(), params=params
)
success, result = handle_response(response)
if not success:
return result
if response.status_code != status.HTTP_200_OK:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Response returned {response.status_code} {response.reason}",
)
html_string = response.content
soup = BeautifulSoup(html_string, "html.parser")

View File

@ -1,20 +0,0 @@
def handle_response(response):
    """Normalize an HTTP response into a ``(success, payload)`` tuple.

    On a 200 response, returns ``(True, body)`` where *body* is the parsed
    JSON if the payload is valid JSON, otherwise the raw response text.
    On any other status, returns ``(False, error_dict)`` where the dict
    carries a generic message, the status code, the request URL, and the
    server-provided ``"message"`` detail when one exists.

    :param response: a requests-style response object exposing
        ``status_code``, ``json()``, ``text``, and ``url``.
    :return: ``(bool, dict | str)`` — success flag and payload/error.
    """
    if response.status_code == 200:
        try:
            return True, response.json()
        except ValueError:
            # Body was not valid JSON; fall back to the raw text.
            return True, response.text

    try:
        body = response.json()
        # json() may legally return a non-dict (list, string, number);
        # only dicts can carry a "message" detail.
        if isinstance(body, dict):
            error_msg = body.get("message", "No detailed message provided.")
        else:
            error_msg = "No detailed message provided."
    except ValueError:
        error_msg = "No detailed message provided."

    error = {
        "message": "An error occurred during the request.",
        "status_code": response.status_code,
        "url": response.url,
        "details": error_msg,
    }
    return False, error