refactor(jobs): switch bad response handling to use fastapi.HTTPException

pull/12/head
Cullen Watson 2023-07-09 07:37:18 -05:00
parent 9ef41979fd
commit dd0047a5bb
5 changed files with 36 additions and 51 deletions

View File

@@ -48,12 +48,12 @@ class Delivery(BaseModel):
class JobPost(BaseModel): class JobPost(BaseModel):
title: str title: str
description: str = None
company_name: str company_name: str
location: Location location: Location
description: str = None
job_type: JobType = None job_type: JobType = None
compensation: Compensation = None compensation: Compensation = None
date_posted: datetime date_posted: datetime = None
delivery: Delivery = None delivery: Delivery = None

View File

@@ -1,13 +1,13 @@
import json
import re import re
import json
from math import ceil from math import ceil
import tls_client import tls_client
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from fastapi import HTTPException, status
from api.core.scrapers import Scraper, ScraperInput, Site
from api.core.jobs import * from api.core.jobs import *
from api.core.utils import handle_response from api.core.scrapers import Scraper, ScraperInput, Site
class IndeedScraper(Scraper): class IndeedScraper(Scraper):
@@ -29,9 +29,11 @@ class IndeedScraper(Scraper):
} }
response = session.get(self.url, params=params) response = session.get(self.url, params=params)
success, result = handle_response(response) if response.status_code != status.HTTP_200_OK:
if not success: raise HTTPException(
return result status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Response returned {response.status_code} {response.reason}",
)
soup = BeautifulSoup(response.content, "html.parser") soup = BeautifulSoup(response.content, "html.parser")

View File

@@ -2,10 +2,10 @@ from math import ceil
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from fastapi import HTTPException, status
from api.core.scrapers import Scraper, ScraperInput, Site from api.core.scrapers import Scraper, ScraperInput, Site
from api.core.jobs import * from api.core.jobs import *
from api.core.utils import handle_response
class LinkedInScraper(Scraper): class LinkedInScraper(Scraper):
@@ -20,9 +20,11 @@ class LinkedInScraper(Scraper):
self.url = f"{self.url}/{scraper_input.search_term}-jobs" self.url = f"{self.url}/{scraper_input.search_term}-jobs"
response = requests.get(self.url, params=params) response = requests.get(self.url, params=params)
success, result = handle_response(response) if response.status_code != status.HTTP_200_OK:
if not success: raise HTTPException(
return result status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Response returned {response.status_code} {response.reason}",
)
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(response.text, "html.parser")
@@ -35,16 +37,15 @@ class LinkedInScraper(Scraper):
job_url = job_url_tag["href"] if job_url_tag else "N/A" job_url = job_url_tag["href"] if job_url_tag else "N/A"
job_info = job_card.find("div", class_="base-search-card__info") job_info = job_card.find("div", class_="base-search-card__info")
if job_info is not None: if job_info is None:
continue
title_tag = job_info.find("h3", class_="base-search-card__title") title_tag = job_info.find("h3", class_="base-search-card__title")
title = title_tag.text.strip() if title_tag else "N/A" title = title_tag.text.strip() if title_tag else "N/A"
company_tag = job_info.find("a", class_="hidden-nested-link") company_tag = job_info.find("a", class_="hidden-nested-link")
company = company_tag.text.strip() if company_tag else "N/A" company = company_tag.text.strip() if company_tag else "N/A"
metadata_card = job_info.find( metadata_card = job_info.find("div", class_="base-search-card__metadata")
"div", class_="base-search-card__metadata"
)
location: Location = LinkedInScraper.get_location(metadata_card) location: Location = LinkedInScraper.get_location(metadata_card)
datetime_tag = metadata_card.find( datetime_tag = metadata_card.find(

View File

@@ -2,11 +2,11 @@ import json
from urllib.parse import urlparse, parse_qs from urllib.parse import urlparse, parse_qs
import tls_client import tls_client
from fastapi import HTTPException, status
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from api.core.scrapers import Scraper, ScraperInput, Site from api.core.scrapers import Scraper, ScraperInput, Site
from api.core.jobs import * from api.core.jobs import *
from api.core.utils import handle_response
class ZipRecruiterScraper(Scraper): class ZipRecruiterScraper(Scraper):
@@ -30,9 +30,11 @@ class ZipRecruiterScraper(Scraper):
response = session.get( response = session.get(
self.url, headers=ZipRecruiterScraper.headers(), params=params self.url, headers=ZipRecruiterScraper.headers(), params=params
) )
success, result = handle_response(response) if response.status_code != status.HTTP_200_OK:
if not success: raise HTTPException(
return result status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Response returned {response.status_code} {response.reason}",
)
html_string = response.content html_string = response.content
soup = BeautifulSoup(html_string, "html.parser") soup = BeautifulSoup(html_string, "html.parser")

View File

@@ -1,20 +0,0 @@
def handle_response(response):
if response.status_code == 200:
try:
return True, response.json()
except ValueError:
return True, response.text
try:
error_msg = response.json().get("message", "No detailed message provided.")
except ValueError:
error_msg = "No detailed message provided."
error = {
"message": "An error occurred during the request.",
"status_code": response.status_code,
"url": response.url,
"details": error_msg,
}
return False, error