mirror of https://github.com/Bunsly/JobSpy
refactor(jobs): switch bad response handling to use fastapi.HTTPException
parent
9ef41979fd
commit
dd0047a5bb
|
@ -48,12 +48,12 @@ class Delivery(BaseModel):
|
||||||
|
|
||||||
class JobPost(BaseModel):
|
class JobPost(BaseModel):
|
||||||
title: str
|
title: str
|
||||||
description: str = None
|
|
||||||
company_name: str
|
company_name: str
|
||||||
location: Location
|
location: Location
|
||||||
|
description: str = None
|
||||||
job_type: JobType = None
|
job_type: JobType = None
|
||||||
compensation: Compensation = None
|
compensation: Compensation = None
|
||||||
date_posted: datetime
|
date_posted: datetime = None
|
||||||
delivery: Delivery = None
|
delivery: Delivery = None
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
from math import ceil
|
from math import ceil
|
||||||
|
|
||||||
import tls_client
|
import tls_client
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from fastapi import HTTPException, status
|
||||||
|
|
||||||
from api.core.scrapers import Scraper, ScraperInput, Site
|
|
||||||
from api.core.jobs import *
|
from api.core.jobs import *
|
||||||
from api.core.utils import handle_response
|
from api.core.scrapers import Scraper, ScraperInput, Site
|
||||||
|
|
||||||
|
|
||||||
class IndeedScraper(Scraper):
|
class IndeedScraper(Scraper):
|
||||||
|
@ -29,9 +29,11 @@ class IndeedScraper(Scraper):
|
||||||
}
|
}
|
||||||
|
|
||||||
response = session.get(self.url, params=params)
|
response = session.get(self.url, params=params)
|
||||||
success, result = handle_response(response)
|
if response.status_code != status.HTTP_200_OK:
|
||||||
if not success:
|
raise HTTPException(
|
||||||
return result
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail=f"Response returned {response.status_code} {response.reason}",
|
||||||
|
)
|
||||||
|
|
||||||
soup = BeautifulSoup(response.content, "html.parser")
|
soup = BeautifulSoup(response.content, "html.parser")
|
||||||
|
|
||||||
|
|
|
@ -2,10 +2,10 @@ from math import ceil
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from fastapi import HTTPException, status
|
||||||
|
|
||||||
from api.core.scrapers import Scraper, ScraperInput, Site
|
from api.core.scrapers import Scraper, ScraperInput, Site
|
||||||
from api.core.jobs import *
|
from api.core.jobs import *
|
||||||
from api.core.utils import handle_response
|
|
||||||
|
|
||||||
|
|
||||||
class LinkedInScraper(Scraper):
|
class LinkedInScraper(Scraper):
|
||||||
|
@ -20,9 +20,11 @@ class LinkedInScraper(Scraper):
|
||||||
|
|
||||||
self.url = f"{self.url}/{scraper_input.search_term}-jobs"
|
self.url = f"{self.url}/{scraper_input.search_term}-jobs"
|
||||||
response = requests.get(self.url, params=params)
|
response = requests.get(self.url, params=params)
|
||||||
success, result = handle_response(response)
|
if response.status_code != status.HTTP_200_OK:
|
||||||
if not success:
|
raise HTTPException(
|
||||||
return result
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail=f"Response returned {response.status_code} {response.reason}",
|
||||||
|
)
|
||||||
|
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
soup = BeautifulSoup(response.text, "html.parser")
|
||||||
|
|
||||||
|
@ -35,24 +37,23 @@ class LinkedInScraper(Scraper):
|
||||||
job_url = job_url_tag["href"] if job_url_tag else "N/A"
|
job_url = job_url_tag["href"] if job_url_tag else "N/A"
|
||||||
|
|
||||||
job_info = job_card.find("div", class_="base-search-card__info")
|
job_info = job_card.find("div", class_="base-search-card__info")
|
||||||
if job_info is not None:
|
if job_info is None:
|
||||||
title_tag = job_info.find("h3", class_="base-search-card__title")
|
continue
|
||||||
title = title_tag.text.strip() if title_tag else "N/A"
|
title_tag = job_info.find("h3", class_="base-search-card__title")
|
||||||
|
title = title_tag.text.strip() if title_tag else "N/A"
|
||||||
|
|
||||||
company_tag = job_info.find("a", class_="hidden-nested-link")
|
company_tag = job_info.find("a", class_="hidden-nested-link")
|
||||||
company = company_tag.text.strip() if company_tag else "N/A"
|
company = company_tag.text.strip() if company_tag else "N/A"
|
||||||
|
|
||||||
metadata_card = job_info.find(
|
metadata_card = job_info.find("div", class_="base-search-card__metadata")
|
||||||
"div", class_="base-search-card__metadata"
|
location: Location = LinkedInScraper.get_location(metadata_card)
|
||||||
)
|
|
||||||
location: Location = LinkedInScraper.get_location(metadata_card)
|
|
||||||
|
|
||||||
datetime_tag = metadata_card.find(
|
datetime_tag = metadata_card.find(
|
||||||
"time", class_="job-search-card__listdate"
|
"time", class_="job-search-card__listdate"
|
||||||
)
|
)
|
||||||
if datetime_tag:
|
if datetime_tag:
|
||||||
datetime_str = datetime_tag["datetime"]
|
datetime_str = datetime_tag["datetime"]
|
||||||
date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
|
date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
|
||||||
|
|
||||||
job_post = JobPost(
|
job_post = JobPost(
|
||||||
title=title,
|
title=title,
|
||||||
|
|
|
@ -2,11 +2,11 @@ import json
|
||||||
from urllib.parse import urlparse, parse_qs
|
from urllib.parse import urlparse, parse_qs
|
||||||
|
|
||||||
import tls_client
|
import tls_client
|
||||||
|
from fastapi import HTTPException, status
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from api.core.scrapers import Scraper, ScraperInput, Site
|
from api.core.scrapers import Scraper, ScraperInput, Site
|
||||||
from api.core.jobs import *
|
from api.core.jobs import *
|
||||||
from api.core.utils import handle_response
|
|
||||||
|
|
||||||
|
|
||||||
class ZipRecruiterScraper(Scraper):
|
class ZipRecruiterScraper(Scraper):
|
||||||
|
@ -30,9 +30,11 @@ class ZipRecruiterScraper(Scraper):
|
||||||
response = session.get(
|
response = session.get(
|
||||||
self.url, headers=ZipRecruiterScraper.headers(), params=params
|
self.url, headers=ZipRecruiterScraper.headers(), params=params
|
||||||
)
|
)
|
||||||
success, result = handle_response(response)
|
if response.status_code != status.HTTP_200_OK:
|
||||||
if not success:
|
raise HTTPException(
|
||||||
return result
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail=f"Response returned {response.status_code} {response.reason}",
|
||||||
|
)
|
||||||
|
|
||||||
html_string = response.content
|
html_string = response.content
|
||||||
soup = BeautifulSoup(html_string, "html.parser")
|
soup = BeautifulSoup(html_string, "html.parser")
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
def handle_response(response):
    """Classify an HTTP response as a success or a failure.

    Args:
        response: A response object exposing ``status_code``, ``url``,
            ``text`` and a ``json()`` method that raises ``ValueError``
            when the body is not valid JSON.

    Returns:
        A ``(success, result)`` tuple.

        * ``(True, payload)`` when the status code is 200. ``payload`` is
          the decoded JSON body, or the raw text when the body is not
          valid JSON.
        * ``(False, error)`` for any other status code. ``error`` is a
          dict with the keys ``message``, ``status_code``, ``url`` and
          ``details``.
    """
    default_details = "No detailed message provided."

    if response.status_code == 200:
        try:
            return True, response.json()
        except ValueError:
            # Body is not JSON (e.g. an HTML page): hand back the raw text.
            return True, response.text

    try:
        body = response.json()
    except ValueError:
        body = None

    # An error body can be valid JSON without being an object (a list, a
    # string, a number or null). Only a dict supports the "message" lookup;
    # calling .get() on anything else would raise AttributeError.
    if isinstance(body, dict):
        error_msg = body.get("message", default_details)
    else:
        error_msg = default_details

    error = {
        "message": "An error occurred during the request.",
        "status_code": response.status_code,
        "url": response.url,
        "details": error_msg,
    }

    return False, error
|
|
Loading…
Reference in New Issue