feat(jobs): add job_type param

pull/12/head
Cullen Watson 2023-07-11 08:24:59 -05:00
parent d69c41392d
commit a5920ddc52
6 changed files with 64 additions and 22 deletions

@@ -1,15 +1,18 @@
-from pydantic import BaseModel
+from typing import Union
 from datetime import datetime
 from enum import Enum
+from pydantic import BaseModel
 
 
 class JobType(Enum):
-    FULL_TIME = "full_time"
-    PART_TIME = "part_time"
-    CONTRACT = "contractor"
-    INTERNSHIP = "internship"
+    FULL_TIME = "fulltime"
+    PART_TIME = "parttime"
+    CONTRACT = "contract"
     TEMPORARY = "temporary"
-    PER_DIEM = "per_diem"
+    INTERNSHIP = "internship"
+    PER_DIEM = "perdiem"
     NIGHTS = "nights"
     OTHER = "other"
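
The renamed values are load-bearing: the ZipRecruiter parser further down builds the enum from scraped text via JobType(...), and Enum lookup by value requires an exact match. A minimal sketch of that lookup, using an abbreviated copy of the enum above:

    from enum import Enum

    class JobType(Enum):  # abbreviated copy of the enum defined above
        FULL_TIME = "fulltime"
        PART_TIME = "parttime"
        CONTRACT = "contract"

    # Enum(value) looks a member up by its value, so the scraped string
    # must match exactly -- hence "full_time" -> "fulltime".
    assert JobType("fulltime") is JobType.FULL_TIME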

@@ -1,6 +1,4 @@
-from pydantic import BaseModel
-from enum import Enum
-from ..jobs import JobResponse
+from ..jobs import *
 
 
 class Site(Enum):
@@ -16,6 +14,7 @@ class ScraperInput(BaseModel):
     location: str = None
     distance: int = None
     is_remote: bool = False
+    job_type: JobType = None
     results_wanted: int = 15
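
For reference, a minimal sketch of how a caller would use the new field. The package paths here are assumptions (the diff only shows relative imports like ..jobs), so adjust to the repo's actual layout:

    from api.core.scrapers import ScraperInput  # assumed path
    from api.core.jobs import JobType           # assumed path

    scraper_input = ScraperInput(
        search_term="software engineer",
        location="Austin, TX",
        job_type=JobType.FULL_TIME,  # new field added by this commit
        results_wanted=15,
    )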

@@ -44,17 +44,23 @@ class IndeedScraper(Scraper):
             "q": scraper_input.search_term,
             "location": scraper_input.location,
             "radius": scraper_input.distance,
-            "sc": "0kf:attr(DSQF7);" if scraper_input.is_remote else None,
             "filter": 0,
             "start": 0 + page * 10,
         }
 
+        sc_values = []
+        if scraper_input.is_remote:
+            sc_values.append("attr(DSQF7)")
+        if scraper_input.job_type:
+            sc_values.append("jt({})".format(scraper_input.job_type.value))
+        if sc_values:
+            params["sc"] = "0kf:" + "".join(sc_values) + ";"
+
         response = session.get(self.url, params=params)
-        if response.status_code == 307:
-            new_url = response.headers["Location"]
-            response = session.get(new_url)
-        if response.status_code != status.HTTP_200_OK:
+        if (
+            response.status_code != status.HTTP_200_OK
+            and response.status_code != status.HTTP_307_TEMPORARY_REDIRECT
+        ):
             return JobResponse(
                 success=False,
                 error=f"Response returned {response.status_code}",

@@ -150,7 +156,7 @@ class IndeedScraper(Scraper):
     @staticmethod
     def get_job_type(job: dict) -> Optional[JobType]:
         """
-        Parses the job to get JobType
+        Parses the job to get JobTypeIndeed
         :param job:
         :return:
         """

@@ -30,6 +30,17 @@ class LinkedInScraper(Scraper):
         seen_urls = set()
         page, processed_jobs, job_count = 0, 0, 0
 
+        def job_type_code(job_type):
+            mapping = {
+                JobType.FULL_TIME: "F",
+                JobType.PART_TIME: "P",
+                JobType.INTERNSHIP: "I",
+                JobType.CONTRACT: "C",
+                JobType.TEMPORARY: "T",
+            }
+
+            return mapping.get(job_type, "")
+
         with requests.Session() as session:
             while len(job_list) < scraper_input.results_wanted:
                 params = {
@@ -37,6 +48,9 @@ class LinkedInScraper(Scraper):
                     "location": scraper_input.location,
                     "distance": scraper_input.distance,
                     "f_WT": 2 if scraper_input.is_remote else None,
+                    "f_JT": job_type_code(scraper_input.job_type)
+                    if scraper_input.job_type
+                    else None,
                     "pageNum": page,
                 }
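
One edge case in job_type_code: types without a LinkedIn code (PER_DIEM, NIGHTS, OTHER) fall back to an empty string rather than None, so the request carries an empty f_JT= instead of dropping the key the way the None-valued params are dropped. A minimal check (enum values copied from the jobs module above):

    from enum import Enum

    class JobType(Enum):  # abbreviated copy
        FULL_TIME = "fulltime"
        PER_DIEM = "perdiem"

    mapping = {JobType.FULL_TIME: "F"}  # subset of the mapping above

    assert mapping.get(JobType.FULL_TIME, "") == "F"
    assert mapping.get(JobType.PER_DIEM, "") == ""  # empty filter, key still sent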

@@ -35,6 +35,15 @@ class ZipRecruiterScraper(Scraper):
         processed_jobs, job_count = 0, 0
         seen_urls = set()
         while len(job_list) < scraper_input.results_wanted:
+            job_type_value = None
+            if scraper_input.job_type:
+                if scraper_input.job_type.value == "fulltime":
+                    job_type_value = "full_time"
+                elif scraper_input.job_type.value == "parttime":
+                    job_type_value = "part_time"
+                else:
+                    job_type_value = scraper_input.job_type.value
+
             params = {
                 "search": scraper_input.search_term,
                 "location": scraper_input.location,
@@ -42,12 +51,16 @@ class ZipRecruiterScraper(Scraper):
                 "refine_by_location_type": "only_remote"
                 if scraper_input.is_remote
                 else None,
+                "refine_by_employment": f"employment_type:employment_type:{job_type_value}"
+                if job_type_value
+                else None,
                 "page": page,
             }
             response = session.get(
                 self.url, headers=ZipRecruiterScraper.headers(), params=params
             )
+            print(response.url)
             if response.status_code != status.HTTP_200_OK:
                 return JobResponse(
                     success=False,
@@ -74,15 +87,22 @@ class ZipRecruiterScraper(Scraper):
             company = job.find("a", {"class": "company_name"}).text.strip()
             description = job.find("p", {"class": "job_snippet"}).text.strip()
             job_type_element = job.find("li", {"class": "perk_item perk_type"})
-            job_type = (
-                job_type_element.text.strip().lower().replace("-", "_")
-                if job_type_element
-                else None
-            )
+            if job_type_element:
+                job_type_text = (
+                    job_type_element.text.strip()
+                    .lower()
+                    .replace("-", "")
+                    .replace(" ", "")
+                )
+                if job_type_text == "contractor":
+                    job_type_text = "contract"
+                job_type = JobType(job_type_text)
+            else:
+                job_type = None
             date_posted = ZipRecruiterScraper.get_date_posted(job)
-            job_type = job_type.replace(" ", "_") if job_type else job_type
 
             job_post = JobPost(
                 title=title,
                 description=description,
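
The two conversions added to this scraper run in opposite directions: the request side rewrites the enum value into ZipRecruiter's underscored form, while the parse side squashes the perk text so it can round-trip through JobType(...). A string-level sketch of both, using only values visible in this diff:

    # Request side: enum value -> refine_by_employment form.
    def to_zip_value(value: str) -> str:
        return {"fulltime": "full_time", "parttime": "part_time"}.get(value, value)

    assert to_zip_value("fulltime") == "full_time"
    assert to_zip_value("contract") == "contract"  # passed through unchanged

    # Parse side: perk text -> enum value ("Full-Time" -> "fulltime").
    normalized = "Full-Time".strip().lower().replace("-", "").replace(" ", "")
    assert normalized == "fulltime"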

@@ -6,4 +6,4 @@ SUPABASE_URL = os.environ.get("SUPABASE_URL")
 SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
 JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
 ALGORITHM = "HS256"
-ACCESS_TOKEN_EXPIRE_MINUTES = 120
+ACCESS_TOKEN_EXPIRE_MINUTES = 3600