diff --git a/api/core/jobs/__init__.py b/api/core/jobs/__init__.py
index 37262e5..91aa5d5 100644
--- a/api/core/jobs/__init__.py
+++ b/api/core/jobs/__init__.py
@@ -1,15 +1,18 @@
-from pydantic import BaseModel
+from typing import Union
 from datetime import datetime
 from enum import Enum
+from pydantic import BaseModel
+
 
 
 class JobType(Enum):
-    FULL_TIME = "full_time"
-    PART_TIME = "part_time"
-    CONTRACT = "contractor"
-    INTERNSHIP = "internship"
+    FULL_TIME = "fulltime"
+    PART_TIME = "parttime"
+    CONTRACT = "contract"
     TEMPORARY = "temporary"
-    PER_DIEM = "per_diem"
+    INTERNSHIP = "internship"
+
+    PER_DIEM = "perdiem"
     NIGHTS = "nights"
     OTHER = "other"
 
diff --git a/api/core/scrapers/__init__.py b/api/core/scrapers/__init__.py
index 80dc204..94d3dd4 100644
--- a/api/core/scrapers/__init__.py
+++ b/api/core/scrapers/__init__.py
@@ -1,6 +1,4 @@
-from pydantic import BaseModel
-from enum import Enum
-from ..jobs import JobResponse
+from ..jobs import *
 
 
 class Site(Enum):
@@ -16,6 +14,7 @@ class ScraperInput(BaseModel):
     location: str = None
     distance: int = None
     is_remote: bool = False
+    job_type: JobType = None
 
     results_wanted: int = 15
 
diff --git a/api/core/scrapers/indeed/__init__.py b/api/core/scrapers/indeed/__init__.py
index 3d6a158..c9b8ae9 100644
--- a/api/core/scrapers/indeed/__init__.py
+++ b/api/core/scrapers/indeed/__init__.py
@@ -44,17 +44,23 @@
                 "q": scraper_input.search_term,
                 "location": scraper_input.location,
                 "radius": scraper_input.distance,
-                "sc": "0kf:attr(DSQF7);" if scraper_input.is_remote else None,
                 "filter": 0,
                 "start": 0 + page * 10,
             }
+            sc_values = []
+            if scraper_input.is_remote:
+                sc_values.append("attr(DSQF7)")
+            if scraper_input.job_type:
+                sc_values.append("jt({})".format(scraper_input.job_type.value))
+            if sc_values:
+                params["sc"] = "0kf:" + "".join(sc_values) + ";"
 
             response = session.get(self.url, params=params)
-            if response.status_code == 307:
-                new_url = response.headers["Location"]
-                response = session.get(new_url)
-            if response.status_code != status.HTTP_200_OK:
+            if (
+                response.status_code != status.HTTP_200_OK
+                and response.status_code != status.HTTP_307_TEMPORARY_REDIRECT
+            ):
                 return JobResponse(
                     success=False,
                     error=f"Response returned {response.status_code}",
                 )
diff --git a/api/core/scrapers/linkedin/__init__.py b/api/core/scrapers/linkedin/__init__.py
index d88db08..f3cd68d 100644
--- a/api/core/scrapers/linkedin/__init__.py
+++ b/api/core/scrapers/linkedin/__init__.py
@@ -30,6 +30,17 @@
         seen_urls = set()
         page, processed_jobs, job_count = 0, 0, 0
 
+        def job_type_code(job_type):
+            mapping = {
+                JobType.FULL_TIME: "F",
+                JobType.PART_TIME: "P",
+                JobType.INTERNSHIP: "I",
+                JobType.CONTRACT: "C",
+                JobType.TEMPORARY: "T",
+            }
+
+            return mapping.get(job_type, "")
+
         with requests.Session() as session:
             while len(job_list) < scraper_input.results_wanted:
                 params = {
@@ -37,6 +48,9 @@
                     "location": scraper_input.location,
                     "distance": scraper_input.distance,
                     "f_WT": 2 if scraper_input.is_remote else None,
+                    "f_JT": job_type_code(scraper_input.job_type)
+                    if scraper_input.job_type
+                    else None,
                     "pageNum": page,
                 }
 
diff --git a/api/core/scrapers/ziprecruiter/__init__.py b/api/core/scrapers/ziprecruiter/__init__.py
index 8f9cb73..117d4c4 100644
--- a/api/core/scrapers/ziprecruiter/__init__.py
+++ b/api/core/scrapers/ziprecruiter/__init__.py
@@ -35,6 +35,15 @@
         processed_jobs, job_count = 0, 0
         seen_urls = set()
         while len(job_list) < scraper_input.results_wanted:
+            job_type_value = None
+            if scraper_input.job_type:
+                if scraper_input.job_type.value == "fulltime":
+                    job_type_value = "full_time"
+                elif scraper_input.job_type.value == "parttime":
+                    job_type_value = "part_time"
+                else:
+                    job_type_value = scraper_input.job_type.value
+
             params = {
                 "search": scraper_input.search_term,
                 "location": scraper_input.location,
@@ -42,12 +51,15 @@
                 "refine_by_location_type": "only_remote"
                 if scraper_input.is_remote
                 else None,
+                "refine_by_employment": f"employment_type:employment_type:{job_type_value}"
+                if job_type_value
+                else None,
                 "page": page,
             }
 
             response = session.get(
                 self.url, headers=ZipRecruiterScraper.headers(), params=params
             )
             if response.status_code != status.HTTP_200_OK:
                 return JobResponse(
                     success=False,
@@ -74,15 +86,22 @@
             company = job.find("a", {"class": "company_name"}).text.strip()
             description = job.find("p", {"class": "job_snippet"}).text.strip()
             job_type_element = job.find("li", {"class": "perk_item perk_type"})
-            job_type = (
-                job_type_element.text.strip().lower().replace("-", "_")
-                if job_type_element
-                else None
-            )
+
+            if job_type_element:
+                job_type_text = (
+                    job_type_element.text.strip()
+                    .lower()
+                    .replace("-", "")
+                    .replace(" ", "")
+                )
+                if job_type_text == "contractor":
+                    job_type_text = "contract"
+                job_type = JobType(job_type_text)
+            else:
+                job_type = None
 
             date_posted = ZipRecruiterScraper.get_date_posted(job)
-            job_type = job_type.replace(" ", "_") if job_type else job_type
 
             job_post = JobPost(
                 title=title,
                 description=description,
diff --git a/settings.py b/settings.py
index 0cab126..39850be 100644
--- a/settings.py
+++ b/settings.py
@@ -6,4 +6,4 @@ SUPABASE_URL = os.environ.get("SUPABASE_URL")
 SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
 JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
 ALGORITHM = "HS256"
-ACCESS_TOKEN_EXPIRE_MINUTES = 120
+ACCESS_TOKEN_EXPIRE_MINUTES = 3600