feat(jobs): add job_type param

2023-07-11 08:24:59 -05:00 · 2023-07-11 08:24:59 -05:00 · a5920ddc52
parent d69c41392d
commit a5920ddc52
6 changed files with 64 additions and 22 deletions
--- a/api/core/jobs/init.py
+++ b/api/core/jobs/init.py
@ -1,15 +1,18 @@
-from pydantic import BaseModel
+from typing import Union
 from datetime import datetime
 from enum import Enum

+from pydantic import BaseModel
+

 class JobType(Enum):
-    FULL_TIME = "full_time"
-    PART_TIME = "part_time"
-    CONTRACT = "contractor"
-    INTERNSHIP = "internship"
+    FULL_TIME = "fulltime"
+    PART_TIME = "parttime"
+    CONTRACT = "contract"
    TEMPORARY = "temporary"
-    PER_DIEM = "per_diem"
+    INTERNSHIP = "internship"
+
+    PER_DIEM = "perdiem"
    NIGHTS = "nights"
    OTHER = "other"

--- a/api/core/scrapers/init.py
+++ b/api/core/scrapers/init.py
@ -1,6 +1,4 @@
-from pydantic import BaseModel
-from enum import Enum
-from ..jobs import JobResponse
+from ..jobs import *


 class Site(Enum):
@ -16,6 +14,7 @@ class ScraperInput(BaseModel):
    location: str = None
    distance: int = None
    is_remote: bool = False
+    job_type: JobType = None

    results_wanted: int = 15

--- a/api/core/scrapers/indeed/init.py
+++ b/api/core/scrapers/indeed/init.py
@ -44,17 +44,23 @@ class IndeedScraper(Scraper):
                "q": scraper_input.search_term,
                "location": scraper_input.location,
                "radius": scraper_input.distance,
-                "sc": "0kf:attr(DSQF7);" if scraper_input.is_remote else None,
                "filter": 0,
                "start": 0 + page * 10,
            }
+            sc_values = []
+            if scraper_input.is_remote:
+                sc_values.append("attr(DSQF7)")
+            if scraper_input.job_type:
+                sc_values.append("jt({})".format(scraper_input.job_type.value))

+            if sc_values:
+                params["sc"] = "0kf:" + "".join(sc_values) + ";"
            response = session.get(self.url, params=params)

-            if response.status_code == 307:
-                new_url = response.headers["Location"]
-                response = session.get(new_url)
-            if response.status_code != status.HTTP_200_OK:
+            if (
+                response.status_code != status.HTTP_200_OK
+                and response.status_code != status.HTTP_307_TEMPORARY_REDIRECT
+            ):
                return JobResponse(
                    success=False,
                    error=f"Response returned {response.status_code}",
@ -150,7 +156,7 @@ class IndeedScraper(Scraper):
    @staticmethod
    def get_job_type(job: dict) -> Optional[JobType]:
        """
-        Parses the job to get JobType
+        Parses the job to get its JobType
        :param job:
        :return:
        """
--- a/api/core/scrapers/linkedin/init.py
+++ b/api/core/scrapers/linkedin/init.py
@ -30,6 +30,17 @@ class LinkedInScraper(Scraper):
        seen_urls = set()
        page, processed_jobs, job_count = 0, 0, 0

+        def job_type_code(job_type):
+            mapping = {
+                JobType.FULL_TIME: "F",
+                JobType.PART_TIME: "P",
+                JobType.INTERNSHIP: "I",
+                JobType.CONTRACT: "C",
+                JobType.TEMPORARY: "T",
+            }
+
+            return mapping.get(job_type, "")
+
        with requests.Session() as session:
            while len(job_list) < scraper_input.results_wanted:
                params = {
@ -37,6 +48,9 @@ class LinkedInScraper(Scraper):
                    "location": scraper_input.location,
                    "distance": scraper_input.distance,
                    "f_WT": 2 if scraper_input.is_remote else None,
+                    "f_JT": job_type_code(scraper_input.job_type)
+                    if scraper_input.job_type
+                    else None,
                    "pageNum": page,
                }

--- a/api/core/scrapers/ziprecruiter/init.py
+++ b/api/core/scrapers/ziprecruiter/init.py
@ -35,6 +35,15 @@ class ZipRecruiterScraper(Scraper):
        processed_jobs, job_count = 0, 0
        seen_urls = set()
        while len(job_list) < scraper_input.results_wanted:
+            job_type_value = None
+            if scraper_input.job_type:
+                if scraper_input.job_type.value == "fulltime":
+                    job_type_value = "full_time"
+                elif scraper_input.job_type.value == "parttime":
+                    job_type_value = "part_time"
+                else:
+                    job_type_value = scraper_input.job_type.value
+
            params = {
                "search": scraper_input.search_term,
                "location": scraper_input.location,
@ -42,12 +51,16 @@ class ZipRecruiterScraper(Scraper):
                "refine_by_location_type": "only_remote"
                if scraper_input.is_remote
                else None,
+                "refine_by_employment": f"employment_type:employment_type:{job_type_value}"
+                if job_type_value
+                else None,
                "page": page,
            }

            response = session.get(
                self.url, headers=ZipRecruiterScraper.headers(), params=params
            )
+            print(response.url)
            if response.status_code != status.HTTP_200_OK:
                return JobResponse(
                    success=False,
@ -74,15 +87,22 @@ class ZipRecruiterScraper(Scraper):
                company = job.find("a", {"class": "company_name"}).text.strip()
                description = job.find("p", {"class": "job_snippet"}).text.strip()
                job_type_element = job.find("li", {"class": "perk_item perk_type"})
-                job_type = (
-                    job_type_element.text.strip().lower().replace("-", "_")
-                    if job_type_element
-                    else None
-                )
+
+                if job_type_element:
+                    job_type_text = (
+                        job_type_element.text.strip()
+                        .lower()
+                        .replace("-", "")
+                        .replace(" ", "")
+                    )
+                    if job_type_text == "contractor":
+                        job_type_text = "contract"
+                    job_type = JobType(job_type_text)
+                else:
+                    job_type = None

                date_posted = ZipRecruiterScraper.get_date_posted(job)

-                job_type = job_type.replace(" ", "_") if job_type else job_type
                job_post = JobPost(
                    title=title,
                    description=description,
--- a/settings.py
+++ b/settings.py
@ -6,4 +6,4 @@ SUPABASE_URL = os.environ.get("SUPABASE_URL")
 SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
 JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
 ALGORITHM = "HS256"
-ACCESS_TOKEN_EXPIRE_MINUTES = 120
+ACCESS_TOKEN_EXPIRE_MINUTES = 3600