proj structure

2026-03-04 11:34:47 -08:00 · 2023-09-03 12:05:50 -05:00
parent dafed42d58
commit 8579c8e985
11 changed files with 15 additions and 18 deletions
--- a/jobspy/init.py
+++ b/jobspy/init.py
@@ -1,121 +0,0 @@
-import pandas as pd
-from typing import List, Dict, Tuple, Union
-
-from concurrent.futures import ThreadPoolExecutor
-
-from .core.jobs import JobType
-from .core.scrapers.indeed import IndeedScraper
-from .core.scrapers.ziprecruiter import ZipRecruiterScraper
-from .core.scrapers.linkedin import LinkedInScraper
-from .core.scrapers import (
-    ScraperInput,
-    Site,
-    JobResponse,
-    CommonResponse,
-)
-
-
-SCRAPER_MAPPING = {
-    Site.LINKEDIN: LinkedInScraper,
-    Site.INDEED: IndeedScraper,
-    Site.ZIP_RECRUITER: ZipRecruiterScraper,
-}
-
-
-def _map_str_to_site(site_name: str) -> Site:
-    return Site[site_name.upper()]
-
-
-def scrape_jobs(
-        site_name: str | Site | List[Site],
-        search_term: str,
-
-        location: str = "",
-        distance: int = None,
-        is_remote: bool = False,
-        job_type: JobType = None,
-        easy_apply: bool = False,  # linkedin
-        results_wanted: int = 15
-) -> pd.DataFrame:
-    """
-    Asynchronously scrapes job data from multiple job sites.
-    :return: results_wanted: pandas dataframe containing job data
-    """
-
-    if type(site_name) == str:
-        site_name = _map_str_to_site(site_name)
-
-    site_type = [site_name] if type(site_name) == Site else site_name
-    scraper_input = ScraperInput(
-        site_type=site_type,
-        search_term=search_term,
-        location=location,
-        distance=distance,
-        is_remote=is_remote,
-        job_type=job_type,
-        easy_apply=easy_apply,
-        results_wanted=results_wanted,
-    )
-
-    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
-        scraper_class = SCRAPER_MAPPING[site]
-        scraper = scraper_class()
-        scraped_data: JobResponse = scraper.scrape(scraper_input)
-
-        return site.value, scraped_data
-
-    results = {}
-    for site in scraper_input.site_type:
-        site_value, scraped_data = scrape_site(site)
-        results[site_value] = scraped_data
-
-    dfs = []
-
-    for site, job_response in results.items():
-        for job in job_response.jobs:
-            data = job.dict()
-            data['site'] = site
-
-            # Formatting JobType
-            data['job_type'] = data['job_type'].value if data['job_type'] else None
-
-            # Formatting Location
-            location_obj = data.get('location')
-            if location_obj and isinstance(location_obj, dict):
-                data['city'] = location_obj.get('city', '')
-                data['state'] = location_obj.get('state', '')
-                data['country'] = location_obj.get('country', 'USA')
-            else:
-                data['city'] = None
-                data['state'] = None
-                data['country'] = None
-
-            # Formatting Compensation
-            compensation_obj = data.get('compensation')
-            if compensation_obj and isinstance(compensation_obj, dict):
-                data['interval'] = compensation_obj.get('interval').value if compensation_obj.get('interval') else None
-                data['min_amount'] = compensation_obj.get('min_amount')
-                data['max_amount'] = compensation_obj.get('max_amount')
-                data['currency'] = compensation_obj.get('currency', 'USD')
-            else:
-                data['interval'] = None
-                data['min_amount'] = None
-                data['max_amount'] = None
-                data['currency'] = None
-
-            job_df = pd.DataFrame([data])
-            dfs.append(job_df)
-
-    if dfs:
-        df = pd.concat(dfs, ignore_index=True)
-        desired_order = ['site', 'title', 'company_name', 'city', 'state','job_type',
-                         'interval', 'min_amount', 'max_amount',  'job_url', 'description',]
-        df = df[desired_order]
-    else:
-        df = pd.DataFrame()
-
-    return df
-
-
-
-
--- a/jobspy/core/init.py
+++ b/jobspy/core/init.py
--- a/jobspy/core/jobs/init.py
+++ b/jobspy/core/jobs/init.py
@@ -1,75 +0,0 @@
-from typing import Union, Optional
-from datetime import date
-from enum import Enum
-
-from pydantic import BaseModel, validator
-
-
-class JobType(Enum):
-    FULL_TIME = "fulltime"
-    PART_TIME = "parttime"
-    CONTRACT = "contract"
-    TEMPORARY = "temporary"
-    INTERNSHIP = "internship"
-
-    PER_DIEM = "perdiem"
-    NIGHTS = "nights"
-    OTHER = "other"
-    SUMMER = "summer"
-    VOLUNTEER = "volunteer"
-
-
-
-class Location(BaseModel):
-    country: str = "USA"
-    city: str = None
-    state: Optional[str] = None
-
-
-class CompensationInterval(Enum):
-    YEARLY = "yearly"
-    MONTHLY = "monthly"
-    WEEKLY = "weekly"
-    DAILY = "daily"
-    HOURLY = "hourly"
-
-
-class Compensation(BaseModel):
-    interval: CompensationInterval
-    min_amount: int = None
-    max_amount: int = None
-    currency: str = "USD"
-
-
-class JobPost(BaseModel):
-    title: str
-    company_name: str
-    job_url: str
-    location: Optional[Location]
-
-    description: str = None
-    job_type: Optional[JobType] = None
-    compensation: Optional[Compensation] = None
-    date_posted: date = None
-
-
-class JobResponse(BaseModel):
-    success: bool
-    error: str = None
-
-    total_results: Optional[int] = None
-
-    jobs: list[JobPost] = []
-
-    returned_results: int = None
-
-    @validator("returned_results", pre=True, always=True)
-    def set_returned_results(cls, v, values):
-        jobs_list = values.get("jobs")
-
-        if v is None:
-            if jobs_list is not None:
-                return len(jobs_list)
-            else:
-                return 0
-        return v
--- a/jobspy/core/scrapers/init.py
+++ b/jobspy/core/scrapers/init.py
@@ -1,43 +0,0 @@
-from ..jobs import Enum, BaseModel, JobType, JobResponse
-from typing import List, Dict, Optional, Any
-
-
-class StatusException(Exception):
-    def __init__(self, status_code: int):
-        self.status_code = status_code
-
-
-class Site(Enum):
-    LINKEDIN = "linkedin"
-    INDEED = "indeed"
-    ZIP_RECRUITER = "zip_recruiter"
-
-
-class ScraperInput(BaseModel):
-    site_type: List[Site]
-    search_term: str
-
-    location: str = None
-    distance: Optional[int] = None
-    is_remote: bool = False
-    job_type: Optional[JobType] = None
-    easy_apply: bool = None  # linkedin
-
-    results_wanted: int = 15
-
-
-class CommonResponse(BaseModel):
-    status: Optional[str]
-    error: Optional[str]
-    linkedin: Optional[Any] = None
-    indeed: Optional[Any] = None
-    zip_recruiter: Optional[Any] = None
-
-
-class Scraper:
-    def __init__(self, site: Site, url: str):
-        self.site = site
-        self.url = url
-
-    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
-        ...
--- a/jobspy/core/scrapers/indeed/init.py
+++ b/jobspy/core/scrapers/indeed/init.py
@@ -1,301 +0,0 @@
-import re
-import sys
-import math
-import json
-from datetime import datetime
-from typing import Optional, Tuple, List
-
-import tls_client
-import urllib.parse
-from bs4 import BeautifulSoup
-from bs4.element import Tag
-from concurrent.futures import ThreadPoolExecutor, Future
-
-from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
-from .. import Scraper, ScraperInput, Site, StatusException
-
-
-class ParsingException(Exception):
-    pass
-
-
-class IndeedScraper(Scraper):
-    def __init__(self):
-        """
-        Initializes IndeedScraper with the Indeed job search url
-        """
-        site = Site(Site.INDEED)
-        url = "https://www.indeed.com"
-        super().__init__(site, url)
-
-        self.jobs_per_page = 15
-        self.seen_urls = set()
-
-    def scrape_page(
-        self, scraper_input: ScraperInput, page: int, session: tls_client.Session
-    ) -> tuple[list[JobPost], int]:
-        """
-        Scrapes a page of Indeed for jobs with scraper_input criteria
-        :param scraper_input:
-        :param page:
-        :param session:
-        :return: jobs found on page, total number of jobs found for search
-        """
-
-        job_list = []
-
-        params = {
-            "q": scraper_input.search_term,
-            "l": scraper_input.location,
-            "radius": scraper_input.distance,
-            "filter": 0,
-            "start": 0 + page * 10,
-        }
-        sc_values = []
-        if scraper_input.is_remote:
-            sc_values.append("attr(DSQF7)")
-        if scraper_input.job_type:
-            sc_values.append("jt({})".format(scraper_input.job_type.value))
-
-        if sc_values:
-            params["sc"] = "0kf:" + "".join(sc_values) + ";"
-        response = session.get(self.url + "/jobs", params=params)
-
-        if (
-            response.status_code != 200
-            and response.status_code != 307
-        ):
-            raise StatusException(response.status_code)
-
-        soup = BeautifulSoup(response.content, "html.parser")
-        if "did not match any jobs" in str(soup):
-            raise ParsingException("Search did not match any jobs")
-
-        jobs = IndeedScraper.parse_jobs(
-            soup
-        )  #: can raise exception, handled by main scrape function
-        total_num_jobs = IndeedScraper.total_jobs(soup)
-
-        if (
-            not jobs.get("metaData", {})
-            .get("mosaicProviderJobCardsModel", {})
-            .get("results")
-        ):
-            raise Exception("No jobs found.")
-
-        def process_job(job) -> Optional[JobPost]:
-            job_url = f'{self.url}/jobs/viewjob?jk={job["jobkey"]}'
-            job_url_client = f'{self.url}/viewjob?jk={job["jobkey"]}'
-            if job_url in self.seen_urls:
-                return None
-
-            snippet_html = BeautifulSoup(job["snippet"], "html.parser")
-
-            extracted_salary = job.get("extractedSalary")
-            compensation = None
-            if extracted_salary:
-                salary_snippet = job.get("salarySnippet")
-                currency = salary_snippet.get("currency") if salary_snippet else None
-                interval = (extracted_salary.get("type"),)
-                if isinstance(interval, tuple):
-                    interval = interval[0]
-
-                interval = interval.upper()
-                if interval in CompensationInterval.__members__:
-                    compensation = Compensation(
-                        interval=CompensationInterval[interval],
-                        min_amount=int(extracted_salary.get("max")),
-                        max_amount=int(extracted_salary.get("min")),
-                        currency=currency,
-                    )
-
-            job_type = IndeedScraper.get_job_type(job)
-            timestamp_seconds = job["pubDate"] / 1000
-            date_posted = datetime.fromtimestamp(timestamp_seconds)
-            date_posted = date_posted.strftime("%Y-%m-%d")
-
-            description = self.get_description(job_url, session)
-            li_elements = snippet_html.find_all("li")
-            if description is None and li_elements:
-                description = " ".join(li.text for li in li_elements)
-
-            first_li = snippet_html.find("li")
-            job_post = JobPost(
-                title=job["normTitle"],
-                description=description,
-                company_name=job["company"],
-                location=Location(
-                    city=job.get("jobLocationCity"),
-                    state=job.get("jobLocationState"),
-                ),
-                job_type=job_type,
-                compensation=compensation,
-                date_posted=date_posted,
-                job_url=job_url_client,
-            )
-            return job_post
-
-        with ThreadPoolExecutor(max_workers=10) as executor:
-            job_results: list[Future] = [executor.submit(process_job, job) for job in
-                                         jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]]
-
-        job_list = [result.result() for result in job_results if result.result()]
-
-        return job_list, total_num_jobs
-
-    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
-        """
-        Scrapes Indeed for jobs with scraper_input criteria
-        :param scraper_input:
-        :return: job_response
-        """
-        session = tls_client.Session(
-            client_identifier="chrome112", random_tls_extension_order=True
-        )
-
-        pages_to_process = (
-            math.ceil(scraper_input.results_wanted / self.jobs_per_page) - 1
-        )
-
-        try:
-            #: get first page to initialize session
-            job_list, total_results = self.scrape_page(scraper_input, 0, session)
-
-            with ThreadPoolExecutor(max_workers=10) as executor:
-                futures: list[Future] = [
-                    executor.submit(self.scrape_page, scraper_input, page, session)
-                    for page in range(1, pages_to_process + 1)
-                ]
-
-                for future in futures:
-                    jobs, _ = future.result()
-
-                    job_list += jobs
-        except StatusException as e:
-            return JobResponse(
-                success=False,
-                error=f"Indeed returned status code {e.status_code}",
-            )
-
-        except ParsingException as e:
-            return JobResponse(
-                success=False,
-                error=f"Indeed failed to parse response: {e}",
-            )
-        except Exception as e:
-            return JobResponse(
-                success=False,
-                error=f"Indeed failed to scrape: {e}",
-            )
-
-        if len(job_list) > scraper_input.results_wanted:
-            job_list = job_list[: scraper_input.results_wanted]
-
-        job_response = JobResponse(
-            success=True,
-            jobs=job_list,
-            total_results=total_results,
-        )
-        return job_response
-
-    def get_description(self, job_page_url: str, session: tls_client.Session) -> str:
-        """
-        Retrieves job description by going to the job page url
-        :param job_page_url:
-        :param session:
-        :return: description
-        """
-        parsed_url = urllib.parse.urlparse(job_page_url)
-        params = urllib.parse.parse_qs(parsed_url.query)
-        jk_value = params.get("jk", [None])[0]
-        formatted_url = f"{self.url}/viewjob?jk={jk_value}&spa=1"
-
-        response = session.get(formatted_url, allow_redirects=True)
-
-        if response.status_code not in range(200, 400):
-            return None
-
-        raw_description = response.json()["body"]["jobInfoWrapperModel"][
-            "jobInfoModel"
-        ]["sanitizedJobDescription"]
-        soup = BeautifulSoup(raw_description, "html.parser")
-        text_content = " ".join(soup.get_text().split()).strip()
-        return text_content
-
-    @staticmethod
-    def get_job_type(job: dict) -> Optional[JobType]:
-        """
-        Parses the job to get JobTypeIndeed
-        :param job:
-        :return:
-        """
-        for taxonomy in job["taxonomyAttributes"]:
-            if taxonomy["label"] == "job-types":
-                if len(taxonomy["attributes"]) > 0:
-                    job_type_str = (
-                        taxonomy["attributes"][0]["label"]
-                        .replace("-", "_")
-                        .replace(" ", "_")
-                        .upper()
-                    )
-                    return JobType[job_type_str]
-        return None
-
-    @staticmethod
-    def parse_jobs(soup: BeautifulSoup) -> dict:
-        """
-        Parses the jobs from the soup object
-        :param soup:
-        :return: jobs
-        """
-
-        def find_mosaic_script() -> Optional[Tag]:
-            """
-            Finds jobcards script tag
-            :return: script_tag
-            """
-            script_tags = soup.find_all("script")
-
-            for tag in script_tags:
-                if (
-                    tag.string
-                    and "mosaic.providerData" in tag.string
-                    and "mosaic-provider-jobcards" in tag.string
-                ):
-                    return tag
-            return None
-
-        script_tag = find_mosaic_script()
-
-        if script_tag:
-            script_str = script_tag.string
-            pattern = r'window.mosaic.providerData\["mosaic-provider-jobcards"\]\s*=\s*({.*?});'
-            p = re.compile(pattern, re.DOTALL)
-            m = p.search(script_str)
-            if m:
-                jobs = json.loads(m.group(1).strip())
-                return jobs
-            else:
-                raise ParsingException("Could not find mosaic provider job cards data")
-        else:
-            raise ParsingException(
-                "Could not find a script tag containing mosaic provider data"
-            )
-
-    @staticmethod
-    def total_jobs(soup: BeautifulSoup) -> int:
-        """
-        Parses the total jobs for that search from soup object
-        :param soup:
-        :return: total_num_jobs
-        """
-        script = soup.find("script", string=lambda t: "window._initialData" in t)
-
-        pattern = re.compile(r"window._initialData\s*=\s*({.*})\s*;", re.DOTALL)
-        match = pattern.search(script.string)
-        total_num_jobs = 0
-        if match:
-            json_str = match.group(1)
-            data = json.loads(json_str)
-            total_num_jobs = int(data["searchTitleBarModel"]["totalNumResults"])
-        return total_num_jobs
--- a/jobspy/core/scrapers/linkedin/init.py
+++ b/jobspy/core/scrapers/linkedin/init.py
@@ -1,213 +0,0 @@
-from typing import Optional, Tuple
-from datetime import datetime
-
-import requests
-from bs4 import BeautifulSoup
-from bs4.element import Tag
-
-from .. import Scraper, ScraperInput, Site
-from ...jobs import JobPost, Location, JobResponse, JobType, Compensation, CompensationInterval
-
-
-class LinkedInScraper(Scraper):
-    def __init__(self):
-        """
-        Initializes LinkedInScraper with the LinkedIn job search url
-        """
-        site = Site(Site.LINKEDIN)
-        url = "https://www.linkedin.com"
-        super().__init__(site, url)
-
-    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
-        """
-        Scrapes LinkedIn for jobs with scraper_input criteria
-        :param scraper_input:
-        :return: job_response
-        """
-        job_list: list[JobPost] = []
-        seen_urls = set()
-        page, processed_jobs, job_count = 0, 0, 0
-
-        def job_type_code(job_type):
-            mapping = {
-                JobType.FULL_TIME: "F",
-                JobType.PART_TIME: "P",
-                JobType.INTERNSHIP: "I",
-                JobType.CONTRACT: "C",
-                JobType.TEMPORARY: "T",
-            }
-
-            return mapping.get(job_type, "")
-
-        with requests.Session() as session:
-            while len(job_list) < scraper_input.results_wanted:
-                params = {
-                    "keywords": scraper_input.search_term,
-                    "location": scraper_input.location,
-                    "distance": scraper_input.distance,
-                    "f_WT": 2 if scraper_input.is_remote else None,
-                    "f_JT": job_type_code(scraper_input.job_type)
-                    if scraper_input.job_type
-                    else None,
-                    "pageNum": page,
-                    "f_AL": "true" if scraper_input.easy_apply else None,
-                }
-
-                params = {k: v for k, v in params.items() if v is not None}
-                response = session.get(
-                    f"{self.url}/jobs/search", params=params, allow_redirects=True
-                )
-
-                if response.status_code != 200:
-                    return JobResponse(
-                        success=False,
-                        error=f"Response returned {response.status_code}",
-                    )
-
-                soup = BeautifulSoup(response.text, "html.parser")
-
-                if page == 0:
-                    job_count_text = soup.find(
-                        "span", class_="results-context-header__job-count"
-                    ).text
-                    job_count = int("".join(filter(str.isdigit, job_count_text)))
-
-                for job_card in soup.find_all(
-                    "div",
-                    class_="base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card",
-                ):
-                    processed_jobs += 1
-                    data_entity_urn = job_card.get("data-entity-urn", "")
-                    job_id = (
-                        data_entity_urn.split(":")[-1] if data_entity_urn else "N/A"
-                    )
-                    job_url = f"{self.url}/jobs/view/{job_id}"
-                    if job_url in seen_urls:
-                        continue
-                    seen_urls.add(job_url)
-                    job_info = job_card.find("div", class_="base-search-card__info")
-                    if job_info is None:
-                        continue
-                    title_tag = job_info.find("h3", class_="base-search-card__title")
-                    title = title_tag.text.strip() if title_tag else "N/A"
-
-                    company_tag = job_info.find("a", class_="hidden-nested-link")
-                    company = company_tag.text.strip() if company_tag else "N/A"
-
-                    metadata_card = job_info.find(
-                        "div", class_="base-search-card__metadata"
-                    )
-                    location: Location = LinkedInScraper.get_location(metadata_card)
-
-                    datetime_tag = metadata_card.find(
-                        "time", class_="job-search-card__listdate"
-                    )
-                    description, job_type = LinkedInScraper.get_description(job_url)
-                    if datetime_tag:
-                        datetime_str = datetime_tag["datetime"]
-                        date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
-                    else:
-                        date_posted = None
-
-                    job_post = JobPost(
-                        title=title,
-                        description=description,
-                        company_name=company,
-                        location=location,
-                        date_posted=date_posted,
-                        job_url=job_url,
-                        job_type=job_type,
-                        compensation=Compensation(interval=CompensationInterval.YEARLY, currency="USD")
-                    )
-                    job_list.append(job_post)
-                    if (
-                        len(job_list) >= scraper_input.results_wanted
-                        or processed_jobs >= job_count
-                    ):
-                        break
-                if (
-                    len(job_list) >= scraper_input.results_wanted
-                    or processed_jobs >= job_count
-                ):
-                    break
-
-                page += 1
-
-        job_list = job_list[: scraper_input.results_wanted]
-        job_response = JobResponse(
-            success=True,
-            jobs=job_list,
-            total_results=job_count,
-        )
-        return job_response
-
-    @staticmethod
-    def get_description(job_page_url: str) -> Optional[str]:
-        """
-        Retrieves job description by going to the job page url
-        :param job_page_url:
-        :return: description or None
-        """
-        response = requests.get(job_page_url, allow_redirects=True)
-        if response.status_code not in range(200, 400):
-            return None, None
-
-        soup = BeautifulSoup(response.text, "html.parser")
-        div_content = soup.find(
-            "div", class_=lambda x: x and "show-more-less-html__markup" in x
-        )
-
-        text_content = None
-        if div_content:
-            text_content = " ".join(div_content.get_text().split()).strip()
-
-        def get_job_type(
-            soup: BeautifulSoup,
-        ) -> Tuple[Optional[str], Optional[JobType]]:
-            """
-            Gets the job type from job page
-            :param soup:
-            :return: JobType
-            """
-            h3_tag = soup.find(
-                "h3",
-                class_="description__job-criteria-subheader",
-                string=lambda text: "Employment type" in text,
-            )
-
-            employment_type = None
-            if h3_tag:
-                employment_type_span = h3_tag.find_next_sibling(
-                    "span",
-                    class_="description__job-criteria-text description__job-criteria-text--criteria",
-                )
-                if employment_type_span:
-                    employment_type = employment_type_span.get_text(strip=True)
-                    employment_type = employment_type.lower()
-                    employment_type = employment_type.replace("-", "")
-
-            return JobType(employment_type)
-
-        return text_content, get_job_type(soup)
-
-    @staticmethod
-    def get_location(metadata_card: Optional[Tag]) -> Location:
-        """
-        Extracts the location data from the job metadata card.
-        :param metadata_card
-        :return: location
-        """
-        if metadata_card is not None:
-            location_tag = metadata_card.find(
-                "span", class_="job-search-card__location"
-            )
-            location_string = location_tag.text.strip() if location_tag else "N/A"
-            parts = location_string.split(", ")
-            if len(parts) == 2:
-                city, state = parts
-                location = Location(
-                    city=city,
-                    state=state,
-                )
-
-        return location
--- a/jobspy/core/scrapers/ziprecruiter/init.py
+++ b/jobspy/core/scrapers/ziprecruiter/init.py
@@ -1,405 +0,0 @@
-import math
-import json
-import re
-from datetime import datetime
-from typing import Optional, Tuple, List
-from urllib.parse import urlparse, parse_qs
-
-import tls_client
-from bs4 import BeautifulSoup
-from bs4.element import Tag
-from concurrent.futures import ThreadPoolExecutor, Future
-
-from .. import Scraper, ScraperInput, Site, StatusException
-from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
-
-
-class ZipRecruiterScraper(Scraper):
-    """
-    Scrapes job postings from ZipRecruiter search result pages.
-
-    Jobs are read from the page's embedded "js_variables" JSON when it holds a
-    job list, and from the HTML job cards otherwise.
-    """
-
-    def __init__(self):
-        """
-        Initializes ZipRecruiterScraper with the ZipRecruiter job search url
-        """
-        site = Site(Site.ZIP_RECRUITER)
-        url = "https://www.ziprecruiter.com"
-        super().__init__(site, url)
-
-        self.jobs_per_page = 20
-        # urls of HTML job cards already handled, used to skip duplicates
-        self.seen_urls = set()
-        self.session = tls_client.Session(
-            client_identifier="chrome112", random_tls_extension_order=True
-        )
-
-    def scrape_page(
-        self, scraper_input: ScraperInput, page: int
-    ) -> tuple[list[JobPost], int | None]:
-        """
-        Scrapes a page of ZipRecruiter for jobs with scraper_input criteria
-        :param scraper_input:
-        :param page:
-        :return: jobs found on page, total number of jobs found for search
-                 (the total is only read on page 1, otherwise None)
-        :raises StatusException: when the search request does not return 200
-        """
-        # translate the JobType values that are spelled differently in the
-        # search filter
-        job_type_value = None
-        if scraper_input.job_type:
-            if scraper_input.job_type.value == "fulltime":
-                job_type_value = "full_time"
-            elif scraper_input.job_type.value == "parttime":
-                job_type_value = "part_time"
-            else:
-                job_type_value = scraper_input.job_type.value
-
-        params = {
-            "search": scraper_input.search_term,
-            "location": scraper_input.location,
-            "page": page,
-            "form": "jobs-landing",
-        }
-
-        if scraper_input.is_remote:
-            params["refine_by_location_type"] = "only_remote"
-
-        if scraper_input.distance:
-            params["radius"] = scraper_input.distance
-
-        if job_type_value:
-            params["refine_by_employment"] = f"employment_type:employment_type:{job_type_value}"
-
-        response = self.session.get(
-            self.url + "/jobs-search",
-            headers=ZipRecruiterScraper.headers(),
-            params=params,
-        )
-
-        if response.status_code != 200:
-            raise StatusException(response.status_code)
-
-        html_string = response.text
-        soup = BeautifulSoup(html_string, "html.parser")
-
-        script_tag = soup.find("script", {"id": "js_variables"})
-        data = json.loads(script_tag.string)
-
-        if page == 1:
-            job_count = int(data["totalJobCount"].replace(",", ""))
-        else:
-            job_count = None
-
-        # leaving the with-block waits for every submitted job to finish
-        with ThreadPoolExecutor(max_workers=10) as executor:
-            if "jobList" in data and data["jobList"]:
-                jobs_js = data["jobList"]
-                job_results = [executor.submit(self.process_job_js, job) for job in jobs_js]
-            else:
-                jobs_html = soup.find_all("div", {"class": "job_content"})
-                job_results = [executor.submit(self.process_job_html, job) for job in
-                               jobs_html]
-
-        # skipped jobs come back as None; each future is read only once
-        job_list = [job for job in (result.result() for result in job_results) if job]
-
-        return job_list, job_count
-
-    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
-        """
-        Scrapes ZipRecruiter for jobs with scraper_input criteria
-        :param scraper_input:
-        :return: job_response; success is False with an error message when
-                 scraping any page raises
-        """
-        # at least 3 pages are always requested, more when results_wanted
-        # needs them
-        pages_to_process = max(3, math.ceil(scraper_input.results_wanted / self.jobs_per_page))
-
-        try:
-            #: get first page to initialize session
-            job_list, total_results = self.scrape_page(scraper_input, 1)
-
-            with ThreadPoolExecutor(max_workers=10) as executor:
-                futures: list[Future] = [
-                    executor.submit(self.scrape_page, scraper_input, page)
-                    for page in range(2, pages_to_process + 1)
-                ]
-
-                # results are appended in page order
-                for future in futures:
-                    jobs, _ = future.result()
-
-                    job_list += jobs
-
-        except StatusException as e:
-            return JobResponse(
-                success=False,
-                error=f"ZipRecruiter returned status code {e.status_code}",
-            )
-        except Exception as e:
-            return JobResponse(
-                success=False,
-                error=f"ZipRecruiter failed to scrape: {e}",
-            )
-
-        # surplus results are trimmed; fewer than results_wanted are returned
-        # as they are
-        if len(job_list) > scraper_input.results_wanted:
-            job_list = job_list[: scraper_input.results_wanted]
-
-        job_response = JobResponse(
-            success=True,
-            jobs=job_list,
-            total_results=total_results,
-        )
-        return job_response
-
-    def process_job_html(self, job: Tag) -> Optional[JobPost]:
-        """
-        Parses a job from the job content tag
-        :param job: BeautifulSoup Tag for one job post
-        :return JobPost, or None when the job's url has already been processed
-        """
-        job_url = job.find("a", {"class": "job_link"})["href"]
-        if job_url in self.seen_urls:
-            return None
-        # remember the url so a later card with the same link is skipped
-        self.seen_urls.add(job_url)
-
-        title = job.find("h2", {"class": "title"}).text
-        company = job.find("a", {"class": "company_name"}).text.strip()
-
-        description, updated_job_url = self.get_description(
-            job_url
-        )
-        if updated_job_url is not None:
-            job_url = updated_job_url
-        if description is None:
-            # fall back to the snippet shown on the search result card
-            description = job.find("p", {"class": "job_snippet"}).text.strip()
-
-        job_type_element = job.find("li", {"class": "perk_item perk_type"})
-        if job_type_element:
-            job_type_text = (
-                job_type_element.text.strip()
-                .lower()
-                .replace("-", "")
-                .replace(" ", "")
-            )
-            if job_type_text == "contractor":
-                job_type_text = "contract"
-            job_type = JobType(job_type_text)
-        else:
-            job_type = None
-
-        date_posted = ZipRecruiterScraper.get_date_posted(job)
-
-        job_post = JobPost(
-            title=title,
-            description=description,
-            company_name=company,
-            location=ZipRecruiterScraper.get_location(job),
-            job_type=job_type,
-            compensation=ZipRecruiterScraper.get_compensation(job),
-            date_posted=date_posted,
-            job_url=job_url,
-        )
-        return job_post
-
-    def process_job_js(self, job: dict) -> Optional[JobPost]:
-        """
-        Parses a job from one entry of the page's "jobList" data
-        :param job: dict describing one job post
-        :return: JobPost, or None when the employment type is not recognized
-        """
-        # Map the job data to the expected fields by the Pydantic model
-        title = job.get("Title")
-        # `or ""` also covers keys that are present but null
-        description = BeautifulSoup(
-            (job.get("Snippet") or "").strip(), "html.parser"
-        ).get_text()
-
-        company = job.get("OrgName")
-        location = Location(city=job.get("City"), state=job.get("State"))
-        try:
-            job_type = ZipRecruiterScraper.job_type_from_string(
-                (job.get("EmploymentType") or "").replace("-", "_").lower()
-            )
-        except ValueError:
-            # jobs with an unrecognized employment type are skipped
-            return None
-
-        # None when the job carries no parseable salary
-        compensation = ZipRecruiterScraper._parse_short_salary(
-            job.get("FormattedSalaryShort") or ""
-        )
-
-        save_job_url = job.get("SaveJobURL") or ""
-        posted_time_match = re.search(r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url)
-        if posted_time_match:
-            date_time_str = posted_time_match.group(1)
-            date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ")
-            date_posted = date_posted_obj.date()
-        else:
-            # only the datetime class is imported here, so today's date is
-            # taken from it
-            date_posted = datetime.today().date()
-        job_url = job.get("JobURL")
-
-        return JobPost(
-            title=title,
-            description=description,
-            company_name=company,
-            location=location,
-            job_type=job_type,
-            compensation=compensation,
-            date_posted=date_posted,
-            job_url=job_url,
-        )
-
-    @staticmethod
-    def _parse_short_salary(formatted_salary: str) -> Optional[Compensation]:
-        """
-        Parses a short salary string into a Compensation object
-        :param formatted_salary: space separated string whose first part is the
-            minimum amount and whose third part, when it starts with "$", is
-            the maximum amount
-        :return: Compensation, or None when no amount can be parsed
-        """
-
-        def to_amount(part: str) -> int:
-            # the first character is dropped (presumably the currency symbol)
-            number = part[1:].replace(",", "")
-            if "." in number:
-                # fractional values are taken as thousands; the "K" suffix has
-                # to go before float() can parse the number
-                # NOTE(review): a fractional value without "K" is also
-                # multiplied by 1000, as before - confirm this is intended
-                return int(float(number.replace("K", "")) * 1000)
-            return int(number.replace("K", "000"))
-
-        salary_parts = formatted_salary.split(" ")
-        try:
-            min_amount = to_amount(salary_parts[0])
-            if len(salary_parts) >= 3 and salary_parts[2].startswith("$"):
-                max_amount = to_amount(salary_parts[2])
-            else:
-                max_amount = 0
-        except ValueError:
-            # empty or non-numeric salary text: report no compensation
-            return None
-
-        # NOTE(review): the interval is always reported as yearly - confirm
-        # that this field never holds hourly pay
-        return Compensation(
-            interval=CompensationInterval.YEARLY,
-            min_amount=min_amount,
-            max_amount=max_amount,
-        )
-
-    @staticmethod
-    def job_type_from_string(value: str) -> Optional[JobType]:
-        """
-        Maps a job type string to its JobType
-        :param value: job type text; underscores are ignored
-        :return: JobType, or None for an empty value
-        :raises ValueError: when no JobType has the normalized value
-        """
-        if not value:
-            return None
-
-        if value.lower() == "contractor":
-            value = "contract"
-        normalized_value = value.replace("_", "")
-        for item in JobType:
-            if item.value == normalized_value:
-                return item
-        raise ValueError(f"Invalid value for JobType: {value}")
-
-    def get_description(
-        self,
-        job_page_url: str
-    ) -> Tuple[Optional[str], Optional[str]]:
-        """
-        Retrieves job description by going to the job page url
-        :param job_page_url:
-        :return: description or None, response url (None when the request
-                 status is outside 200-399)
-        """
-        response = self.session.get(
-            job_page_url, headers=ZipRecruiterScraper.headers(), allow_redirects=True
-        )
-        if response.status_code not in range(200, 400):
-            return None, None
-
-        html_string = response.content
-        soup_job = BeautifulSoup(html_string, "html.parser")
-
-        job_description_div = soup_job.find("div", {"class": "job_description"})
-        if job_description_div:
-            return job_description_div.text.strip(), response.url
-        return None, response.url
-
-    @staticmethod
-    def get_interval(interval_str: str):
-        """
-        Maps the interval alias to its appropriate CompensationInterval.
-        :param interval_str
-        :return: CompensationInterval
-        """
-        interval_alias = {"annually": CompensationInterval.YEARLY}
-        interval_str = interval_str.lower()
-
-        if interval_str in interval_alias:
-            return interval_alias[interval_str]
-
-        # anything else has to be a CompensationInterval value already
-        return CompensationInterval(interval_str)
-
-    @staticmethod
-    def get_date_posted(job: BeautifulSoup) -> Optional[datetime.date]:
-        """
-        Extracts the date a job was posted
-        :param job
-        :return: date the job was posted or None
-        """
-        button = job.find(
-            "button", {"class": "action_input save_job zrs_btn_secondary_200"}
-        )
-        if not button:
-            return None
-
-        # the posting time is carried as a query parameter of the button's
-        # data-href url
-        url_time = button.get("data-href", "")
-        url_components = urlparse(url_time)
-        params = parse_qs(url_components.query)
-        posted_time_str = params.get("posted_time", [None])[0]
-
-        if posted_time_str:
-            posted_date = datetime.strptime(
-                posted_time_str, "%Y-%m-%dT%H:%M:%SZ"
-            ).date()
-            return posted_date
-
-        return None
-
-    @staticmethod
-    def get_compensation(job: BeautifulSoup) -> Optional[Compensation]:
-        """
-        Parses the compensation tag from the job BeautifulSoup object
-        :param job
-        :return: Compensation object or None when the job has no pay element
-        """
-        pay_element = job.find("li", {"class": "perk_item perk_pay"})
-        if pay_element is None:
-            return None
-        pay = pay_element.find("div", {"class": "value"}).find("span").text.strip()
-
-        # the last word of the pay text names the interval
-        interval = ZipRecruiterScraper.get_interval(pay.split()[-1])
-
-        amounts = []
-        for amount in pay.split("to"):
-            amount = amount.replace(",", "").strip("$ ").split(" ")[0]
-            if "K" in amount:
-                # scale before truncating so that "50.5K" becomes 50500
-                amount = int(float(amount.replace("K", "")) * 1000)
-            else:
-                amount = int(float(amount))
-            amounts.append(amount)
-
-        return Compensation(
-            interval=interval, min_amount=min(amounts), max_amount=max(amounts)
-        )
-
-    @staticmethod
-    def get_location(job: BeautifulSoup) -> Location:
-        """
-        Extracts the job location from BeautifulSoup object
-        :param job:
-        :return: location; city and state are None unless the location text is
-                 of the form "City, State"
-        """
-        city, state = None, None
-
-        location_link = job.find("a", {"class": "company_location"})
-        if location_link is not None:
-            parts = location_link.text.strip().split(", ")
-            if len(parts) == 2:
-                city, state = parts
-
-        return Location(
-            city=city,
-            state=state,
-        )
-
-    @staticmethod
-    def headers() -> dict:
-        """
-        Returns headers needed for requests
-        :return: dict - Dictionary containing headers
-        """
-        return {
-            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"
-        }
--- a/jobspy/tests/init.py
+++ b/jobspy/tests/init.py
--- a/jobspy/tests/test_indeed.py
+++ b/jobspy/tests/test_indeed.py
@@ -1,10 +0,0 @@
-from jobspy import scrape_jobs
-
-
-def test_indeed():
-    """Smoke test: scrape_jobs returns a result object for an Indeed search."""
-    search = {"site_name": "indeed", "search_term": "software engineer"}
-    jobs = scrape_jobs(**search)
-
-    assert jobs is not None
--- a/jobspy/tests/test_ziprecruiter.py
+++ b/jobspy/tests/test_ziprecruiter.py
@@ -1,10 +0,0 @@
-from jobspy import scrape_jobs
-
-
-def test_ziprecruiter():
-    """Smoke test: scrape_jobs returns a result object for a ZipRecruiter search."""
-    search = {"site_name": "zip_recruiter", "search_term": "software engineer"}
-    jobs = scrape_jobs(**search)
-
-    assert jobs is not None