Mirror of https://github.com/Bunsly/JobSpy.git (synced 2026-03-05 12:04:33 -08:00)

Compare commits (8 commits)
Commit SHAs:
- aeb1a50d2c
- 91b137ef86
- 2563c5ca08
- 32282305c8
- ccbea51f3c
- 6ec7c24f7f
- 02caf1b38d
- 8e2ab277da
README.md (10 changes)
@@ -29,18 +29,20 @@ _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/)

 ### Usage

 ```python
+import csv
 from jobspy import scrape_jobs

 jobs = scrape_jobs(
     site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor"],
     search_term="software engineer",
     location="Dallas, TX",
-    results_wanted=10,
+    results_wanted=20,
+    hours_old=72,  # (only linkedin is hour specific, others round up to days old)
     country_indeed='USA'  # only needed for indeed / glassdoor
 )
 print(f"Found {len(jobs)} jobs")
 print(jobs.head())
-jobs.to_csv("jobs.csv", index=False)  # to_xlsx
+jobs.to_csv("jobs.csv", quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False)  # to_xlsx
 ```

 ### Output
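The `to_csv` change is not cosmetic: job descriptions routinely contain commas, quotes, and newlines that can corrupt an unquoted CSV. A minimal sketch of what the new arguments do, using one hypothetical row:

```python
# Sketch: why the README now passes quoting/escapechar to to_csv.
# QUOTE_NONNUMERIC wraps every non-numeric field in quotes, so embedded
# commas and quote characters in job fields can't break the CSV layout.
import csv
import pandas as pd

df = pd.DataFrame({
    "title": ['Engineer, Sr. "Platform"'],  # comma and quotes in one field
    "min_amount": [120000.0],
})
df.to_csv("jobs.csv", quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False)
print(open("jobs.csv").read())
```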
@@ -67,11 +69,13 @@ Optional
 ├── job_type (enum): fulltime, parttime, internship, contract
 ├── proxy (str): in format 'http://user:pass@host:port' or [https, socks]
 ├── is_remote (bool)
-├── full_description (bool): fetches full description for Indeed / LinkedIn (much slower)
+├── full_description (bool): fetches full description for LinkedIn (slower)
 ├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
 ├── easy_apply (bool): filters for jobs that are hosted on the job board site
+├── linkedin_company_ids (list[int]): searches for linkedin jobs with specific company ids
 ├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
 ├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
+├── hours_old (int): filters jobs by the number of hours since the job was posted (all but LinkedIn rounds up to next day)
 ```

 ### JobPost Schema
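Together with the README change above, the new optional parameters compose like this (a usage sketch built only from the signature in this compare; the company ids are hypothetical):

```python
# Usage sketch of the parameters added in this compare: hours_old and
# linkedin_company_ids are both optional and off by default.
from jobspy import scrape_jobs

jobs = scrape_jobs(
    site_name=["linkedin"],
    search_term="software engineer",
    hours_old=24,                       # LinkedIn filters to the hour; other boards round to days
    linkedin_company_ids=[1441, 1035],  # hypothetical ids, for illustration only
)
```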
pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.39"
+version = "1.1.44"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/JobSpy"
src/jobspy/__init__.py

@@ -1,7 +1,6 @@
 import pandas as pd
-import concurrent.futures
-from concurrent.futures import ThreadPoolExecutor
-from typing import Tuple, Optional
+from typing import Tuple
+from concurrent.futures import ThreadPoolExecutor, as_completed

 from .jobs import JobType, Location
 from .scrapers.indeed import IndeedScraper
@@ -29,19 +28,22 @@ def _map_str_to_site(site_name: str) -> Site:


 def scrape_jobs(
-    site_name: str | list[str] | Site | list[Site],
-    search_term: str,
-    location: str = "",
-    distance: int = None,
+    site_name: str | list[str] | Site | list[Site] | None = None,
+    search_term: str | None = None,
+    location: str | None = None,
+    distance: int | None = None,
     is_remote: bool = False,
-    job_type: str = None,
-    easy_apply: bool = False,  # linkedin
+    job_type: str | None = None,
+    easy_apply: bool | None = None,
     results_wanted: int = 15,
     country_indeed: str = "usa",
     hyperlinks: bool = False,
-    proxy: Optional[str] = None,
-    full_description: Optional[bool] = False,
-    offset: Optional[int] = 0,
+    proxy: str | None = None,
+    full_description: bool | None = False,
+    linkedin_company_ids: list[int] | None = None,
+    offset: int | None = 0,
+    hours_old: int = None,
+    **kwargs,
 ) -> pd.DataFrame:
     """
     Simultaneously scrapes job data from multiple job sites.
@@ -56,18 +58,23 @@ def scrape_jobs(

     job_type = get_enum_from_value(job_type) if job_type else None

-    if type(site_name) == str:
-        site_type = [_map_str_to_site(site_name)]
-    else:  #: if type(site_name) == list
-        site_type = [
-            _map_str_to_site(site) if type(site) == str else site_name
-            for site in site_name
-        ]
+    def get_site_type():
+        site_types = list(Site)
+        if isinstance(site_name, str):
+            site_types = [_map_str_to_site(site_name)]
+        elif isinstance(site_name, Site):
+            site_types = [site_name]
+        elif isinstance(site_name, list):
+            site_types = [
+                _map_str_to_site(site) if isinstance(site, str) else site
+                for site in site_name
+            ]
+        return site_types

     country_enum = Country.from_string(country_indeed)

     scraper_input = ScraperInput(
-        site_type=site_type,
+        site_type=get_site_type(),
         country=country_enum,
         search_term=search_term,
         location=location,
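The refactor also changes behavior: `site_name=None` now means "scrape every site". A standalone sketch of the resolution rules (the `Site` members come from the README's `site_name` list; a plain name lookup stands in for `_map_str_to_site`):

```python
# Standalone sketch of get_site_type's resolution rules.
from enum import Enum

class Site(Enum):
    LINKEDIN = "linkedin"
    INDEED = "indeed"
    ZIP_RECRUITER = "zip_recruiter"
    GLASSDOOR = "glassdoor"

def resolve(site_name) -> list[Site]:
    site_types = list(Site)                     # None -> all sites
    if isinstance(site_name, str):
        site_types = [Site[site_name.upper()]]  # stand-in for _map_str_to_site
    elif isinstance(site_name, Site):
        site_types = [site_name]
    elif isinstance(site_name, list):
        site_types = [Site[s.upper()] if isinstance(s, str) else s for s in site_name]
    return site_types

print(resolve(None))                          # all four sites
print(resolve("indeed"))                      # [Site.INDEED]
print(resolve([Site.LINKEDIN, "glassdoor"]))  # mixed input is normalized
```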
@@ -77,7 +84,9 @@ def scrape_jobs(
         easy_apply=easy_apply,
         full_description=full_description,
         results_wanted=results_wanted,
+        linkedin_company_ids=linkedin_company_ids,
         offset=offset,
+        hours_old=hours_old
     )

     def scrape_site(site: Site) -> Tuple[str, JobResponse]:
@@ -112,7 +121,7 @@ def scrape_jobs(
             executor.submit(worker, site): site for site in scraper_input.site_type
         }

-        for future in concurrent.futures.as_completed(future_to_site):
+        for future in as_completed(future_to_site):
             site_value, scraped_data = future.result()
             site_to_jobs_dict[site_value] = scraped_data

@@ -183,4 +192,4 @@ def scrape_jobs(
     else:
         jobs_formatted_df = pd.DataFrame()

-    return jobs_formatted_df
+    return jobs_formatted_df.sort_values(by=['site', 'date_posted'], ascending=[True, False])
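The new return value sorts the combined frame by board name (A to Z) and, within each board, newest postings first. On a toy frame:

```python
# Illustration of the new sort order: site ascending, then date_posted descending.
import pandas as pd

df = pd.DataFrame({
    "site": ["linkedin", "indeed", "indeed"],
    "date_posted": ["2024-01-01", "2024-01-05", "2024-01-03"],
})
print(df.sort_values(by=["site", "date_posted"], ascending=[True, False]))
#        site date_posted
# 1    indeed  2024-01-05
# 2    indeed  2024-01-03
# 0  linkedin  2024-01-01
```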
src/jobspy/jobs/__init__.py

@@ -193,13 +193,20 @@ class CompensationInterval(Enum):

     @classmethod
     def get_interval(cls, pay_period):
-        return cls[pay_period].value if pay_period in cls.__members__ else None
+        interval_mapping = {
+            "YEAR": cls.YEARLY,
+            "HOUR": cls.HOURLY,
+        }
+        if pay_period in interval_mapping:
+            return interval_mapping[pay_period].value
+        else:
+            return cls[pay_period].value if pay_period in cls.__members__ else None


 class Compensation(BaseModel):
     interval: Optional[CompensationInterval] = None
-    min_amount: int | None = None
-    max_amount: int | None = None
+    min_amount: float | None = None
+    max_amount: float | None = None
     currency: Optional[str] = "USD"
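The mapping exists because job boards report pay periods as "YEAR" or "HOUR" while the enum members are named YEARLY and HOURLY. A standalone sketch (member values are assumptions; only the member names appear in this diff):

```python
from enum import Enum

class CompensationInterval(Enum):
    YEARLY = "yearly"   # values assumed for illustration
    MONTHLY = "monthly"
    WEEKLY = "weekly"
    DAILY = "daily"
    HOURLY = "hourly"

    @classmethod
    def get_interval(cls, pay_period):
        interval_mapping = {
            "YEAR": cls.YEARLY,
            "HOUR": cls.HOURLY,
        }
        if pay_period in interval_mapping:
            return interval_mapping[pay_period].value
        else:
            return cls[pay_period].value if pay_period in cls.__members__ else None

print(CompensationInterval.get_interval("YEAR"))    # yearly  (new: board-style name)
print(CompensationInterval.get_interval("HOURLY"))  # hourly  (old path still works)
print(CompensationInterval.get_interval("FOO"))     # None
```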
src/jobspy/scrapers/__init__.py

@@ -1,5 +1,4 @@
 from ..jobs import Enum, BaseModel, JobType, JobResponse, Country
-from typing import List, Optional, Any


 class Site(Enum):
@@ -10,25 +9,26 @@ class Site(Enum):


 class ScraperInput(BaseModel):
-    site_type: List[Site]
-    search_term: str
+    site_type: list[Site]
+    search_term: str | None = None

-    location: str = None
-    country: Optional[Country] = Country.USA
-    distance: Optional[int] = None
+    location: str | None = None
+    country: Country | None = Country.USA
+    distance: int | None = None
     is_remote: bool = False
-    job_type: Optional[JobType] = None
-    easy_apply: bool = None  # linkedin
+    job_type: JobType | None = None
+    easy_apply: bool | None = None
     full_description: bool = False
     offset: int = 0
+    linkedin_company_ids: list[int] | None = None

     results_wanted: int = 15
+    hours_old: int | None = None


 class Scraper:
-    def __init__(self, site: Site, proxy: Optional[List[str]] = None):
+    def __init__(self, site: Site, proxy: list[str] | None = None):
         self.site = site
         self.proxy = (lambda p: {"http": p, "https": p} if p else None)(proxy)

-    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
-        ...
+    def scrape(self, scraper_input: ScraperInput) -> JobResponse: ...
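One context line in the `Scraper` base class is worth unpacking: a single proxy URL is expanded into the dict shape `requests` expects for both schemes. Stand-alone:

```python
# The proxy normalization from Scraper.__init__, extracted verbatim.
proxy = "http://user:pass@host:port"
proxies = (lambda p: {"http": p, "https": p} if p else None)(proxy)
print(proxies)  # {'http': 'http://user:pass@host:port', 'https': 'http://user:pass@host:port'}
print((lambda p: {"http": p, "https": p} if p else None)(None))  # None -> no proxy configured
```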
src/jobspy/scrapers/glassdoor/__init__.py

@@ -6,7 +6,6 @@ This module contains routines to scrape Glassdoor.
 """
 import json
 import requests
-from bs4 import BeautifulSoup
 from typing import Optional
 from datetime import datetime, timedelta
 from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -14,7 +13,7 @@ from ..utils import count_urgent_words, extract_emails_from_text

 from .. import Scraper, ScraperInput, Site
 from ..exceptions import GlassdoorException
-from ..utils import create_session, modify_and_get_description
+from ..utils import create_session
 from ...jobs import (
     JobPost,
     Compensation,
@@ -35,6 +34,7 @@ class GlassdoorScraper(Scraper):

         self.url = None
         self.country = None
+        self.session = None
         self.jobs_per_page = 30
         self.seen_urls = set()

@@ -53,8 +53,7 @@ class GlassdoorScraper(Scraper):
         payload = self.add_payload(
             scraper_input, location_id, location_type, page_num, cursor
         )
-        session = create_session(self.proxy, is_tls=False, has_retry=True)
-        response = session.post(
+        response = self.session.post(
             f"{self.url}/graph", headers=self.headers(), timeout=10, data=payload
         )
         if response.status_code != 200:
@@ -73,7 +72,6 @@ class GlassdoorScraper(Scraper):
         with ThreadPoolExecutor(max_workers=self.jobs_per_page) as executor:
             future_to_job_data = {executor.submit(self.process_job, job): job for job in jobs_data}
             for future in as_completed(future_to_job_data):
-                job_data = future_to_job_data[future]
                 try:
                     job_post = future.result()
                     if job_post:
@@ -100,7 +98,7 @@ class GlassdoorScraper(Scraper):
         location_type = job["header"].get("locationType", "")
         age_in_days = job["header"].get("ageInDays")
         is_remote, location = False, None
-        date_posted = (datetime.now() - timedelta(days=age_in_days)).date() if age_in_days else None
+        date_posted = (datetime.now() - timedelta(days=age_in_days)).date() if age_in_days is not None else None

         if location_type == "S":
             is_remote = True
@@ -111,7 +109,7 @@ class GlassdoorScraper(Scraper):

         try:
             description = self.fetch_job_description(job_id)
-        except Exception as e:
+        except:
             description = None

         job_post = JobPost(
@@ -145,6 +143,8 @@ class GlassdoorScraper(Scraper):
         all_jobs: list[JobPost] = []
         cursor = None
         max_pages = 30
+        self.session = create_session(self.proxy, is_tls=False, has_retry=True)
+        self.session.get(self.url)

         try:
             for page in range(
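The scraper now creates one session per scrape and primes it with a GET before any GraphQL POSTs, so cookies persist across page requests. A sketch of that pattern with plain `requests` (the diff's `create_session` is assumed to wrap a session with proxy and retry handling, judging by its arguments):

```python
# Session-reuse sketch: create once, prime with a GET, reuse for every POST.
import requests

session = requests.Session()              # stand-in for create_session(proxy, is_tls=False, has_retry=True)
session.get("https://www.glassdoor.com")  # priming request stores cookies on the session
response = session.post(
    "https://www.glassdoor.com/graph",
    headers={"content-type": "application/json"},
    timeout=10,
    data="[]",                            # the real payload comes from add_payload()
)
```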
@@ -201,8 +201,7 @@ class GlassdoorScraper(Scraper):
             return None
         data = response.json()[0]
         desc = data['data']['jobview']['job']['description']
-        soup = BeautifulSoup(desc, 'html.parser')
-        return modify_and_get_description(soup)
+        return desc

     @staticmethod
     def parse_compensation(data: dict) -> Optional[Compensation]:
@@ -246,6 +245,8 @@ class GlassdoorScraper(Scraper):
             location_type = "CITY"
         elif location_type == "S":
             location_type = "STATE"
+        elif location_type == 'N':
+            location_type = "COUNTRY"
         return int(items[0]["locationId"]), location_type

     @staticmethod
@@ -256,11 +257,18 @@ class GlassdoorScraper(Scraper):
         page_num: int,
         cursor: str | None = None,
     ) -> str:
+        # `fromage` is the posting time filter in days
+        fromage = max(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None
+        filter_params = []
+        if scraper_input.easy_apply:
+            filter_params.append({"filterKey": "applicationType", "values": "1"})
+        if fromage:
+            filter_params.append({"filterKey": "fromAge", "values": str(fromage)})
         payload = {
             "operationName": "JobSearchResultsQuery",
             "variables": {
                 "excludeJobListingIds": [],
-                "filterParams": [{"filterKey": "applicationType", "values": "1"}] if scraper_input.easy_apply else [],
+                "filterParams": filter_params,
                 "keyword": scraper_input.search_term,
                 "numJobsToShow": 30,
                 "locationType": location_type,
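Glassdoor here and Indeed's `add_params` below share the same hours-to-days conversion for their `fromage` filter; it floors to whole days with a one-day minimum, which is what the README's note about rounding refers to:

```python
# The shared hours -> days conversion for the `fromage` filter (Glassdoor and Indeed).
def hours_to_fromage(hours_old: int | None) -> int | None:
    return max(hours_old // 24, 1) if hours_old else None

assert hours_to_fromage(72) == 3      # three days
assert hours_to_fromage(30) == 1      # sub-48h floors to 1 day
assert hours_to_fromage(5) == 1       # anything under a day still becomes 1 day
assert hours_to_fromage(None) is None # falsy hours_old disables the filter
```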
@@ -268,22 +276,180 @@ class GlassdoorScraper(Scraper):
                 "parameterUrlInput": f"IL.0,12_I{location_type}{location_id}",
                 "pageNumber": page_num,
                 "pageCursor": cursor,
+                "fromage": fromage,
+                "sort": "date"
             },
-            "query": "query JobSearchResultsQuery($excludeJobListingIds: [Long!], $keyword: String, $locationId: Int, $locationType: LocationTypeEnum, $numJobsToShow: Int!, $pageCursor: String, $pageNumber: Int, $filterParams: [FilterParams], $originalPageUrl: String, $seoFriendlyUrlInput: String, $parameterUrlInput: String, $seoUrl: Boolean) {\n jobListings(\n contextHolder: {searchParams: {excludeJobListingIds: $excludeJobListingIds, keyword: $keyword, locationId: $locationId, locationType: $locationType, numPerPage: $numJobsToShow, pageCursor: $pageCursor, pageNumber: $pageNumber, filterParams: $filterParams, originalPageUrl: $originalPageUrl, seoFriendlyUrlInput: $seoFriendlyUrlInput, parameterUrlInput: $parameterUrlInput, seoUrl: $seoUrl, searchType: SR}}\n ) {\n companyFilterOptions {\n id\n shortName\n __typename\n }\n filterOptions\n indeedCtk\n jobListings {\n ...JobView\n __typename\n }\n jobListingSeoLinks {\n linkItems {\n position\n url\n __typename\n }\n __typename\n }\n jobSearchTrackingKey\n jobsPageSeoData {\n pageMetaDescription\n pageTitle\n __typename\n }\n paginationCursors {\n cursor\n pageNumber\n __typename\n }\n indexablePageForSeo\n searchResultsMetadata {\n searchCriteria {\n implicitLocation {\n id\n localizedDisplayName\n type\n __typename\n }\n keyword\n location {\n id\n shortName\n localizedShortName\n localizedDisplayName\n type\n __typename\n }\n __typename\n }\n footerVO {\n countryMenu {\n childNavigationLinks {\n id\n link\n textKey\n __typename\n }\n __typename\n }\n __typename\n }\n helpCenterDomain\n helpCenterLocale\n jobAlert {\n jobAlertExists\n __typename\n }\n jobSerpFaq {\n questions {\n answer\n question\n __typename\n }\n __typename\n }\n jobSerpJobOutlook {\n occupation\n paragraph\n __typename\n }\n showMachineReadableJobs\n __typename\n }\n serpSeoLinksVO {\n relatedJobTitlesResults\n searchedJobTitle\n searchedKeyword\n searchedLocationIdAsString\n searchedLocationSeoName\n searchedLocationType\n topCityIdsToNameResults {\n key\n value\n __typename\n }\n topEmployerIdsToNameResults {\n key\n value\n __typename\n }\n topEmployerNameResults\n topOccupationResults\n __typename\n }\n totalJobsCount\n __typename\n }\n}\n\nfragment JobView on JobListingSearchResult {\n jobview {\n header {\n adOrderId\n advertiserType\n adOrderSponsorshipLevel\n ageInDays\n divisionEmployerName\n easyApply\n employer {\n id\n name\n shortName\n __typename\n }\n employerNameFromSearch\n goc\n gocConfidence\n gocId\n jobCountryId\n jobLink\n jobResultTrackingKey\n jobTitleText\n locationName\n locationType\n locId\n needsCommission\n payCurrency\n payPeriod\n payPeriodAdjustedPay {\n p10\n p50\n p90\n __typename\n }\n rating\n salarySource\n savedJobId\n sponsored\n __typename\n }\n job {\n descriptionFragments\n importConfigId\n jobTitleId\n jobTitleText\n listingId\n __typename\n }\n jobListingAdminDetails {\n cpcVal\n importConfigId\n jobListingId\n jobSourceId\n userEligibleForAdminJobDetails\n __typename\n }\n overview {\n shortName\n squareLogoUrl\n __typename\n }\n __typename\n }\n __typename\n}\n",
+            "query": """
+            query JobSearchResultsQuery(
+                $excludeJobListingIds: [Long!],
+                $keyword: String,
+                $locationId: Int,
+                $locationType: LocationTypeEnum,
+                $numJobsToShow: Int!,
+                $pageCursor: String,
+                $pageNumber: Int,
+                $filterParams: [FilterParams],
+                $originalPageUrl: String,
+                $seoFriendlyUrlInput: String,
+                $parameterUrlInput: String,
+                $seoUrl: Boolean
+            ) {
+                jobListings(
+                    contextHolder: {
+                        searchParams: {
+                            excludeJobListingIds: $excludeJobListingIds,
+                            keyword: $keyword,
+                            locationId: $locationId,
+                            locationType: $locationType,
+                            numPerPage: $numJobsToShow,
+                            pageCursor: $pageCursor,
+                            pageNumber: $pageNumber,
+                            filterParams: $filterParams,
+                            originalPageUrl: $originalPageUrl,
+                            seoFriendlyUrlInput: $seoFriendlyUrlInput,
+                            parameterUrlInput: $parameterUrlInput,
+                            seoUrl: $seoUrl,
+                            searchType: SR
+                        }
+                    }
+                ) {
+                    companyFilterOptions {
+                        id
+                        shortName
+                        __typename
+                    }
+                    filterOptions
+                    indeedCtk
+                    jobListings {
+                        ...JobView
+                        __typename
+                    }
+                    jobListingSeoLinks {
+                        linkItems {
+                            position
+                            url
+                            __typename
+                        }
+                        __typename
+                    }
+                    jobSearchTrackingKey
+                    jobsPageSeoData {
+                        pageMetaDescription
+                        pageTitle
+                        __typename
+                    }
+                    paginationCursors {
+                        cursor
+                        pageNumber
+                        __typename
+                    }
+                    indexablePageForSeo
+                    searchResultsMetadata {
+                        searchCriteria {
+                            implicitLocation {
+                                id
+                                localizedDisplayName
+                                type
+                                __typename
+                            }
+                            keyword
+                            location {
+                                id
+                                shortName
+                                localizedShortName
+                                localizedDisplayName
+                                type
+                                __typename
+                            }
+                            __typename
+                        }
+                        helpCenterDomain
+                        helpCenterLocale
+                        jobSerpJobOutlook {
+                            occupation
+                            paragraph
+                            __typename
+                        }
+                        showMachineReadableJobs
+                        __typename
+                    }
+                    totalJobsCount
+                    __typename
+                }
+            }
+
+            fragment JobView on JobListingSearchResult {
+                jobview {
+                    header {
+                        adOrderId
+                        advertiserType
+                        adOrderSponsorshipLevel
+                        ageInDays
+                        divisionEmployerName
+                        easyApply
+                        employer {
+                            id
+                            name
+                            shortName
+                            __typename
+                        }
+                        employerNameFromSearch
+                        goc
+                        gocConfidence
+                        gocId
+                        jobCountryId
+                        jobLink
+                        jobResultTrackingKey
+                        jobTitleText
+                        locationName
+                        locationType
+                        locId
+                        needsCommission
+                        payCurrency
+                        payPeriod
+                        payPeriodAdjustedPay {
+                            p10
+                            p50
+                            p90
+                            __typename
+                        }
+                        rating
+                        salarySource
+                        savedJobId
+                        sponsored
+                        __typename
+                    }
+                    job {
+                        description
+                        importConfigId
+                        jobTitleId
+                        jobTitleText
+                        listingId
+                        __typename
+                    }
+                    jobListingAdminDetails {
+                        cpcVal
+                        importConfigId
+                        jobListingId
+                        jobSourceId
+                        userEligibleForAdminJobDetails
+                        __typename
+                    }
+                    overview {
+                        shortName
+                        squareLogoUrl
+                        __typename
+                    }
+                    __typename
+                }
+                __typename
+            }
+            """
         }

-        job_type_filters = {
-            JobType.FULL_TIME: "fulltime",
-            JobType.PART_TIME: "parttime",
-            JobType.CONTRACT: "contract",
-            JobType.INTERNSHIP: "internship",
-            JobType.TEMPORARY: "temporary",
-        }
-
-        if scraper_input.job_type in job_type_filters:
-            filter_value = job_type_filters[scraper_input.job_type]
+        if scraper_input.job_type:
             payload["variables"]["filterParams"].append(
-                {"filterKey": "jobType", "values": filter_value}
+                {"filterKey": "jobType", "values": scraper_input.job_type.value[0]}
             )
         return json.dumps([payload])
@@ -319,7 +485,6 @@ class GlassdoorScraper(Scraper):
             "apollographql-client-name": "job-search-next",
             "apollographql-client-version": "4.65.5",
             "content-type": "application/json",
-            "cookie": 'gdId=91e2dfc4-c8b5-4fa7-83d0-11512b80262c; G_ENABLED_IDPS=google; trs=https%3A%2F%2Fwww.redhat.com%2F:referral:referral:2023-07-05+09%3A50%3A14.862:undefined:undefined; g_state={"i_p":1688587331651,"i_l":1}; _cfuvid=.7llazxhYFZWi6EISSPdVjtqF0NMVwzxr_E.cB1jgLs-1697828392979-0-604800000; GSESSIONID=undefined; JSESSIONID=F03DD1B5EE02DB6D842FE42B142F88F3; cass=1; jobsClicked=true; indeedCtk=1hd77b301k79i801; asst=1697829114.2; G_AUTHUSER_H=0; uc=8013A8318C98C517FE6DD0024636DFDEF978FC33266D93A2FAFEF364EACA608949D8B8FA2DC243D62DE271D733EB189D809ABE5B08D7B1AE865D217BD4EEBB97C282F5DA5FEFE79C937E3F6110B2A3A0ADBBA3B4B6DF5A996FEE00516100A65FCB11DA26817BE8D1C1BF6CFE36B5B68A3FDC2CFEC83AB797F7841FBB157C202332FC7E077B56BD39B167BDF3D9866E3B; AWSALB=zxc/Yk1nbWXXT6HjNyn3H4h4950ckVsFV/zOrq5LSoChYLE1qV+hDI8Axi3fUa9rlskndcO0M+Fw+ZnJ+AQ2afBFpyOd1acouLMYgkbEpqpQaWhY6/Gv4QH1zBcJ; AWSALBCORS=zxc/Yk1nbWXXT6HjNyn3H4h4950ckVsFV/zOrq5LSoChYLE1qV+hDI8Axi3fUa9rlskndcO0M+Fw+ZnJ+AQ2afBFpyOd1acouLMYgkbEpqpQaWhY6/Gv4QH1zBcJ; gdsid=1697828393025:1697830776351:668396EDB9E6A832022D34414128093D; at=HkH8Hnqi9uaMC7eu0okqyIwqp07ht9hBvE1_St7E_hRqPvkO9pUeJ1Jcpds4F3g6LL5ADaCNlxrPn0o6DumGMfog8qI1-zxaV_jpiFs3pugntw6WpVyYWdfioIZ1IDKupyteeLQEM1AO4zhGjY_rPZynpsiZBPO_B1au94sKv64rv23yvP56OiWKKfI-8_9hhLACEwWvM-Az7X-4aE2QdFt93VJbXbbGVf07bdDZfimsIkTtgJCLSRhU1V0kEM1Efyu66vo3m77gFFaMW7lxyYnb36I5PdDtEXBm3aL-zR7-qa5ywd94ISEivgqQOA4FPItNhqIlX4XrfD1lxVz6rfPaoTIDi4DI6UMCUjwyPsuv8mn0rYqDfRnmJpZ97fJ5AnhrknAd_6ZWN5v1OrxJczHzcXd8LO820QPoqxzzG13bmSTXLwGSxMUCtSrVsq05hicimQ3jpRt0c1dA4OkTNqF7_770B9JfcHcM8cr8-C4IL56dnOjr9KBGfN1Q2IvZM2cOBRbV7okiNOzKVZ3qJ24AE34WA2F3U6Whiu6H8nIuGG5hSNkVygY6CtglNZfFF9p8pJAZm79PngrrBv-CXFBZmhYLFo46lmFetDkiJ6mirtez4tKpzTIYjIp4_JAkiZFwbLJ2QGH4mK8kyyW0lZiX1DTuQec50N_5wvRo0Gt7nlKxzLsApMnaNhuQeH5ygh_pa381ORo9mQGi0EYF9zk00pa2--z4PtjfQ8KFq36GgpxKy5-o4qgqygZj8F01L8r-FiX2G4C7PREMIpAyHX2A4-_JxA1IS2j12EyqKTLqE9VcP06qm2Z-YuIW3ctmpMxy5G9_KiEiGv17weizhSFnl6SbpAEY-2VSmQ5V6jm3hoMp2jemkuGCRkZeFstLDEPxlzFN7WM; __cf_bm=zGaVjIJw4irf40_7UVw54B6Ohm271RUX4Tc8KVScrbs-1697830777-0-AYv2GnKTnnCU+cY9xHbJunO0DwlLDO6SIBnC/s/qldpKsGK0rRAjD6y8lbyATT/KlS7g29OZaN4fbd0lrJg0KmWbIybZIzfWVLHSYePVuOhu; asst=1697829114.2; at=dFhXf64wsf2TlnWy41xLs7skJkuxgKToEGcjGtDfUvW4oEAJ4tTIR5dKQ8wbwT75aIaGgdCfvcb-da7vwrCGWscCncmfLFQpJ9l-LLwoRfk-pMsxHhd77wvf-W7I0HSm7-Q5lQJqI9WyNGRxOa-RpzBTf4L8_Et4-3FzjPaAoYY5pY1FhuwXbN5asGOAMW-p8cjpbfn3PumlIYuckguWnjrcY2F31YJ_1noeoHM9tCGpymANbqGXRkG6aXY7yCfVXtdgZU1K5SMeaSPZIuF_iLUxjc_corzpNiH6qq7BIAmh-e5Aa-g7cwpZcln1fmwTVw4uTMZf1eLIMTa9WzgqZNkvG-sGaq_XxKA_Wai6xTTkOHfRgm4632Ba2963wdJvkGmUUa3tb_L4_wTgk3eFnHp5JhghLfT2Pe3KidP-yX__vx8JOsqe3fndCkKXgVz7xQKe1Dur-sMNlGwi4LXfguTT2YUI8C5Miq3pj2IHc7dC97eyyAiAM4HvyGWfaXWZcei6oIGrOwMvYgy0AcwFry6SIP2SxLT5TrxinRRuem1r1IcOTJsMJyUPp1QsZ7bOyq9G_0060B4CPyovw5523hEuqLTM-R5e5yavY6C_1DHUyE15C3mrh7kdvmlGZeflnHqkFTEKwwOftm-Mv-CKD5Db9ABFGNxKB2FH7nDH67hfOvm4tGNMzceBPKYJ3wciTt9jK3wy39_7cOYVywfrZ-oLhw_XtsbGSSeGn3HytrfgSADAh2sT0Gg6eCC9Xy1vh-Za337SVLUDXZ73W2xJxxUHBkFzZs8L_Xndo5DsbpWhVs9IYUGyraJdqB3SLgDbAppIBCJl4fx6_DG8-xOQPBvuFMlTROe1JVdHOzXI1GElwFDTuH1pjkg4I2G0NhAbE06Y-1illQE; gdsid=1697828393025:1697831731408:99C30D94108AC3030D61C736DDCDF11C',
             "gd-csrf-token": "Ft6oHEWlRZrxDww95Cpazw:0pGUrkb2y3TyOpAIqF2vbPmUXoXVkD3oEGDVkvfeCerceQ5-n8mBg3BovySUIjmCPHCaW0H2nQVdqzbtsYqf4Q:wcqRqeegRUa9MVLJGyujVXB7vWFPjdaS1CtrrzJq-ok",
             "origin": "https://www.glassdoor.com",
             "referer": "https://www.glassdoor.com/",
src/jobspy/scrapers/indeed/__init__.py

@@ -6,12 +6,11 @@ This module contains routines to scrape Indeed.
 """
 import re
 import math
-import io
 import json
+import requests
 from typing import Any
 from datetime import datetime

-import urllib.parse
 from bs4 import BeautifulSoup
 from bs4.element import Tag
 from concurrent.futures import ThreadPoolExecutor, Future
@@ -22,7 +21,7 @@ from ..utils import (
     extract_emails_from_text,
     create_session,
     get_enum_from_job_type,
-    modify_and_get_description
+    logger
 )
 from ...jobs import (
     JobPost,
@@ -50,13 +49,14 @@ class IndeedScraper(Scraper):

     def scrape_page(
         self, scraper_input: ScraperInput, page: int
-    ) -> tuple[list[JobPost], int]:
+    ) -> list[JobPost]:
         """
         Scrapes a page of Indeed for jobs with scraper_input criteria
         :param scraper_input:
         :param page:
         :return: jobs found on page, total number of jobs found for search
         """
+        job_list = []
         self.country = scraper_input.country
         domain = self.country.indeed_domain_value
         self.url = f"https://{domain}.indeed.com"
@@ -76,17 +76,18 @@ class IndeedScraper(Scraper):
             )
         except Exception as e:
             if "Proxy responded with" in str(e):
-                raise IndeedException("bad proxy")
-            raise IndeedException(str(e))
+                logger.error(f'Indeed: Bad proxy')
+            else:
+                logger.error(f'Indeed: {str(e)}')
+            return job_list

         soup = BeautifulSoup(response.content, "html.parser")
         if "did not match any jobs" in response.text:
-            raise IndeedException("Parsing exception: Search did not match any jobs")
+            return job_list

         jobs = IndeedScraper.parse_jobs(
             soup
         )  #: can raise exception, handled by main scrape function
-        total_num_jobs = IndeedScraper.total_jobs(soup)

         if (
             not jobs.get("metaData", {})
@@ -95,75 +96,56 @@ class IndeedScraper(Scraper):
         ):
             raise IndeedException("No jobs found.")

-        def process_job(job: dict) -> JobPost | None:
+        def process_job(job: dict, job_detailed: dict) -> JobPost | None:
             job_url = f'{self.url}/m/jobs/viewjob?jk={job["jobkey"]}'
             job_url_client = f'{self.url}/viewjob?jk={job["jobkey"]}'
             if job_url in self.seen_urls:
                 return None
+            self.seen_urls.add(job_url)
+            description = job_detailed['description']['html']

-            extracted_salary = job.get("extractedSalary")
-            compensation = None
-            if extracted_salary:
-                salary_snippet = job.get("salarySnippet")
-                currency = salary_snippet.get("currency") if salary_snippet else None
-                interval = (extracted_salary.get("type"),)
-                if isinstance(interval, tuple):
-                    interval = interval[0]
-
-                interval = interval.upper()
-                if interval in CompensationInterval.__members__:
-                    compensation = Compensation(
-                        interval=CompensationInterval[interval],
-                        min_amount=int(extracted_salary.get("min")),
-                        max_amount=int(extracted_salary.get("max")),
-                        currency=currency,
-                    )
-
             job_type = IndeedScraper.get_job_type(job)
             timestamp_seconds = job["pubDate"] / 1000
             date_posted = datetime.fromtimestamp(timestamp_seconds)
             date_posted = date_posted.strftime("%Y-%m-%d")

-            description = self.get_description(job_url) if scraper_input.full_description else None
-
-            with io.StringIO(job["snippet"]) as f:
-                soup_io = BeautifulSoup(f, "html.parser")
-                li_elements = soup_io.find_all("li")
-            if description is None and li_elements:
-                description = " ".join(li.text for li in li_elements)
-
             job_post = JobPost(
                 title=job["normTitle"],
                 description=description,
                 company_name=job["company"],
-                company_url=self.url + job["companyOverviewLink"] if "companyOverviewLink" in job else None,
+                company_url=f"{self.url}{job_detailed['employer']['relativeCompanyPageUrl']}" if job_detailed['employer'] else None,
                 location=Location(
                     city=job.get("jobLocationCity"),
                     state=job.get("jobLocationState"),
                     country=self.country,
                 ),
                 job_type=job_type,
-                compensation=compensation,
+                compensation=self.get_compensation(job, job_detailed),
                 date_posted=date_posted,
                 job_url=job_url_client,
                 emails=extract_emails_from_text(description) if description else None,
                 num_urgent_words=count_urgent_words(description)
                 if description
                 else None,
-                is_remote=self.is_remote_job(job),
+                is_remote=IndeedScraper.is_job_remote(job, job_detailed, description)
             )
             return job_post

-        workers = 10 if scraper_input.full_description else 10  # possibly lessen 10 when fetching desc based on feedback
+        workers = 10
         jobs = jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
+        job_keys = [job['jobkey'] for job in jobs]
+        jobs_detailed = self.get_job_details(job_keys)

         with ThreadPoolExecutor(max_workers=workers) as executor:
             job_results: list[Future] = [
-                executor.submit(process_job, job) for job in jobs
+                executor.submit(process_job, job, job_detailed['job']) for job, job_detailed in zip(jobs, jobs_detailed)
             ]

         job_list = [result.result() for result in job_results if result.result()]

-        return job_list, total_num_jobs
+        return job_list

     def scrape(self, scraper_input: ScraperInput) -> JobResponse:
         """
@@ -171,76 +153,36 @@ class IndeedScraper(Scraper):
         :param scraper_input:
         :return: job_response
         """
-        pages_to_process = (
-            math.ceil(scraper_input.results_wanted / self.jobs_per_page) - 1
-        )
-
-        #: get first page to initialize session
-        job_list, total_results = self.scrape_page(scraper_input, 0)
-
-        with ThreadPoolExecutor(max_workers=10) as executor:
-            futures: list[Future] = [
-                executor.submit(self.scrape_page, scraper_input, page)
-                for page in range(1, pages_to_process + 1)
-            ]
-
-            for future in futures:
-                jobs, _ = future.result()
-
-                job_list += jobs
-
-        if len(job_list) > scraper_input.results_wanted:
-            job_list = job_list[: scraper_input.results_wanted]
-
-        job_response = JobResponse(
-            jobs=job_list,
-            total_results=total_results,
-        )
-        return job_response
-
-    def get_description(self, job_page_url: str) -> str | None:
-        """
-        Retrieves job description by going to the job page url
-        :param job_page_url:
-        :return: description
-        """
-        parsed_url = urllib.parse.urlparse(job_page_url)
-        params = urllib.parse.parse_qs(parsed_url.query)
-        jk_value = params.get("jk", [None])[0]
-        formatted_url = f"{self.url}/m/viewjob?jk={jk_value}&spa=1"
-        session = create_session(self.proxy)
-
-        try:
-            response = session.get(
-                formatted_url,
-                headers=self.get_headers(),
-                allow_redirects=True,
-                timeout_seconds=5,
-            )
-        except Exception as e:
-            return None
-
-        if response.status_code not in range(200, 400):
-            return None
-
-        try:
-            soup = BeautifulSoup(response.text, 'html.parser')
-            script_tags = soup.find_all('script')
-
-            job_description = ''
-            for tag in script_tags:
-                if 'window._initialData' in tag.text:
-                    json_str = tag.text
-                    json_str = json_str.split('window._initialData=')[1]
-                    json_str = json_str.rsplit(';', 1)[0]
-                    data = json.loads(json_str)
-                    job_description = data["jobInfoWrapperModel"]["jobInfoModel"]["sanitizedJobDescription"]
-                    break
-        except (KeyError, TypeError, IndexError):
-            return None
-
-        soup = BeautifulSoup(job_description, "html.parser")
-        return modify_and_get_description(soup)
+        job_list = self.scrape_page(scraper_input, 0)
+        pages_processed = 1
+
+        while len(self.seen_urls) < scraper_input.results_wanted:
+            pages_to_process = math.ceil((scraper_input.results_wanted - len(self.seen_urls)) / self.jobs_per_page)
+            new_jobs = False
+
+            with ThreadPoolExecutor(max_workers=10) as executor:
+                futures: list[Future] = [
+                    executor.submit(self.scrape_page, scraper_input, page + pages_processed)
+                    for page in range(pages_to_process)
+                ]
+
+                for future in futures:
+                    jobs = future.result()
+                    if jobs:
+                        job_list += jobs
+                        new_jobs = True
+                    if len(self.seen_urls) >= scraper_input.results_wanted:
+                        break
+
+            pages_processed += pages_to_process
+            if not new_jobs:
+                break
+
+        if len(self.seen_urls) > scraper_input.results_wanted:
+            job_list = job_list[:scraper_input.results_wanted]
+
+        return JobResponse(jobs=job_list)

     @staticmethod
     def get_job_type(job: dict) -> list[JobType] | None:
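The rewritten scrape flow replaces the one-shot page fan-out with a loop that keeps requesting pages until enough unique jobs have been seen or a pass produces nothing new. A control-flow sketch with a stubbed `scrape_page` (names mirror the diff; the stub stands in for the network call):

```python
# Control-flow sketch of the new pagination loop.
import math

results_wanted = 45
jobs_per_page = 15
seen_urls: set[str] = set()

def scrape_page(page: int) -> list[str]:
    # Stub: each page yields jobs_per_page unique "jobs" and records their URLs.
    jobs = [f"page{page}-job{i}" for i in range(jobs_per_page)]
    seen_urls.update(jobs)
    return jobs

job_list = scrape_page(0)
pages_processed = 1
while len(seen_urls) < results_wanted:
    pages_to_process = math.ceil((results_wanted - len(seen_urls)) / jobs_per_page)
    new_jobs = False
    for page in range(pages_to_process):
        jobs = scrape_page(page + pages_processed)
        if jobs:
            job_list += jobs
            new_jobs = True
        if len(seen_urls) >= results_wanted:
            break
    pages_processed += pages_to_process
    if not new_jobs:   # a pass with no new jobs means the board is exhausted
        break
print(len(job_list))   # 45
```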
@@ -261,6 +203,44 @@ class IndeedScraper(Scraper):
                 job_types.append(job_type)
         return job_types

+    @staticmethod
+    def get_compensation(job: dict, job_detailed: dict) -> Compensation:
+        """
+        Parses the job to get
+        :param job:
+        :param job_detailed:
+        :return: compensation object
+        """
+        comp = job_detailed['compensation']['baseSalary']
+        if comp:
+            interval = IndeedScraper.get_correct_interval(comp['unitOfWork'])
+            if interval:
+                return Compensation(
+                    interval=interval,
+                    min_amount=round(comp['range'].get('min'), 2) if comp['range'].get('min') is not None else None,
+                    max_amount=round(comp['range'].get('max'), 2) if comp['range'].get('max') is not None else None,
+                    currency=job_detailed['compensation']['currencyCode']
+                )
+
+        extracted_salary = job.get("extractedSalary")
+        compensation = None
+        if extracted_salary:
+            salary_snippet = job.get("salarySnippet")
+            currency = salary_snippet.get("currency") if salary_snippet else None
+            interval = (extracted_salary.get("type"),)
+            if isinstance(interval, tuple):
+                interval = interval[0]
+
+            interval = interval.upper()
+            if interval in CompensationInterval.__members__:
+                compensation = Compensation(
+                    interval=CompensationInterval[interval],
+                    min_amount=int(extracted_salary.get("min")),
+                    max_amount=int(extracted_salary.get("max")),
+                    currency=currency,
+                )
+        return compensation
+
     @staticmethod
     def parse_jobs(soup: BeautifulSoup) -> dict:
         """
@@ -302,24 +282,6 @@ class IndeedScraper(Scraper):
                 "Could not find any results for the search"
             )

-    @staticmethod
-    def total_jobs(soup: BeautifulSoup) -> int:
-        """
-        Parses the total jobs for that search from soup object
-        :param soup:
-        :return: total_num_jobs
-        """
-        script = soup.find("script", string=lambda t: t and "window._initialData" in t)
-
-        pattern = re.compile(r"window._initialData\s*=\s*({.*})\s*;", re.DOTALL)
-        match = pattern.search(script.string)
-        total_num_jobs = 0
-        if match:
-            json_str = match.group(1)
-            data = json.loads(json_str)
-            total_num_jobs = int(data["searchTitleBarModel"]["totalNumResults"])
-        return total_num_jobs
-
     @staticmethod
     def get_headers():
         return {
@@ -333,25 +295,17 @@ class IndeedScraper(Scraper):
             'referer': 'https://www.indeed.com/m/jobs?q=software%20intern&l=Dallas%2C%20TX&from=serpso&rq=1&rsIdx=3',
         }

-    @staticmethod
-    def is_remote_job(job: dict) -> bool:
-        """
-        :param job:
-        :return: bool
-        """
-        for taxonomy in job.get("taxonomyAttributes", []):
-            if taxonomy["label"] == "remote" and len(taxonomy["attributes"]) > 0:
-                return True
-        return False
-
     @staticmethod
     def add_params(scraper_input: ScraperInput, page: int) -> dict[str, str | Any]:
+        # `fromage` is the posting time filter in days
+        fromage = max(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None
         params = {
             "q": scraper_input.search_term,
-            "l": scraper_input.location,
+            "l": scraper_input.location if scraper_input.location else scraper_input.country.value[0].split(',')[-1],
             "filter": 0,
             "start": scraper_input.offset + page * 10,
-            "sort": "date"
+            "sort": "date",
+            "fromage": fromage,
         }
         if scraper_input.distance:
             params["radius"] = scraper_input.distance
@@ -360,7 +314,7 @@ class IndeedScraper(Scraper):
         if scraper_input.is_remote:
             sc_values.append("attr(DSQF7)")
         if scraper_input.job_type:
-            sc_values.append("jt({})".format(scraper_input.job_type.value))
+            sc_values.append("jt({})".format(scraper_input.job_type.value[0]))

         if sc_values:
             params["sc"] = "0kf:" + "".join(sc_values) + ";"
@@ -369,3 +323,114 @@ class IndeedScraper(Scraper):
             params['iafilter'] = 1

         return params
+
+    @staticmethod
+    def is_job_remote(job: dict, job_detailed: dict, description: str) -> bool:
+        remote_keywords = ['remote', 'work from home', 'wfh']
+        is_remote_in_attributes = any(
+            any(keyword in attr['label'].lower() for keyword in remote_keywords)
+            for attr in job_detailed['attributes']
+        )
+        is_remote_in_description = any(keyword in description.lower() for keyword in remote_keywords)
+        is_remote_in_location = any(
+            keyword in job_detailed['location']['formatted']['long'].lower()
+            for keyword in remote_keywords
+        )
+        is_remote_in_taxonomy = any(
+            taxonomy["label"] == "remote" and len(taxonomy["attributes"]) > 0
+            for taxonomy in job.get("taxonomyAttributes", [])
+        )
+        return is_remote_in_attributes or is_remote_in_description or is_remote_in_location or is_remote_in_taxonomy
+
+    def get_job_details(self, job_keys: list[str]) -> dict:
+        """
+        Queries the GraphQL endpoint for detailed job information for the given job keys.
+        """
+        url = "https://apis.indeed.com/graphql"
+        headers = {
+            'Host': 'apis.indeed.com',
+            'content-type': 'application/json',
+            'indeed-api-key': '161092c2017b5bbab13edb12461a62d5a833871e7cad6d9d475304573de67ac8',
+            'accept': 'application/json',
+            'indeed-locale': 'en-US',
+            'accept-language': 'en-US,en;q=0.9',
+            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Indeed App 193.1',
+            'indeed-app-info': 'appv=193.1; appid=com.indeed.jobsearch; osv=16.6.1; os=ios; dtype=phone',
+            'indeed-co': 'US',
+        }
+
+        job_keys_gql = '[' + ', '.join(f'"{key}"' for key in job_keys) + ']'
+
+        payload = {
+            "query": f"""
+            query GetJobData {{
+                jobData(input: {{
+                    jobKeys: {job_keys_gql}
+                }}) {{
+                    results {{
+                        job {{
+                            key
+                            title
+                            description {{
+                                html
+                            }}
+                            location {{
+                                countryName
+                                countryCode
+                                city
+                                postalCode
+                                streetAddress
+                                formatted {{
+                                    short
+                                    long
+                                }}
+                            }}
+                            compensation {{
+                                baseSalary {{
+                                    unitOfWork
+                                    range {{
+                                        ... on Range {{
+                                            min
+                                            max
+                                        }}
+                                    }}
+                                }}
+                                currencyCode
+                            }}
+                            attributes {{
+                                label
+                            }}
+                            employer {{
+                                relativeCompanyPageUrl
+                            }}
+                            recruit {{
+                                viewJobUrl
+                                detailedSalary
+                                workSchedule
+                            }}
+                        }}
+                    }}
+                }}
+            }}
+            """
+        }
+        response = requests.post(url, headers=headers, json=payload, proxies=self.proxy)
+        if response.status_code == 200:
+            return response.json()['data']['jobData']['results']
+        else:
+            return {}
+
+    @staticmethod
+    def get_correct_interval(interval: str) -> CompensationInterval:
+        interval_mapping = {
+            "DAY": "DAILY",
+            "YEAR": "YEARLY",
+            "HOUR": "HOURLY",
+            "WEEK": "WEEKLY",
+            "MONTH": "MONTHLY"
+        }
+        mapped_interval = interval_mapping.get(interval.upper(), None)
+        if mapped_interval and mapped_interval in CompensationInterval.__members__:
+            return CompensationInterval[mapped_interval]
+        else:
+            raise ValueError(f"Unsupported interval: {interval}")
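Two details of the new GraphQL path are easy to check in isolation: the job-key list is serialized by hand into the query text, and Indeed's pay-period names are normalized before the enum lookup. A sketch (the job keys and enum member values are assumptions for illustration):

```python
# 1) Serializing job keys into the GetJobData query (expression copied from the diff).
job_keys = ["abc123", "def456"]  # hypothetical keys
job_keys_gql = '[' + ', '.join(f'"{key}"' for key in job_keys) + ']'
print(job_keys_gql)              # ["abc123", "def456"]

# 2) Normalizing Indeed's unitOfWork ("YEAR") to the enum member name ("YEARLY").
from enum import Enum

class CompensationInterval(Enum):  # member values assumed; mirrors jobs/__init__.py
    YEARLY = "yearly"
    MONTHLY = "monthly"
    WEEKLY = "weekly"
    DAILY = "daily"
    HOURLY = "hourly"

interval_mapping = {"DAY": "DAILY", "YEAR": "YEARLY", "HOUR": "HOURLY", "WEEK": "WEEKLY", "MONTH": "MONTHLY"}
mapped_interval = interval_mapping.get("year".upper())
print(CompensationInterval[mapped_interval])  # CompensationInterval.YEARLY
```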
src/jobspy/scrapers/linkedin/__init__.py

@@ -31,8 +31,7 @@ from ..utils import (
     count_urgent_words,
     extract_emails_from_text,
     get_enum_from_job_type,
-    currency_parser,
-    modify_and_get_description
+    currency_parser
 )
@@ -59,6 +58,12 @@ class LinkedInScraper(Scraper):
         url_lock = Lock()
         page = scraper_input.offset // 25 + 25 if scraper_input.offset else 0

+        seconds_old = (
+            scraper_input.hours_old * 3600
+            if scraper_input.hours_old
+            else None
+        )
+
         def job_type_code(job_type_enum):
             mapping = {
                 JobType.FULL_TIME: "F",
@@ -70,7 +75,9 @@ class LinkedInScraper(Scraper):

             return mapping.get(job_type_enum, "")

-        while len(job_list) < scraper_input.results_wanted and page < 1000:
+        continue_search = lambda: len(job_list) < scraper_input.results_wanted and page < 1000
+
+        while continue_search():
             session = create_session(is_tls=False, has_retry=True, delay=5)
             params = {
                 "keywords": scraper_input.search_term,
@@ -83,6 +90,8 @@ class LinkedInScraper(Scraper):
                 "pageNum": 0,
                 "start": page + scraper_input.offset,
                 "f_AL": "true" if scraper_input.easy_apply else None,
+                "f_C": ','.join(map(str, scraper_input.linkedin_company_ids)) if scraper_input.linkedin_company_ids else None,
+                "f_TPR": f"r{seconds_old}",
             }

             params = {k: v for k, v in params.items() if v is not None}
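LinkedIn is the only board filtered to the hour: `hours_old` becomes seconds and is sent as the `f_TPR` parameter with an `r` prefix. The conversion in isolation:

```python
# hours_old -> LinkedIn's f_TPR time filter (r-prefixed seconds).
hours_old = 72
seconds_old = hours_old * 3600 if hours_old else None
print(f"r{seconds_old}")  # r259200
```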
@@ -98,7 +107,9 @@ class LinkedInScraper(Scraper):
                 response.raise_for_status()

             except requests.HTTPError as e:
-                raise LinkedInException(f"bad response status code: {e.response.status_code}")
+                raise LinkedInException(
+                    f"bad response status code: {e.response.status_code}"
+                )
             except ProxyError as e:
                 raise LinkedInException("bad proxy")
             except Exception as e:
@@ -130,8 +141,9 @@ class LinkedInScraper(Scraper):
             except Exception as e:
                 raise LinkedInException("Exception occurred while processing jobs")

-            page += 25
-            time.sleep(random.uniform(LinkedInScraper.DELAY, LinkedInScraper.DELAY + 2))
+            if continue_search():
+                time.sleep(random.uniform(LinkedInScraper.DELAY, LinkedInScraper.DELAY + 2))
+                page += 25

         job_list = job_list[: scraper_input.results_wanted]
         return JobResponse(jobs=job_list)
@@ -141,11 +153,11 @@ class LinkedInScraper(Scraper):

         compensation = None
         if salary_tag:
-            salary_text = salary_tag.get_text(separator=' ').strip()
-            salary_values = [currency_parser(value) for value in salary_text.split('-')]
+            salary_text = salary_tag.get_text(separator=" ").strip()
+            salary_values = [currency_parser(value) for value in salary_text.split("-")]
             salary_min = salary_values[0]
             salary_max = salary_values[1]
-            currency = salary_text[0] if salary_text[0] != '$' else 'USD'
+            currency = salary_text[0] if salary_text[0] != "$" else "USD"

             compensation = Compensation(
                 min_amount=int(salary_min),
@@ -223,10 +235,15 @@ class LinkedInScraper(Scraper):
         div_content = soup.find(
             "div", class_=lambda x: x and "show-more-less-html__markup" in x
         )

         description = None
-        if div_content:
-            description = modify_and_get_description(div_content)
+        if div_content is not None:
+            def remove_attributes(tag):
+                for attr in list(tag.attrs):
+                    del tag[attr]
+                return tag
+
+            div_content = remove_attributes(div_content)
+            description = div_content.prettify(formatter="html")

         def get_job_type(
             soup_job_type: BeautifulSoup,
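The new description path drops `modify_and_get_description` (removed from utils in this compare) and keeps the HTML instead, stripping only the wrapper div's attributes. A runnable sketch of what that produces (note the inner tags keep their attributes):

```python
# What the new LinkedIn description path produces: the wrapper div keeps its
# structure but loses its own attributes; only the outer tag is stripped.
from bs4 import BeautifulSoup

html = '<div class="show-more-less-html__markup" dir="ltr"><ul><li>Build APIs</li></ul></div>'
soup = BeautifulSoup(html, "html.parser")
div_content = soup.find("div", class_=lambda x: x and "show-more-less-html__markup" in x)

def remove_attributes(tag):
    for attr in list(tag.attrs):
        del tag[attr]
    return tag

div_content = remove_attributes(div_content)
print(div_content.prettify(formatter="html"))
# <div>
#  <ul>
#   <li>
#    Build APIs
#   </li>
#  </ul>
# </div>
```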
@@ -290,17 +307,17 @@ class LinkedInScraper(Scraper):
     @staticmethod
     def headers() -> dict:
         return {
-            'authority': 'www.linkedin.com',
-            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
-            'accept-language': 'en-US,en;q=0.9',
-            'cache-control': 'max-age=0',
-            'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
+            "authority": "www.linkedin.com",
+            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+            "accept-language": "en-US,en;q=0.9",
+            "cache-control": "max-age=0",
+            "sec-ch-ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
             # 'sec-ch-ua-mobile': '?0',
             # 'sec-ch-ua-platform': '"macOS"',
             # 'sec-fetch-dest': 'document',
             # 'sec-fetch-mode': 'navigate',
             # 'sec-fetch-site': 'none',
             # 'sec-fetch-user': '?1',
-            'upgrade-insecure-requests': '1',
-            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+            "upgrade-insecure-requests": "1",
+            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
         }
@@ -1,4 +1,5 @@
 import re
+import logging
 import numpy as np
 
 import tls_client
@@ -7,14 +8,14 @@ from requests.adapters import HTTPAdapter, Retry
 
 from ..jobs import JobType
 
-def modify_and_get_description(soup):
-    for li in soup.find_all('li'):
-        li.string = "- " + li.get_text()
-
-    description = soup.get_text(separator='\n').strip()
-    description = re.sub(r'\n+', '\n', description)
-    return description
+logger = logging.getLogger("JobSpy")
+if not logger.handlers:
+    logger.setLevel(logging.ERROR)
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.ERROR)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
 
 
 def count_urgent_words(description: str) -> int:
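A short usage sketch for the module-level logger this hunk introduces (the call site below is illustrative, and assumes utils has been imported so the handler is attached):

```python
import logging

logger = logging.getLogger("JobSpy")  # same named logger configured above

def fetch(url: str) -> str | None:
    try:
        raise TimeoutError("proxy timed out")  # stand-in for a real request
    except Exception as e:
        # ERROR meets the handler threshold, so this prints e.g.
        # "2024-01-15 09:30:00,123 - JobSpy - ERROR - failed: proxy timed out"
        logger.error(f"failed: {e}")
        return None
```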
@@ -79,6 +80,7 @@ def get_enum_from_job_type(job_type_str: str) -> JobType | None:
         res = job_type
     return res
 
+
 def currency_parser(cur_str):
     # Remove any non-numerical characters
     # except for ',' '.' or '-' (e.g. EUR)
@@ -94,3 +96,5 @@ def currency_parser(cur_str):
     num = float(cur_str)
 
     return np.round(num, 2)
+
+
@@ -6,17 +6,15 @@ This module contains routines to scrape ZipRecruiter.
 """
 import math
 import time
-import re
-from datetime import datetime, date
+from datetime import datetime, timezone
 from typing import Optional, Tuple, Any
 
-from bs4 import BeautifulSoup
 from concurrent.futures import ThreadPoolExecutor
 
 from .. import Scraper, ScraperInput, Site
 from ..exceptions import ZipRecruiterException
 from ...jobs import JobPost, Compensation, Location, JobResponse, JobType, Country
-from ..utils import count_urgent_words, extract_emails_from_text, create_session, modify_and_get_description
+from ..utils import count_urgent_words, extract_emails_from_text, create_session
 
 
 class ZipRecruiterScraper(Scraper):
@@ -32,6 +30,7 @@ class ZipRecruiterScraper(Scraper):
 
         self.jobs_per_page = 20
         self.seen_urls = set()
+        self.delay = 5
 
     def find_jobs_in_page(
         self, scraper_input: ScraperInput, continue_token: str | None = None
@@ -44,12 +43,12 @@ class ZipRecruiterScraper(Scraper):
         """
         params = self.add_params(scraper_input)
         if continue_token:
-            params["continue"] = continue_token
+            params["continue_from"] = continue_token
         try:
            response = self.session.get(
                 f"https://api.ziprecruiter.com/jobs-app/jobs",
                 headers=self.headers(),
-                params=self.add_params(scraper_input),
+                params=params
            )
            if response.status_code != 200:
                raise ZipRecruiterException(
@@ -60,7 +59,6 @@ class ZipRecruiterScraper(Scraper):
                 raise ZipRecruiterException("bad proxy")
             raise ZipRecruiterException(str(e))
 
-        time.sleep(5)
         response_data = response.json()
         jobs_list = response_data.get("jobs", [])
         next_continue_token = response_data.get("continue", None)
@@ -68,7 +66,7 @@ class ZipRecruiterScraper(Scraper):
         with ThreadPoolExecutor(max_workers=self.jobs_per_page) as executor:
             job_results = [executor.submit(self.process_job, job) for job in jobs_list]
 
-        job_list = [result.result() for result in job_results if result.result()]
+        job_list = list(filter(None, (result.result() for result in job_results)))
         return job_list, next_continue_token
 
     def scrape(self, scraper_input: ScraperInput) -> JobResponse:
@@ -86,6 +84,9 @@ class ZipRecruiterScraper(Scraper):
             if len(job_list) >= scraper_input.results_wanted:
                 break
 
+            if page > 1:
+                time.sleep(self.delay)
+
             jobs_on_page, continue_token = self.find_jobs_in_page(
                 scraper_input, continue_token
             )
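Taken together with the `self.delay = 5` init change, the scrape loop is plain cursor pagination: fetch a page, pass the returned continue token back, and sleep between pages. A stripped-down sketch of the pattern, where `fetch_page` is a hypothetical stand-in for `find_jobs_in_page`:

```python
import time

def paged_fetch(fetch_page, results_wanted: int, delay: float = 5):
    # fetch_page(token) -> (jobs, next_token)
    jobs, token, page = [], None, 1
    while len(jobs) < results_wanted:
        if page > 1:
            time.sleep(delay)  # throttle between pages, as in the diff
        page_jobs, token = fetch_page(token)
        jobs += page_jobs
        page += 1
        if not token:  # no continue cursor -> no more pages
            break
    return jobs[:results_wanted]
```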
@@ -95,22 +96,19 @@ class ZipRecruiterScraper(Scraper):
             if not continue_token:
                 break
 
-        if len(job_list) > scraper_input.results_wanted:
-            job_list = job_list[: scraper_input.results_wanted]
-
-        return JobResponse(jobs=job_list)
+        return JobResponse(jobs=job_list[: scraper_input.results_wanted])
 
-    @staticmethod
-    def process_job(job: dict) -> JobPost:
+    def process_job(self, job: dict) -> JobPost | None:
         """Processes an individual job dict from the response"""
         title = job.get("name")
-        job_url = job.get("job_url")
+        job_url = f"https://www.ziprecruiter.com/jobs//j?lvk={job['listing_key']}"
+        if job_url in self.seen_urls:
+            return
+        self.seen_urls.add(job_url)
 
-        job_description_html = job.get("job_description", "").strip()
-        description_soup = BeautifulSoup(job_description_html, "html.parser")
-        description = modify_and_get_description(description_soup)
+        description = job.get("job_description", "").strip()
 
-        company = job["hiring_company"].get("name") if "hiring_company" in job else None
+        company = job.get("hiring_company", {}).get("name")
         country_value = "usa" if job.get("job_country") == "US" else "canada"
         country_enum = Country.from_string(country_value)
 
@@ -120,17 +118,7 @@ class ZipRecruiterScraper(Scraper):
         job_type = ZipRecruiterScraper.get_job_type_enum(
             job.get("employment_type", "").replace("_", "").lower()
         )
-        save_job_url = job.get("SaveJobURL", "")
-        posted_time_match = re.search(
-            r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url
-        )
-        if posted_time_match:
-            date_time_str = posted_time_match.group(1)
-            date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ")
-            date_posted = date_posted_obj.date()
-        else:
-            date_posted = date.today()
+        date_posted = datetime.fromisoformat(job['posted_time'].rstrip("Z")).date()
 
         return JobPost(
             title=title,
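The replacement reads the API's `posted_time` field directly instead of regex-matching a timestamp out of `SaveJobURL`. A quick illustration with a made-up value (the `rstrip("Z")` matters because `fromisoformat` only accepts a trailing `Z` from Python 3.11 on, and the project supports 3.10):

```python
from datetime import datetime

posted_time = "2024-01-15T09:30:00Z"  # hypothetical API value
date_posted = datetime.fromisoformat(posted_time.rstrip("Z")).date()
print(date_posted)  # 2024-01-15
```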
@@ -173,30 +161,25 @@ class ZipRecruiterScraper(Scraper):
         params = {
             "search": scraper_input.search_term,
             "location": scraper_input.location,
-            "form": "jobs-landing",
         }
-        job_type_value = None
+        if scraper_input.hours_old:
+            fromage = max(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None
+            params['days'] = fromage
+        job_type_map = {
+            JobType.FULL_TIME: 'full_time',
+            JobType.PART_TIME: 'part_time'
+        }
         if scraper_input.job_type:
-            if scraper_input.job_type.value == "fulltime":
-                job_type_value = "full_time"
-            elif scraper_input.job_type.value == "parttime":
-                job_type_value = "part_time"
-            else:
-                job_type_value = scraper_input.job_type.value
+            params['employment_type'] = job_type_map[scraper_input.job_type] if scraper_input.job_type in job_type_map else scraper_input.job_type.value[0]
         if scraper_input.easy_apply:
             params['zipapply'] = 1
 
-        if job_type_value:
-            params[
-                "refine_by_employment"
-            ] = f"employment_type:employment_type:{job_type_value}"
-
         if scraper_input.is_remote:
-            params["refine_by_location_type"] = "only_remote"
+            params["remote"] = 1
 
         if scraper_input.distance:
             params["radius"] = scraper_input.distance
 
+        params = {k: v for k, v in params.items() if v is not None}
+
         return params
 
     @staticmethod
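One behavioral note on the rewritten `add_params`: `hours_old` is floored to whole days with a minimum of one, since ZipRecruiter's `days` filter has no hourly resolution. A hypothetical check:

```python
hours_old = 72
assert max(hours_old // 24, 1) == 3   # 72h -> 3 days
assert max(5 // 24, 1) == 1           # under a day still searches 1 day back
```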