enh: indeed more fields (#126)

2026-03-05 12:04:33 -08:00 · 2024-03-09 01:40:01 -06:00
parent a4f6851c32
commit 0a669e9ba8
9 changed files with 267 additions and 288 deletions
--- a/src/jobspy/init.py
+++ b/src/jobspy/init.py
@@ -3,6 +3,7 @@ from typing import Tuple
 from concurrent.futures import ThreadPoolExecutor, as_completed

 from .jobs import JobType, Location
+from .scrapers.utils import logger
 from .scrapers.indeed import IndeedScraper
 from .scrapers.ziprecruiter import ZipRecruiterScraper
 from .scrapers.glassdoor import GlassdoorScraper
@@ -20,7 +21,7 @@ def scrape_jobs(
    site_name: str | list[str] | Site | list[Site] | None = None,
    search_term: str | None = None,
    location: str | None = None,
-    distance: int | None = None,
+    distance: int | None = 50,
    is_remote: bool = False,
    job_type: str | None = None,
    easy_apply: bool | None = None,
@@ -92,6 +93,8 @@ def scrape_jobs(
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class(proxy=proxy)
        scraped_data: JobResponse = scraper.scrape(scraper_input)
+        site_name = 'ZipRecruiter' if site.value.capitalize() == 'Zip_recruiter' else site.value.capitalize()
+        logger.info(f"{site_name} finished scraping")
        return site.value, scraped_data

    site_to_jobs_dict = {}
@@ -160,11 +163,11 @@ def scrape_jobs(
        
        # Desired column order
        desired_order = [
-            "job_url_hyper" if hyperlinks else "job_url",
            "site",
+            "job_url_hyper" if hyperlinks else "job_url",
+            "job_url_direct",
            "title",
            "company",
-            "company_url",
            "location",
            "job_type",
            "date_posted",
@@ -173,10 +176,20 @@ def scrape_jobs(
            "max_amount",
            "currency",
            "is_remote",
-            "num_urgent_words",
-            "benefits",
            "emails",
            "description",
+
+            "company_url",
+            "company_url_direct",
+            "company_addresses",
+            "company_industry",
+            "company_num_employees",
+            "company_revenue",
+            "company_description",
+            "logo_photo_url",
+            "banner_photo_url",
+            "ceo_name",
+            "ceo_photo_url",
        ]
        
        # Step 3: Ensure all desired columns are present, adding missing ones as empty
--- a/src/jobspy/jobs/init.py
+++ b/src/jobspy/jobs/init.py
@@ -57,7 +57,7 @@ class JobType(Enum):
 class Country(Enum):
    """
    Gets the subdomain for Indeed and Glassdoor.
-    The second item in the tuple is the subdomain for Indeed
+    The second item in the tuple is the subdomain (and API country code if there's a ':' separator) for Indeed
    The third item in the tuple is the subdomain (and tld if there's a ':' separator) for Glassdoor
    """

@@ -118,8 +118,8 @@ class Country(Enum):
    TURKEY = ("turkey", "tr")
    UKRAINE = ("ukraine", "ua")
    UNITEDARABEMIRATES = ("united arab emirates", "ae")
-    UK = ("uk,united kingdom", "uk", "co.uk")
-    USA = ("usa,us,united states", "www", "com")
+    UK = ("uk,united kingdom", "uk:gb", "co.uk")
+    USA = ("usa,us,united states", "www:us", "com")
    URUGUAY = ("uruguay", "uy")
    VENEZUELA = ("venezuela", "ve")
    VIETNAM = ("vietnam", "vn", "com")
@@ -132,7 +132,10 @@ class Country(Enum):

    @property
    def indeed_domain_value(self):
-        return self.value[1]
+        subdomain, _, api_country_code = self.value[1].partition(":")
+        if subdomain and api_country_code:
+            return subdomain, api_country_code.upper()
+        return self.value[1], self.value[1].upper()

    @property
    def glassdoor_domain_value(self):
@@ -163,7 +166,7 @@ class Country(Enum):


 class Location(BaseModel):
-    country: Country | None = None
+    country: Country | str | None = None
    city: Optional[str] = None
    state: Optional[str] = None

@@ -173,7 +176,9 @@ class Location(BaseModel):
            location_parts.append(self.city)
        if self.state:
            location_parts.append(self.state)
-        if self.country and self.country not in (Country.US_CANADA, Country.WORLDWIDE):
+        if isinstance(self.country, str):
+            location_parts.append(self.country)
+        elif self.country and self.country not in (Country.US_CANADA, Country.WORLDWIDE):
            country_name = self.country.value[0]
            if "," in country_name:
                country_name = country_name.split(",")[0]
@@ -217,21 +222,31 @@ class DescriptionFormat(Enum):

 class JobPost(BaseModel):
    title: str
-    company_name: str
+    company_name: str | None
    job_url: str
+    job_url_direct: str | None = None
    location: Optional[Location]

    description: str | None = None
    company_url: str | None = None
+    company_url_direct: str | None = None

    job_type: list[JobType] | None = None
    compensation: Compensation | None = None
    date_posted: date | None = None
-    benefits: str | None = None
    emails: list[str] | None = None
-    num_urgent_words: int | None = None
    is_remote: bool | None = None
-    # company_industry: str | None = None
+
+    # indeed specific
+    company_addresses: str | None = None
+    company_industry: str | None = None
+    company_num_employees: str | None = None
+    company_revenue: str | None = None
+    company_description: str | None = None
+    ceo_name: str | None = None
+    ceo_photo_url: str | None = None
+    logo_photo_url: str | None = None
+    banner_photo_url: str | None = None


 class JobResponse(BaseModel):
--- a/src/jobspy/scrapers/glassdoor/init.py
+++ b/src/jobspy/scrapers/glassdoor/init.py
@@ -11,7 +11,7 @@ import requests
 from typing import Optional
 from datetime import datetime, timedelta
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from ..utils import count_urgent_words, extract_emails_from_text
+from ..utils import extract_emails_from_text

 from .. import Scraper, ScraperInput, Site
 from ..exceptions import GlassdoorException
@@ -188,7 +188,6 @@ class GlassdoorScraper(Scraper):
            is_remote=is_remote,
            description=description,
            emails=extract_emails_from_text(description) if description else None,
-            num_urgent_words=count_urgent_words(description) if description else None,
        )

    def _fetch_job_description(self, job_id):
--- a/src/jobspy/scrapers/indeed/init.py
+++ b/src/jobspy/scrapers/indeed/init.py
@@ -4,21 +4,15 @@ jobspy.scrapers.indeed

 This module contains routines to scrape Indeed.
 """
-import re
 import math
-import json
-import requests
-from typing import Any
+from concurrent.futures import ThreadPoolExecutor, Future
 from datetime import datetime

-from bs4 import BeautifulSoup
-from bs4.element import Tag
-from concurrent.futures import ThreadPoolExecutor, Future
+import requests

+from .. import Scraper, ScraperInput, Site
 from ..utils import (
-    count_urgent_words,
    extract_emails_from_text,
-    create_session,
    get_enum_from_job_type,
    markdown_converter,
    logger
@@ -32,18 +26,19 @@ from ...jobs import (
    JobType,
    DescriptionFormat
 )
-from .. import Scraper, ScraperInput, Site


 class IndeedScraper(Scraper):
    def __init__(self, proxy: str | None = None):
        """
-        Initializes IndeedScraper with the Indeed job search url
+        Initializes IndeedScraper with the Indeed API url
        """
        self.scraper_input = None
-        self.jobs_per_page = 25
+        self.jobs_per_page = 100
        self.num_workers = 10
        self.seen_urls = set()
+        self.headers = None
+        self.api_country_code = None
        self.base_url = None
        self.api_url = "https://apis.indeed.com/graphql"
        site = Site(Site.INDEED)
@@ -56,278 +51,220 @@ class IndeedScraper(Scraper):
        :return: job_response
        """
        self.scraper_input = scraper_input
-        job_list = self._scrape_page()
-        pages_processed = 1
+        domain, self.api_country_code = self.scraper_input.country.indeed_domain_value  # (subdomain, API country code)
+        self.base_url = f"https://{domain}.indeed.com"
+        self.headers = self.api_headers.copy()
+        self.headers['indeed-co'] = self.api_country_code  # header takes the country code only, not the whole tuple
+        job_list = []
+        page = 1

-        while len(self.seen_urls) < scraper_input.results_wanted:
-            pages_to_process = math.ceil((scraper_input.results_wanted - len(self.seen_urls)) / self.jobs_per_page)
-            new_jobs = False
-            with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
-                futures: list[Future] = [
-                    executor.submit(self._scrape_page, page + pages_processed)
-                    for page in range(pages_to_process)
-                ]
-
-                for future in futures:
-                    jobs = future.result()
-                    if jobs:
-                        job_list += jobs
-                        new_jobs = True
-                    if len(self.seen_urls) >= scraper_input.results_wanted:
-                        break
-
-            pages_processed += pages_to_process
-            if not new_jobs:
+        cursor = None
+        offset_pages = math.ceil(self.scraper_input.offset / 100)
+        for _ in range(offset_pages):
+            logger.info(f'Indeed skipping search page: {page}')
+            __, cursor = self._scrape_page(cursor)
+            if not __:
+                logger.info(f'Indeed found no jobs on page: {page}')
                break

-        if len(self.seen_urls) > scraper_input.results_wanted:
-            job_list = job_list[:scraper_input.results_wanted]
+        while len(job_list) < scraper_input.results_wanted:  # count collected jobs: seen_urls also holds offset-skipped ones
+            logger.info(f'Indeed search page: {page}')
+            jobs, cursor = self._scrape_page(cursor)
+            if not jobs:
+                logger.info(f'Indeed found no jobs on page: {page}')
+                break
+            job_list += jobs
+            page += 1
+        return JobResponse(jobs=job_list[:scraper_input.results_wanted])

-        return JobResponse(jobs=job_list)
-
-    def _scrape_page(self, page: int=0) -> list[JobPost]:
+    def _scrape_page(self, cursor: str | None) -> (list[JobPost], str | None):
        """
        Scrapes a page of Indeed for jobs with scraper_input criteria
-        :param page:
-        :return: jobs found on page, total number of jobs found for search
+        :param cursor:
+        :return: jobs found on page, next page cursor
        """
-        logger.info(f'Indeed search page: {page + 1}')
-        job_list = []
-        domain = self.scraper_input.country.indeed_domain_value
-        self.base_url = f"https://{domain}.indeed.com"
-        
-        try:
-            session = create_session(self.proxy)
-            response = session.get(
-                f"{self.base_url}/m/jobs",
-                headers=self.headers,
-                params=self._add_params(page),
-            )
-            if response.status_code not in range(200, 400):
-                if response.status_code == 429:
-                    logger.error(f'429 Response - Blocked by Indeed for too many requests')
-                else:
-                    logger.error(f'Indeed response status code {response.status_code}')
-                return job_list
-
-        except Exception as e:
-            if "Proxy responded with" in str(e):
-                logger.error(f'Indeed: Bad proxy')
-            else:
-                logger.error(f'Indeed: {str(e)}')
-            return job_list
-
-        soup = BeautifulSoup(response.content, "html.parser")
-        if "did not match any jobs" in response.text:
-            return job_list
-
-        jobs = IndeedScraper._parse_jobs(soup)
-        if not jobs:
-            return []
-        if (
-            not jobs.get("metaData", {})
-            .get("mosaicProviderJobCardsModel", {})
-            .get("results")
-        ):
-            logger.error("Indeed - No jobs found.")
-            return []
-
-        jobs = jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
-        job_keys = [job['jobkey'] for job in jobs]
-        jobs_detailed = self._get_job_details(job_keys)
+        jobs = []
+        new_cursor = None
+        filters = self._build_filters()
+        query = self.job_search_query.format(
+            what=self.scraper_input.search_term,
+            location=self.scraper_input.location if self.scraper_input.location else self.scraper_input.country.value[0].split(',')[-1],
+            radius=self.scraper_input.distance,
+            dateOnIndeed=self.scraper_input.hours_old,  # NOTE(review): no {dateOnIndeed} placeholder is visible in job_search_query, so this looks unused
+            cursor=f'cursor: "{cursor}"' if cursor else '',
+            filters=filters
+        )
+        payload = {
+            'query': query,
+        }
+        api_headers = self.api_headers.copy()  # copy so the class-level header dict is never mutated
+        api_headers['indeed-co'] = self.api_country_code
+        response = requests.post(self.api_url, headers=api_headers, json=payload, proxies=self.proxy, timeout=10)
+        if response.status_code != 200:
+            logger.info(f'Indeed responded with status code: {response.status_code} (submit GitHub issue if this appears to be a bug)')
+            return jobs, new_cursor  # empty page and no cursor make the caller stop paging
+        data = response.json()
+        jobs = data['data']['jobSearch']['results']
+        new_cursor = data['data']['jobSearch']['pageInfo']['nextCursor']

        with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
            job_results: list[Future] = [
-                executor.submit(self._process_job, job, job_detailed['job']) for job, job_detailed in zip(jobs, jobs_detailed)
-            ]
+                executor.submit(self._process_job, job['job']) for job in jobs
+           ]
        job_list = [result.result() for result in job_results if result.result()]
+        return job_list, new_cursor

-        return job_list
+    def _build_filters(self) -> str:
+        """
+        Builds the GraphQL filters argument (a string, possibly empty) for hours_old, or otherwise for job_type/is_remote. If hours_old is provided, the composite job_type/is_remote filter is not applied.
+        IndeedApply: filters: { keyword: { field: "indeedApplyScope", keys: ["DESKTOP"] } }
+        """
+        filters_str = ""
+        if self.scraper_input.hours_old:
+            filters_str = """
+            filters: {{
+                date: {{
+                  field: "dateOnIndeed",
+                  start: "{start}h"
+                }}
+            }}
+            """.format(start=self.scraper_input.hours_old)
+        elif self.scraper_input.job_type or self.scraper_input.is_remote:
+            job_type_key_mapping = {
+                JobType.FULL_TIME: "CF3CP",
+                JobType.PART_TIME: "75GKK",
+                JobType.CONTRACT: "NJXCK",
+                JobType.INTERNSHIP: "VDTG7",
+            }
+
-    def _process_job(self, job: dict, job_detailed: dict) -> JobPost | None:
-        job_url = f'{self.base_url}/m/jobs/viewjob?jk={job["jobkey"]}'
-        job_url_client = f'{self.base_url}/viewjob?jk={job["jobkey"]}'
+            keys = []
+            job_type_key = job_type_key_mapping.get(self.scraper_input.job_type)
+            if job_type_key:  # job types without a mapped attribute key add no filter instead of raising KeyError
+                keys.append(job_type_key)
+
+            if self.scraper_input.is_remote:
+                keys.append("DSQF7")
+
+            if keys:
+                keys_str = '", "'.join(keys)  # joined so the template below renders keys: ["A", "B"]
+                filters_str = f"""
+                filters: {{
+                  composite: {{
+                    filters: [{{
+                      keyword: {{
+                        field: "attributes",
+                        keys: ["{keys_str}"]
+                      }}
+                    }}]
+                  }}
+                }}
+                """
+        return filters_str
+
+    def _process_job(self, job: dict) -> JobPost | None:
+        """
+        Parses the job dict into JobPost model
+        :param job: dict to parse
+        :return: JobPost if it's a new job
+        """
+        job_url = f'{self.base_url}/viewjob?jk={job["key"]}'
        if job_url in self.seen_urls:
-            return None
+            return
        self.seen_urls.add(job_url)
-        description = job_detailed['description']['html']
+        description = job['description']['html']
        description = markdown_converter(description) if self.scraper_input.description_format == DescriptionFormat.MARKDOWN else description
-        job_type = self._get_job_type(job)
-        timestamp_seconds = job["pubDate"] / 1000
-        date_posted = datetime.fromtimestamp(timestamp_seconds)
-        date_posted = date_posted.strftime("%Y-%m-%d")
+
+        job_type = self._get_job_type(job['attributes'])
+        timestamp_seconds = job["datePublished"] / 1000
+        date_posted = datetime.fromtimestamp(timestamp_seconds).strftime("%Y-%m-%d")
+        employer = job['employer'].get('dossier') if job['employer'] else None
+        employer_details = employer.get('employerDetails', {}) if employer else {}
        return JobPost(
-            title=job["normTitle"],
+            title=job["title"],
            description=description,
-            company_name=job["company"],
-            company_url=f"{self.base_url}{job_detailed['employer']['relativeCompanyPageUrl']}" if job_detailed[
+            company_name=job['employer'].get("name") if job.get('employer') else None,
+            company_url=f"{self.base_url}{job['employer']['relativeCompanyPageUrl']}" if job[
                'employer'] else None,
+            company_url_direct=employer['links'].get('corporateWebsite') if employer and employer.get('links') else None,
+
            location=Location(
-                city=job.get("jobLocationCity"),
-                state=job.get("jobLocationState"),
-                country=self.scraper_input.country,
+                city=job.get("location", {}).get("city"),
+                state=job.get("location", {}).get("admin1Code"),
+                country=job.get("location", {}).get("countryCode"),
            ),
            job_type=job_type,
-            compensation=self._get_compensation(job, job_detailed),
+            compensation=self._get_compensation(job),
            date_posted=date_posted,
-            job_url=job_url_client,
+            job_url=job_url,
+            job_url_direct=job['recruit'].get('viewJobUrl') if job.get('recruit') else None,
            emails=extract_emails_from_text(description) if description else None,
-            num_urgent_words=count_urgent_words(description) if description else None,
-            is_remote=self._is_job_remote(job, job_detailed, description)
+            is_remote=self._is_job_remote(job, description),
+
+            company_addresses=employer_details['addresses'][0] if employer_details.get('addresses') else None,
+            company_industry=employer_details['industry'].replace('Iv1', '').replace('_', ' ').title() if employer_details.get('industry') else None,
+            company_num_employees=employer_details.get('employeesLocalizedLabel'),
+            company_revenue=employer_details.get('revenueLocalizedLabel'),
+            company_description=employer_details.get('briefDescription'),
+            ceo_name=employer_details.get('ceoName'),
+            ceo_photo_url=employer_details.get('ceoPhotoUrl'),
+
+            logo_photo_url=employer['images'].get('squareLogoUrl') if employer and employer.get('images') else None,
+            banner_photo_url=employer['images'].get('headerImageUrl') if employer and employer.get('images') else None,
        )

-    def _get_job_details(self, job_keys: list[str]) -> dict:
-        """
-        Queries the GraphQL endpoint for detailed job information for the given job keys.
-        """
-        job_keys_gql = '[' + ', '.join(f'"{key}"' for key in job_keys) + ']'
-        payload = dict(self.api_payload)
-        payload["query"] = self.api_payload["query"].format(job_keys_gql=job_keys_gql)
-        response = requests.post(self.api_url, headers=self.api_headers, json=payload, proxies=self.proxy)
-        if response.status_code == 200:
-            return response.json()['data']['jobData']['results']
-        else:
-            return {}
-
-    def _add_params(self, page: int) -> dict[str, str | Any]:
-        fromage = max(self.scraper_input.hours_old // 24, 1) if self.scraper_input.hours_old else None
-        params = {
-            "q": self.scraper_input.search_term,
-            "l": self.scraper_input.location if self.scraper_input.location else self.scraper_input.country.value[0].split(',')[-1],
-            "filter": 0,
-            "start": self.scraper_input.offset + page * 10,
-            "sort": "date",
-            "fromage": fromage,
-        }
-        if self.scraper_input.distance:
-            params["radius"] = self.scraper_input.distance
-
-        sc_values = []
-        if self.scraper_input.is_remote:
-            sc_values.append("attr(DSQF7)")
-        if self.scraper_input.job_type:
-            sc_values.append("jt({})".format(self.scraper_input.job_type.value[0]))
-
-        if sc_values:
-            params["sc"] = "0kf:" + "".join(sc_values) + ";"
-
-        if self.scraper_input.easy_apply:
-            params['iafilter'] = 1
-
-        return params
-
    @staticmethod
-    def _get_job_type(job: dict) -> list[JobType] | None:
+    def _get_job_type(attributes: list) -> list[JobType]:
        """
-        Parses the job to get list of job types
-        :param job:
-        :return:
+        Parses the attributes to get list of job types
+        :param attributes:
+        :return: list of JobType
        """
        job_types: list[JobType] = []
-        for taxonomy in job["taxonomyAttributes"]:
-            if taxonomy["label"] == "job-types":
-                for i in range(len(taxonomy["attributes"])):
-                    label = taxonomy["attributes"][i].get("label")
-                    if label:
-                        job_type_str = label.replace("-", "").replace(" ", "").lower()
-                        job_type = get_enum_from_job_type(job_type_str)
-                        if job_type:
-                            job_types.append(job_type)
+        for attribute in attributes:
+            job_type_str = attribute['label'].replace("-", "").replace(" ", "").lower()
+            job_type = get_enum_from_job_type(job_type_str)
+            if job_type:
+                job_types.append(job_type)
        return job_types

    @staticmethod
-    def _get_compensation(job: dict, job_detailed: dict) -> Compensation:
+    def _get_compensation(job: dict) -> Compensation | None:
        """
-        Parses the job to get
+        Parses the job to get compensation
+        Returns None when no usable base salary is present.
        :param job:
-        :param job_detailed:
        :return: compensation object
        """
-        comp = job_detailed['compensation']['baseSalary']
+        comp = (job.get('compensation') or {}).get('baseSalary')  # tolerate a missing/null compensation object
        if comp:
-            interval = IndeedScraper._get_correct_interval(comp['unitOfWork'])
+            interval = IndeedScraper._get_compensation_interval(comp['unitOfWork'])
            if interval:
                return Compensation(
                    interval=interval,
                    min_amount=round(comp['range'].get('min'), 2) if comp['range'].get('min') is not None else None,
                    max_amount=round(comp['range'].get('max'), 2) if comp['range'].get('max') is not None else None,
-                    currency=job_detailed['compensation']['currencyCode']
+                    currency=job['compensation']['currencyCode']
                )

-        extracted_salary = job.get("extractedSalary")
-        compensation = None
-        if extracted_salary:
-            salary_snippet = job.get("salarySnippet")
-            currency = salary_snippet.get("currency") if salary_snippet else None
-            interval = (extracted_salary.get("type"),)
-            if isinstance(interval, tuple):
-                interval = interval[0]
-
-            interval = interval.upper()
-            if interval in CompensationInterval.__members__:
-                compensation = Compensation(
-                    interval=CompensationInterval[interval],
-                    min_amount=int(extracted_salary.get("min")),
-                    max_amount=int(extracted_salary.get("max")),
-                    currency=currency,
-                )
-        return compensation
-
    @staticmethod
-    def _parse_jobs(soup: BeautifulSoup) -> dict:
+    def _is_job_remote(job: dict, description: str) -> bool:
        """
-        Parses the jobs from the soup object
-        :param soup:
-        :return: jobs
+        Searches the description, location, and attributes to check if job is remote
        """
-        def find_mosaic_script() -> Tag | None:
-            script_tags = soup.find_all("script")
-
-            for tag in script_tags:
-                if (
-                    tag.string
-                    and "mosaic.providerData" in tag.string
-                    and "mosaic-provider-jobcards" in tag.string
-                ):
-                    return tag
-            return None
-
-        script_tag = find_mosaic_script()
-        if script_tag:
-            script_str = script_tag.string
-            pattern = r'window.mosaic.providerData\["mosaic-provider-jobcards"\]\s*=\s*({.*?});'
-            p = re.compile(pattern, re.DOTALL)
-            m = p.search(script_str)
-            if m:
-                jobs = json.loads(m.group(1).strip())
-                return jobs
-            else:
-                logger.warning(f'Indeed: Could not find mosaic provider job cards data')
-                return {}
-        else:
-            logger.warning(f"Indeed: Could not parse any jobs on the page")
-            return {}
-
-    @staticmethod
-    def _is_job_remote(job: dict, job_detailed: dict, description: str) -> bool:
        remote_keywords = ['remote', 'work from home', 'wfh']
        is_remote_in_attributes = any(
            any(keyword in attr['label'].lower() for keyword in remote_keywords)
-            for attr in job_detailed['attributes']
+            for attr in job['attributes']
        )
        is_remote_in_description = any(keyword in description.lower() for keyword in remote_keywords)
        is_remote_in_location = any(
-            keyword in job_detailed['location']['formatted']['long'].lower()
+            keyword in job['location']['formatted']['long'].lower()
            for keyword in remote_keywords
        )
-        is_remote_in_taxonomy = any(
-            taxonomy["label"] == "remote" and len(taxonomy["attributes"]) > 0
-            for taxonomy in job.get("taxonomyAttributes", [])
-        )
-        return is_remote_in_attributes or is_remote_in_description or is_remote_in_location or is_remote_in_taxonomy
+        return is_remote_in_attributes or is_remote_in_description or is_remote_in_location

    @staticmethod
-    def _get_correct_interval(interval: str) -> CompensationInterval:
+    def _get_compensation_interval(interval: str) -> CompensationInterval:
        interval_mapping = {
            "DAY": "DAILY",
            "YEAR": "YEARLY",
@@ -341,16 +278,6 @@ class IndeedScraper(Scraper):
        else:
            raise ValueError(f"Unsupported interval: {interval}")

-    headers =  {
-      'Host': 'www.indeed.com',
-      'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-      'sec-fetch-site': 'same-origin',
-      'sec-fetch-dest': 'document',
-      'accept-language': 'en-US,en;q=0.9',
-      'sec-fetch-mode': 'navigate',
-      'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Indeed App 192.0',
-      'referer': 'https://www.indeed.com/m/jobs?q=software%20intern&l=Dallas%2C%20TX&from=serpso&rq=1&rsIdx=3',
-    }
    api_headers = {
        'Host': 'apis.indeed.com',
        'content-type': 'application/json',
@@ -360,24 +287,35 @@ class IndeedScraper(Scraper):
        'accept-language': 'en-US,en;q=0.9',
        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Indeed App 193.1',
        'indeed-app-info': 'appv=193.1; appid=com.indeed.jobsearch; osv=16.6.1; os=ios; dtype=phone',
-        'indeed-co': 'US',
    }
-    api_payload = {
-        "query": """
+    job_search_query = """
        query GetJobData {{
-          jobData(input: {{
-            jobKeys: {job_keys_gql}
-          }}) {{
+          jobSearch(
+            what: "{what}"
+            location: {{ where: "{location}", radius: {radius}, radiusUnit: MILES }}
+            includeSponsoredResults: NONE
+            limit: 100
+            sort: DATE
+            {cursor}
+            {filters}
+          ) {{
+            pageInfo {{
+              nextCursor
+            }}
            results {{
+              trackingKey
              job {{
                key
                title
+                datePublished
+                dateOnIndeed
                description {{
                  html
                }}
                location {{
                  countryName
                  countryCode
+                  admin1Code
                  city
                  postalCode
                  streetAddress
@@ -399,10 +337,30 @@ class IndeedScraper(Scraper):
                  currencyCode
                }}
                attributes {{
+                  key
                  label
                }}
                employer {{
                  relativeCompanyPageUrl
+                  name
+                  dossier {{
+                      employerDetails {{
+                        addresses
+                        industry
+                        employeesLocalizedLabel
+                        revenueLocalizedLabel
+                        briefDescription
+                        ceoName
+                        ceoPhotoUrl
+                      }}
+                      images {{
+                            headerImageUrl
+                            squareLogoUrl
+                      }}
+                      links {{
+                        corporateWebsite
+                    }}
+                  }}
                }}
                recruit {{
                  viewJobUrl
@@ -414,4 +372,3 @@ class IndeedScraper(Scraper):
          }}
        }}
        """
-    }
--- a/src/jobspy/scrapers/linkedin/init.py
+++ b/src/jobspy/scrapers/linkedin/init.py
@@ -28,7 +28,6 @@ from ...jobs import (
 )
 from ..utils import (
    logger,
-    count_urgent_words,
    extract_emails_from_text,
    get_enum_from_job_type,
    currency_parser,
@@ -187,7 +186,6 @@ class LinkedInScraper(Scraper):
            except:
                date_posted = None
        benefits_tag = job_card.find("span", class_="result-benefits__text")
-        benefits = " ".join(benefits_tag.get_text().split()) if benefits_tag else None
        if full_descr:
            description, job_type = self._get_job_description(job_url)

@@ -199,11 +197,9 @@ class LinkedInScraper(Scraper):
            date_posted=date_posted,
            job_url=job_url,
            compensation=compensation,
-            benefits=benefits,
            job_type=job_type,
            description=description,
            emails=extract_emails_from_text(description) if description else None,
-            num_urgent_words=count_urgent_words(description) if description else None,
        )

    def _get_job_description(
--- a/src/jobspy/scrapers/utils.py
+++ b/src/jobspy/scrapers/utils.py
@@ -19,20 +19,6 @@ if not logger.handlers:
    logger.addHandler(console_handler)


-def count_urgent_words(description: str) -> int:
-    """
-    Count the number of urgent words or phrases in a job description.
-    """
-    urgent_patterns = re.compile(
-        r"\burgen(t|cy)|\bimmediate(ly)?\b|start asap|\bhiring (now|immediate(ly)?)\b",
-        re.IGNORECASE,
-    )
-    matches = re.findall(urgent_patterns, description)
-    count = len(matches)
-
-    return count
-
-
 def markdown_converter(description_html: str):
    if description_html is None:
        return None
--- a/src/jobspy/scrapers/ziprecruiter/init.py
+++ b/src/jobspy/scrapers/ziprecruiter/init.py
@@ -14,7 +14,6 @@ from concurrent.futures import ThreadPoolExecutor
 from .. import Scraper, ScraperInput, Site
 from ..utils import (
    logger,
-    count_urgent_words,
    extract_emails_from_text,
    create_session,
    markdown_converter
@@ -161,7 +160,6 @@ class ZipRecruiterScraper(Scraper):
            job_url=job_url,
            description=description,
            emails=extract_emails_from_text(description) if description else None,
-            num_urgent_words=count_urgent_words(description) if description else None,
        )

    def _get_cookies(self):