Compare commits


4 Commits

Author         SHA1        Message                                    Date
Cullen Watson  13c7694474  Easy apply (#95)                           2024-02-02 17:47:15 -06:00
               * enh(glassdoor): easy apply filter
               * enh(ziprecruiter): easy apply
               * enh(indeed): use mobile headers
               * chore: version
Cullen Watson  bbe46fe3f4  enh(glassdoor): easy apply filter (#92)    2024-02-01 19:42:24 -06:00
Cullen Watson  b97c73ffd6  fix: clean description (#88)               2024-01-28 21:50:41 -06:00
Cullen Watson  5b3627b244  enh: full description param (#85)          2024-01-22 20:22:32 -06:00
11 changed files with 201 additions and 120 deletions

README.md

@@ -67,8 +67,9 @@ Optional
 ├── job_type (enum): fulltime, parttime, internship, contract
 ├── proxy (str): in format 'http://user:pass@host:port' or [https, socks]
 ├── is_remote (bool)
+├── full_description (bool): fetches full description for Indeed / LinkedIn (much slower)
 ├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
-├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
+├── easy_apply (bool): filters for jobs that are hosted on the job board site
 ├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
 ├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
 ```
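Note: a sketch of how the two parameters touched by this PR combine in a call; the search values below are illustrative, not from the diff.

```python
from jobspy import scrape_jobs

jobs = scrape_jobs(
    site_name=["indeed", "linkedin"],   # illustrative site selection
    search_term="software engineer",    # illustrative
    location="Dallas, TX",              # illustrative
    results_wanted=10,
    easy_apply=True,        # keep only jobs hosted on the job board site
    full_description=True,  # fetch full Indeed/LinkedIn descriptions (much slower)
)
print(jobs.head())  # scrape_jobs returns a pandas DataFrame
```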

poetry.lock (generated)

@@ -1053,16 +1053,6 @@ files = [
{file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@@ -2270,13 +2260,13 @@ test = ["flake8", "isort", "pytest"]
[[package]] [[package]]
name = "tls-client" name = "tls-client"
version = "0.2.1" version = "1.0"
description = "Advanced Python HTTP Client." description = "Advanced Python HTTP Client."
optional = false optional = false
python-versions = "*" python-versions = "*"
files = [ files = [
{file = "tls_client-0.2.1-py3-none-any.whl", hash = "sha256:124a710952b979d5e20b4e2b7879b7958d6e48a259d0f5b83101055eb173f0bd"}, {file = "tls_client-1.0-py3-none-any.whl", hash = "sha256:f1183f5e18cb31914bd62d11b350a33ea0293ea80fb91d69a3072821dece3e66"},
{file = "tls_client-0.2.1.tar.gz", hash = "sha256:473fb4c671d9d4ca6b818548ab6e955640dd589767bfce520830c5618c2f2e2b"}, {file = "tls_client-1.0.tar.gz", hash = "sha256:7f6de48ad4a0ef69b72682c76ce604155971e07b4bfb2148a36276194ae3e7a0"},
] ]
[[package]] [[package]]
@@ -2445,4 +2435,4 @@ files = [
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "f966f3979873eec2c3b13460067f5aa414c69aa8ab5cd3239c1cfa564fcb5deb" content-hash = "404a77d78066cbb2ef71015562baf44aa11d12aac29a191c1ccc7758bfda598a"

pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.34"
+version = "1.1.38"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/JobSpy"
@@ -13,7 +13,7 @@ packages = [
 [tool.poetry.dependencies]
 python = "^3.10"
 requests = "^2.31.0"
-tls-client = "^0.2.1"
+tls-client = "*"
 beautifulsoup4 = "^4.12.2"
 pandas = "^2.1.0"
 NUMPY = "1.24.2"
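Note: tls-client is now unpinned ("*"), so installs resolve it freely. A quick, hypothetical way to confirm what the loosened constraint actually resolved to in your environment, using only the standard library:

```python
from importlib.metadata import version

# Not part of the repo: print the installed versions of the package
# and its newly unpinned dependency.
for pkg in ("python-jobspy", "tls-client"):
    print(pkg, version(pkg))
```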

src/jobspy/__init__.py

@@ -40,6 +40,7 @@ def scrape_jobs(
     country_indeed: str = "usa",
     hyperlinks: bool = False,
     proxy: Optional[str] = None,
+    full_description: Optional[bool] = False,
     offset: Optional[int] = 0,
 ) -> pd.DataFrame:
     """
@@ -74,6 +75,7 @@ def scrape_jobs(
         is_remote=is_remote,
         job_type=job_type,
         easy_apply=easy_apply,
+        full_description=full_description,
         results_wanted=results_wanted,
         offset=offset,
     )

src/jobspy/jobs/__init__.py

@@ -1,7 +1,7 @@
-from typing import Union, Optional
+from typing import Optional
 from datetime import date
 from enum import Enum
-from pydantic import BaseModel, validator
+from pydantic import BaseModel


 class JobType(Enum):

src/jobspy/scrapers/__init__.py

@@ -19,6 +19,7 @@ class ScraperInput(BaseModel):
     is_remote: bool = False
     job_type: Optional[JobType] = None
     easy_apply: bool = None  # linkedin
+    full_description: bool = False
     offset: int = 0
     results_wanted: int = 15

src/jobspy/scrapers/glassdoor/__init__.py

@@ -5,12 +5,16 @@ jobspy.scrapers.glassdoor
 This module contains routines to scrape Glassdoor.
 """
 import json
-from typing import Optional, Any
+import requests
+from bs4 import BeautifulSoup
+from typing import Optional
 from datetime import datetime, timedelta
+from concurrent.futures import ThreadPoolExecutor, as_completed

+from ..utils import count_urgent_words, extract_emails_from_text
 from .. import Scraper, ScraperInput, Site
 from ..exceptions import GlassdoorException
-from ..utils import create_session
+from ..utils import create_session, modify_and_get_description
 from ...jobs import (
     JobPost,
     Compensation,
@@ -66,50 +70,72 @@ class GlassdoorScraper(Scraper):
         jobs_data = res_json["data"]["jobListings"]["jobListings"]

         jobs = []
-        for i, job in enumerate(jobs_data):
-            job_url = res_json["data"]["jobListings"]["jobListingSeoLinks"][
-                "linkItems"
-            ][i]["url"]
-            if job_url in self.seen_urls:
-                continue
-            self.seen_urls.add(job_url)
-            job = job["jobview"]
-            title = job["job"]["jobTitleText"]
-            company_name = job["header"]["employerNameFromSearch"]
-            location_name = job["header"].get("locationName", "")
-            location_type = job["header"].get("locationType", "")
-            age_in_days = job["header"].get("ageInDays")
-            is_remote, location = False, None
-            date_posted = (datetime.now() - timedelta(days=age_in_days)).date() if age_in_days else None
-            if location_type == "S":
-                is_remote = True
-            else:
-                location = self.parse_location(location_name)
-            compensation = self.parse_compensation(job["header"])
-            job = JobPost(
-                title=title,
-                company_name=company_name,
-                date_posted=date_posted,
-                job_url=job_url,
-                location=location,
-                compensation=compensation,
-                is_remote=is_remote
-            )
-            jobs.append(job)
+        with ThreadPoolExecutor(max_workers=self.jobs_per_page) as executor:
+            future_to_job_data = {executor.submit(self.process_job, job): job for job in jobs_data}
+            for future in as_completed(future_to_job_data):
+                job_data = future_to_job_data[future]
+                try:
+                    job_post = future.result()
+                    if job_post:
+                        jobs.append(job_post)
+                except Exception as exc:
+                    raise GlassdoorException(f'Glassdoor generated an exception: {exc}')

         return jobs, self.get_cursor_for_page(
             res_json["data"]["jobListings"]["paginationCursors"], page_num + 1
         )

+    def process_job(self, job_data):
+        """Processes a single job and fetches its description."""
+        job_id = job_data["jobview"]["job"]["listingId"]
+        job_url = f'{self.url}job-listing/j?jl={job_id}'
+        if job_url in self.seen_urls:
+            return None
+        self.seen_urls.add(job_url)
+        job = job_data["jobview"]
+        title = job["job"]["jobTitleText"]
+        company_name = job["header"]["employerNameFromSearch"]
+        company_id = job_data['jobview']['header']['employer']['id']
+        location_name = job["header"].get("locationName", "")
+        location_type = job["header"].get("locationType", "")
+        age_in_days = job["header"].get("ageInDays")
+        is_remote, location = False, None
+        date_posted = (datetime.now() - timedelta(days=age_in_days)).date() if age_in_days else None
+        if location_type == "S":
+            is_remote = True
+        else:
+            location = self.parse_location(location_name)
+        compensation = self.parse_compensation(job["header"])
+        try:
+            description = self.fetch_job_description(job_id)
+        except Exception as e:
+            description = None
+        job_post = JobPost(
+            title=title,
+            company_url=f"{self.url}Overview/W-EI_IE{company_id}.htm" if company_id else None,
+            company_name=company_name,
+            date_posted=date_posted,
+            job_url=job_url,
+            location=location,
+            compensation=compensation,
+            is_remote=is_remote,
+            description=description,
+            emails=extract_emails_from_text(description) if description else None,
+            num_urgent_words=count_urgent_words(description) if description else None,
+        )
+        return job_post
+
     def scrape(self, scraper_input: ScraperInput) -> JobResponse:
         """
         Scrapes Glassdoor for jobs with scraper_input criteria.
         :param scraper_input: Information about job search criteria.
         :return: JobResponse containing a list of jobs.
         """
+        scraper_input.results_wanted = min(900, scraper_input.results_wanted)
         self.country = scraper_input.country
         self.url = self.country.get_url()
@@ -143,6 +169,41 @@ class GlassdoorScraper(Scraper):
         return JobResponse(jobs=all_jobs)

+    def fetch_job_description(self, job_id):
+        """Fetches the job description for a single job ID."""
+        url = f"{self.url}/graph"
+        body = [
+            {
+                "operationName": "JobDetailQuery",
+                "variables": {
+                    "jl": job_id,
+                    "queryString": "q",
+                    "pageTypeEnum": "SERP"
+                },
+                "query": """
+                query JobDetailQuery($jl: Long!, $queryString: String, $pageTypeEnum: PageTypeEnum) {
+                    jobview: jobView(
+                        listingId: $jl
+                        contextHolder: {queryString: $queryString, pageTypeEnum: $pageTypeEnum}
+                    ) {
+                        job {
+                            description
+                            __typename
+                        }
+                        __typename
+                    }
+                }
+                """
+            }
+        ]
+        response = requests.post(url, json=body, headers=GlassdoorScraper.headers())
+        if response.status_code != 200:
+            return None
+        data = response.json()[0]
+        desc = data['data']['jobview']['job']['description']
+        soup = BeautifulSoup(desc, 'html.parser')
+        return modify_and_get_description(soup)

 @staticmethod
 def parse_compensation(data: dict) -> Optional[Compensation]:
     pay_period = data.get("payPeriod")
@@ -199,7 +260,7 @@ class GlassdoorScraper(Scraper):
"operationName": "JobSearchResultsQuery", "operationName": "JobSearchResultsQuery",
"variables": { "variables": {
"excludeJobListingIds": [], "excludeJobListingIds": [],
"filterParams": [], "filterParams": [{"filterKey": "applicationType", "values": "1"}] if scraper_input.easy_apply else [],
"keyword": scraper_input.search_term, "keyword": scraper_input.search_term,
"numJobsToShow": 30, "numJobsToShow": 30,
"locationType": location_type, "locationType": location_type,
@@ -231,12 +292,11 @@ class GlassdoorScraper(Scraper):
         for job_type in JobType:
             if job_type_str in job_type.value:
                 return [job_type]
-        return None

     @staticmethod
-    def parse_location(location_name: str) -> Location:
+    def parse_location(location_name: str) -> Location | None:
         if not location_name or location_name == "Remote":
-            return None
+            return
         city, _, state = location_name.partition(", ")
         return Location(city=city, state=state)
@@ -245,7 +305,6 @@ class GlassdoorScraper(Scraper):
         for cursor_data in pagination_cursors:
             if cursor_data["pageNumber"] == page_num:
                 return cursor_data["cursor"]
-        return None

     @staticmethod
     def headers() -> dict:
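Note: the scrape_page change above replaces the sequential listing loop with a thread pool. A minimal standalone sketch of the same submit/as_completed pattern, with a placeholder worker and made-up data instead of the real process_job:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def process_job(job_data: dict) -> dict | None:
    # Placeholder worker; the real one fetches the job description over HTTP.
    return job_data if job_data.get("id") is not None else None

jobs_data = [{"id": i} for i in range(5)]
jobs = []
with ThreadPoolExecutor(max_workers=5) as executor:
    future_to_job_data = {executor.submit(process_job, job): job for job in jobs_data}
    for future in as_completed(future_to_job_data):
        job_post = future.result()  # re-raises any exception from the worker
        if job_post:
            jobs.append(job_post)
print(jobs)
```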

src/jobspy/scrapers/indeed/__init__.py

@@ -8,6 +8,7 @@ import re
 import math
 import io
 import json
+from typing import Any
 from datetime import datetime
 import urllib.parse
@@ -21,6 +22,7 @@ from ..utils import (
     extract_emails_from_text,
     create_session,
     get_enum_from_job_type,
+    modify_and_get_description
 )
 from ...jobs import (
     JobPost,
@@ -43,7 +45,7 @@ class IndeedScraper(Scraper):
         site = Site(Site.INDEED)
         super().__init__(site, proxy=proxy)

-        self.jobs_per_page = 15
+        self.jobs_per_page = 25
         self.seen_urls = set()

     def scrape_page(
@@ -59,30 +61,12 @@ class IndeedScraper(Scraper):
         domain = self.country.indeed_domain_value
         self.url = f"https://{domain}.indeed.com"

-        params = {
-            "q": scraper_input.search_term,
-            "l": scraper_input.location,
-            "filter": 0,
-            "start": scraper_input.offset + page * 10,
-            "sort": "date"
-        }
-        if scraper_input.distance:
-            params["radius"] = scraper_input.distance
-
-        sc_values = []
-        if scraper_input.is_remote:
-            sc_values.append("attr(DSQF7)")
-        if scraper_input.job_type:
-            sc_values.append("jt({})".format(scraper_input.job_type.value))
-
-        if sc_values:
-            params["sc"] = "0kf:" + "".join(sc_values) + ";"
         try:
-            session = create_session(self.proxy, is_tls=True)
+            session = create_session(self.proxy)
             response = session.get(
-                f"{self.url}/jobs",
+                f"{self.url}/m/jobs",
                 headers=self.get_headers(),
-                params=params,
+                params=self.add_params(scraper_input, page),
                 allow_redirects=True,
                 timeout_seconds=10,
             )
@@ -111,8 +95,8 @@ class IndeedScraper(Scraper):
         ):
             raise IndeedException("No jobs found.")

-        def process_job(job) -> JobPost | None:
-            job_url = f'{self.url}/jobs/viewjob?jk={job["jobkey"]}'
+        def process_job(job: dict) -> JobPost | None:
+            job_url = f'{self.url}/m/jobs/viewjob?jk={job["jobkey"]}'
             job_url_client = f'{self.url}/viewjob?jk={job["jobkey"]}'
             if job_url in self.seen_urls:
                 return None
@@ -140,7 +124,8 @@ class IndeedScraper(Scraper):
                 date_posted = datetime.fromtimestamp(timestamp_seconds)
                 date_posted = date_posted.strftime("%Y-%m-%d")

-            description = self.get_description(job_url)
+            description = self.get_description(job_url) if scraper_input.full_description else None
             with io.StringIO(job["snippet"]) as f:
                 soup_io = BeautifulSoup(f, "html.parser")
                 li_elements = soup_io.find_all("li")
@@ -192,7 +177,7 @@ class IndeedScraper(Scraper):
         #: get first page to initialize session
         job_list, total_results = self.scrape_page(scraper_input, 0)

-        with ThreadPoolExecutor(max_workers=1) as executor:
+        with ThreadPoolExecutor(max_workers=10) as executor:
             futures: list[Future] = [
                 executor.submit(self.scrape_page, scraper_input, page)
                 for page in range(1, pages_to_process + 1)
@@ -246,9 +231,7 @@ class IndeedScraper(Scraper):
             return None

         soup = BeautifulSoup(job_description, "html.parser")
-        text_content = " ".join(soup.get_text(separator=" ").split()).strip()
-
-        return text_content
+        return modify_and_get_description(soup)

     @staticmethod
     def get_job_type(job: dict) -> list[JobType] | None:
@@ -331,17 +314,14 @@ class IndeedScraper(Scraper):
     @staticmethod
     def get_headers():
         return {
-            "authority": "www.indeed.com",
-            "accept": "*/*",
-            "accept-language": "en-US,en;q=0.9",
-            "referer": "https://www.indeed.com/viewjob?jk=fe6182337d72c7b1&tk=1hcbfcmd0k62t802&from=serp&vjs=3&advn=8132938064490989&adid=408692607&ad=-6NYlbfkN0A3Osc99MJFDKjquSk4WOGT28ALb_ad4QMtrHreCb9ICg6MiSVy9oDAp3evvOrI7Q-O9qOtQTg1EPbthP9xWtBN2cOuVeHQijxHjHpJC65TjDtftH3AXeINjBvAyDrE8DrRaAXl8LD3Fs1e_xuDHQIssdZ2Mlzcav8m5jHrA0fA64ZaqJV77myldaNlM7-qyQpy4AsJQfvg9iR2MY7qeC5_FnjIgjKIy_lNi9OPMOjGRWXA94CuvC7zC6WeiJmBQCHISl8IOBxf7EdJZlYdtzgae3593TFxbkd6LUwbijAfjax39aAuuCXy3s9C4YgcEP3TwEFGQoTpYu9Pmle-Ae1tHGPgsjxwXkgMm7Cz5mBBdJioglRCj9pssn-1u1blHZM4uL1nK9p1Y6HoFgPUU9xvKQTHjKGdH8d4y4ETyCMoNF4hAIyUaysCKdJKitC8PXoYaWhDqFtSMR4Jys8UPqUV&xkcb=SoDD-_M3JLQfWnQTDh0LbzkdCdPP&xpse=SoBa6_I3JLW9FlWZlB0PbzkdCdPP&sjdu=i6xVERweJM_pVUvgf-MzuaunBTY7G71J5eEX6t4DrDs5EMPQdODrX7Nn-WIPMezoqr5wA_l7Of-3CtoiUawcHw",
-            "sec-ch-ua": '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
-            "sec-ch-ua-mobile": "?0",
-            "sec-ch-ua-platform": '"Windows"',
-            "sec-fetch-dest": "empty",
-            "sec-fetch-mode": "cors",
-            "sec-fetch-site": "same-origin",
-            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
+            'Host': 'www.indeed.com',
+            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+            'sec-fetch-site': 'same-origin',
+            'sec-fetch-dest': 'document',
+            'accept-language': 'en-US,en;q=0.9',
+            'sec-fetch-mode': 'navigate',
+            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Indeed App 192.0',
+            'referer': 'https://www.indeed.com/m/jobs?q=software%20intern&l=Dallas%2C%20TX&from=serpso&rq=1&rsIdx=3',
         }

     @staticmethod
@@ -354,3 +334,29 @@ class IndeedScraper(Scraper):
             if taxonomy["label"] == "remote" and len(taxonomy["attributes"]) > 0:
                 return True
         return False
+
+    @staticmethod
+    def add_params(scraper_input: ScraperInput, page: int) -> dict[str, str | Any]:
+        params = {
+            "q": scraper_input.search_term,
+            "l": scraper_input.location,
+            "filter": 0,
+            "start": scraper_input.offset + page * 10,
+            "sort": "date"
+        }
+        if scraper_input.distance:
+            params["radius"] = scraper_input.distance
+
+        sc_values = []
+        if scraper_input.is_remote:
+            sc_values.append("attr(DSQF7)")
+        if scraper_input.job_type:
+            sc_values.append("jt({})".format(scraper_input.job_type.value))
+
+        if sc_values:
+            params["sc"] = "0kf:" + "".join(sc_values) + ";"
+        if scraper_input.easy_apply:
+            params['iafilter'] = 1
+        return params
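Note: a worked example of what the new add_params helper builds. The input values are illustrative, not from the diff: search_term="software intern", location="Dallas, TX", distance=25, offset=0, page=1, is_remote=True, job_type=None, easy_apply=True would yield roughly:

```python
import urllib.parse

# The dict add_params would return for the illustrative inputs above.
params = {
    "q": "software intern",
    "l": "Dallas, TX",
    "filter": 0,
    "start": 0 + 1 * 10,       # offset + page * 10
    "sort": "date",
    "radius": 25,              # from scraper_input.distance
    "sc": "0kf:attr(DSQF7);",  # remote-only filter
    "iafilter": 1,             # the new easy-apply filter
}
# The mobile endpoint the scraper now targets:
print(f"https://www.indeed.com/m/jobs?{urllib.parse.urlencode(params)}")
```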

src/jobspy/scrapers/linkedin/__init__.py

@@ -4,23 +4,36 @@ jobspy.scrapers.linkedin
 This module contains routines to scrape LinkedIn.
 """
-import time
 import random
 from typing import Optional
 from datetime import datetime

 import requests
+import time
 from requests.exceptions import ProxyError
-from bs4 import BeautifulSoup
-from bs4.element import Tag
 from threading import Lock
+from bs4.element import Tag
+from bs4 import BeautifulSoup
 from urllib.parse import urlparse, urlunparse

 from .. import Scraper, ScraperInput, Site
 from ..exceptions import LinkedInException
 from ..utils import create_session
-from ...jobs import JobPost, Location, JobResponse, JobType, Country, Compensation
-from ..utils import count_urgent_words, extract_emails_from_text, get_enum_from_job_type, currency_parser
+from ...jobs import (
+    JobPost,
+    Location,
+    JobResponse,
+    JobType,
+    Country,
+    Compensation
+)
+from ..utils import (
+    count_urgent_words,
+    extract_emails_from_text,
+    get_enum_from_job_type,
+    currency_parser,
+    modify_and_get_description
+)


 class LinkedInScraper(Scraper):
@@ -111,7 +124,7 @@ class LinkedInScraper(Scraper):
                 # Call process_job directly without threading
                 try:
-                    job_post = self.process_job(job_card, job_url)
+                    job_post = self.process_job(job_card, job_url, scraper_input.full_description)
                     if job_post:
                         job_list.append(job_post)
                 except Exception as e:
@@ -123,7 +136,7 @@ class LinkedInScraper(Scraper):
         job_list = job_list[: scraper_input.results_wanted]
         return JobResponse(jobs=job_list)

-    def process_job(self, job_card: Tag, job_url: str) -> Optional[JobPost]:
+    def process_job(self, job_card: Tag, job_url: str, full_descr: bool) -> Optional[JobPost]:
         salary_tag = job_card.find('span', class_='job-search-card__salary-info')

         compensation = None
@@ -160,7 +173,7 @@ class LinkedInScraper(Scraper):
             if metadata_card
             else None
         )

-        date_posted = None
+        date_posted = description = job_type = None
         if datetime_tag and "datetime" in datetime_tag.attrs:
             datetime_str = datetime_tag["datetime"]
             try:
@@ -169,9 +182,8 @@ class LinkedInScraper(Scraper):
                 date_posted = None

         benefits_tag = job_card.find("span", class_="result-benefits__text")
         benefits = " ".join(benefits_tag.get_text().split()) if benefits_tag else None
-
-        # removed to speed up scraping
-        # description, job_type = self.get_job_description(job_url)
+        if full_descr:
+            description, job_type = self.get_job_description(job_url)

         return JobPost(
             title=title,
@@ -182,10 +194,10 @@ class LinkedInScraper(Scraper):
             job_url=job_url,
             compensation=compensation,
             benefits=benefits,
-            # job_type=job_type,
-            # description=description,
-            # emails=extract_emails_from_text(description) if description else None,
-            # num_urgent_words=count_urgent_words(description) if description else None,
+            job_type=job_type,
+            description=description,
+            emails=extract_emails_from_text(description) if description else None,
+            num_urgent_words=count_urgent_words(description) if description else None,
         )

     def get_job_description(
@@ -214,7 +226,7 @@ class LinkedInScraper(Scraper):
         description = None
         if div_content:
-            description = " ".join(div_content.get_text().split()).strip()
+            description = modify_and_get_description(div_content)

     def get_job_type(
         soup_job_type: BeautifulSoup,

src/jobspy/scrapers/utils.py

@@ -8,6 +8,15 @@ from requests.adapters import HTTPAdapter, Retry
 from ..jobs import JobType


+def modify_and_get_description(soup):
+    for li in soup.find_all('li'):
+        li.string = "- " + li.get_text()
+
+    description = soup.get_text(separator='\n').strip()
+    description = re.sub(r'\n+', '\n', description)
+    return description
+
+
 def count_urgent_words(description: str) -> int:
     """
     Count the number of urgent words or phrases in a job description.
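Note: the new shared helper rewrites each <li> as a "- " bullet before flattening HTML to text, which is what the Glassdoor, Indeed, LinkedIn, and ZipRecruiter scrapers above now call. A quick illustration against made-up HTML, reusing the function exactly as defined in the hunk:

```python
import re
from bs4 import BeautifulSoup

def modify_and_get_description(soup):
    # Same body as the helper added above.
    for li in soup.find_all('li'):
        li.string = "- " + li.get_text()
    description = soup.get_text(separator='\n').strip()
    description = re.sub(r'\n+', '\n', description)
    return description

html = "<p>Perks</p><ul><li>Remote-friendly</li><li>401k match</li></ul>"
print(modify_and_get_description(BeautifulSoup(html, "html.parser")))
# Perks
# - Remote-friendly
# - 401k match
```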

src/jobspy/scrapers/ziprecruiter/__init__.py

@@ -10,14 +10,13 @@ import re
 from datetime import datetime, date
 from typing import Optional, Tuple, Any

-import requests
 from bs4 import BeautifulSoup
 from concurrent.futures import ThreadPoolExecutor

 from .. import Scraper, ScraperInput, Site
 from ..exceptions import ZipRecruiterException
-from ..utils import count_urgent_words, extract_emails_from_text, create_session
 from ...jobs import JobPost, Compensation, Location, JobResponse, JobType, Country
+from ..utils import count_urgent_words, extract_emails_from_text, create_session, modify_and_get_description


 class ZipRecruiterScraper(Scraper):
@@ -107,9 +106,9 @@ class ZipRecruiterScraper(Scraper):
         title = job.get("name")
         job_url = job.get("job_url")
-        description = BeautifulSoup(
-            job.get("job_description", "").strip(), "html.parser"
-        ).get_text()
+        job_description_html = job.get("job_description", "").strip()
+        description_soup = BeautifulSoup(job_description_html, "html.parser")
+        description = modify_and_get_description(description_soup)

         company = job["hiring_company"].get("name") if "hiring_company" in job else None
         country_value = "usa" if job.get("job_country") == "US" else "canada"
@@ -184,6 +183,8 @@ class ZipRecruiterScraper(Scraper):
job_type_value = "part_time" job_type_value = "part_time"
else: else:
job_type_value = scraper_input.job_type.value job_type_value = scraper_input.job_type.value
if scraper_input.easy_apply:
params['zipapply'] = 1
if job_type_value: if job_type_value:
params[ params[