From 191a5ea9fd71f64b0c10ec73a69137ec17bc3b47 Mon Sep 17 00:00:00 2001
From: Cullen Watson <cullen@cullenwatson.com>
Date: Wed, 14 Feb 2024 16:00:33 -0600
Subject: [PATCH] enh: description format

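Adds a `description_format` option ("markdown" or "html"); job descriptions
are converted to Markdown via html2text by default. Renames the
`full_description` flag to `linkedin_fetch_description` and bumps the
version to 1.1.45.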
---
 README.md                                    |   2 +-
 poetry.lock                                  |  13 +-
 pyproject.toml                               |   3 +-
 src/jobspy/__init__.py                       |  54 +--
 src/jobspy/jobs/__init__.py                  |   5 +
 src/jobspy/scrapers/__init__.py              |  12 +-
 src/jobspy/scrapers/glassdoor/__init__.py    | 260 +++++------
 src/jobspy/scrapers/indeed/__init__.py       | 459 +++++++++----------
 src/jobspy/scrapers/linkedin/__init__.py     | 174 ++++---
 src/jobspy/scrapers/utils.py                 |  21 +-
 src/jobspy/scrapers/ziprecruiter/__init__.py | 176 +++----
 11 files changed, 589 insertions(+), 590 deletions(-)

diff --git a/README.md b/README.md
index 8ae668f..1604885 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ Optional
 ├── job_type (enum): fulltime, parttime, internship, contract
 ├── proxy (str): in format 'http://user:pass@host:port' or [https, socks]
 ├── is_remote (bool)
-├── full_description (bool): fetches full description for LinkedIn (slower)
+├── linkedin_fetch_description (bool): fetches full description for LinkedIn (slower)
 ├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
 ├── easy_apply (bool): filters for jobs that are hosted on the job board site
 ├── linkedin_company_ids (list[int]): searches for linkedin jobs with specific company ids
diff --git a/poetry.lock b/poetry.lock
index d573844..d4581f9 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -524,6 +524,17 @@ files = [
     {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"},
 ]
 
+[[package]]
+name = "html2text"
+version = "2020.1.16"
+description = "Turn HTML into equivalent Markdown-structured text."
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "html2text-2020.1.16-py3-none-any.whl", hash = "sha256:c7c629882da0cf377d66f073329ccf34a12ed2adf0169b9285ae4e63ef54c82b"},
+    {file = "html2text-2020.1.16.tar.gz", hash = "sha256:e296318e16b059ddb97f7a8a1d6a5c1d7af4544049a01e261731d2d5cc277bbb"},
+]
+
 [[package]]
 name = "idna"
 version = "3.4"
@@ -2435,4 +2446,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "404a77d78066cbb2ef71015562baf44aa11d12aac29a191c1ccc7758bfda598a"
+content-hash = "40cdc19a57cba0d21ff4f0fcfa53e14a073fcccd9f2a871440e056ab6e8fade0"
diff --git a/pyproject.toml b/pyproject.toml
index e939358..8fd7ba7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.44"
+version = "1.1.45"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/JobSpy"
@@ -18,6 +18,7 @@ beautifulsoup4 = "^4.12.2"
 pandas = "^2.1.0"
 NUMPY = "1.24.2"
 pydantic = "^2.3.0"
+html2text = "^2020.1.16"
 
 
 [tool.poetry.group.dev.dependencies]
diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py
index 7370c37..c4c87d9 100644
--- a/src/jobspy/__init__.py
+++ b/src/jobspy/__init__.py
@@ -15,17 +15,6 @@ from .scrapers.exceptions import (
     GlassdoorException,
 )
 
-SCRAPER_MAPPING = {
-    Site.LINKEDIN: LinkedInScraper,
-    Site.INDEED: IndeedScraper,
-    Site.ZIP_RECRUITER: ZipRecruiterScraper,
-    Site.GLASSDOOR: GlassdoorScraper,
-}
-
-
-def _map_str_to_site(site_name: str) -> Site:
-    return Site[site_name.upper()]
-
 
 def scrape_jobs(
     site_name: str | list[str] | Site | list[Site] | None = None,
@@ -39,7 +28,8 @@ def scrape_jobs(
     country_indeed: str = "usa",
     hyperlinks: bool = False,
     proxy: str | None = None,
-    full_description: bool | None = False,
+    description_format: str = "markdown",
+    linkedin_fetch_description: bool | None = False,
     linkedin_company_ids: list[int] | None = None,
     offset: int | None = 0,
     hours_old: int | None = None,
@@ -49,6 +39,15 @@ def scrape_jobs(
     Simultaneously scrapes job data from multiple job sites.
     :return: pandas dataframe containing job data
     """
+    SCRAPER_MAPPING = {
+        Site.LINKEDIN: LinkedInScraper,
+        Site.INDEED: IndeedScraper,
+        Site.ZIP_RECRUITER: ZipRecruiterScraper,
+        Site.GLASSDOOR: GlassdoorScraper,
+    }
+
+    def map_str_to_site(site_name: str) -> Site:
+        return Site[site_name.upper()]
 
     def get_enum_from_value(value_str):
         for job_type in JobType:
@@ -61,16 +60,15 @@ def scrape_jobs(
     def get_site_type():
         site_types = list(Site)
         if isinstance(site_name, str):
-            site_types = [_map_str_to_site(site_name)]
+            site_types = [map_str_to_site(site_name)]
         elif isinstance(site_name, Site):
             site_types = [site_name]
         elif isinstance(site_name, list):
             site_types = [
-                _map_str_to_site(site) if isinstance(site, str) else site
+                map_str_to_site(site) if isinstance(site, str) else site
                 for site in site_name
             ]
         return site_types
-
     country_enum = Country.from_string(country_indeed)
 
     scraper_input = ScraperInput(
@@ -82,7 +80,8 @@ def scrape_jobs(
         is_remote=is_remote,
         job_type=job_type,
         easy_apply=easy_apply,
-        full_description=full_description,
+        description_format=description_format,
+        linkedin_fetch_description=linkedin_fetch_description,
         results_wanted=results_wanted,
         linkedin_company_ids=linkedin_company_ids,
         offset=offset,
@@ -92,22 +91,7 @@ def scrape_jobs(
     def scrape_site(site: Site) -> Tuple[str, JobResponse]:
         scraper_class = SCRAPER_MAPPING[site]
         scraper = scraper_class(proxy=proxy)
-
-        try:
-            scraped_data: JobResponse = scraper.scrape(scraper_input)
-        except (LinkedInException, IndeedException, ZipRecruiterException) as lie:
-            raise lie
-        except Exception as e:
-            if site == Site.LINKEDIN:
-                raise LinkedInException(str(e))
-            if site == Site.INDEED:
-                raise IndeedException(str(e))
-            if site == Site.ZIP_RECRUITER:
-                raise ZipRecruiterException(str(e))
-            if site == Site.GLASSDOOR:
-                raise GlassdoorException(str(e))
-            else:
-                raise e
+        scraped_data: JobResponse = scraper.scrape(scraper_input)
         return site.value, scraped_data
 
     site_to_jobs_dict = {}
@@ -188,8 +172,6 @@ def scrape_jobs(
             "emails",
             "description",
         ]
-        jobs_formatted_df = jobs_df[desired_order]
+        return jobs_df[desired_order].sort_values(by=['site', 'date_posted'], ascending=[True, False])
     else:
-        jobs_formatted_df = pd.DataFrame()
-
-    return jobs_formatted_df.sort_values(by=['site', 'date_posted'], ascending=[True, False])
+        return pd.DataFrame()
diff --git a/src/jobspy/jobs/__init__.py b/src/jobspy/jobs/__init__.py
index a819897..c4bbb43 100644
--- a/src/jobspy/jobs/__init__.py
+++ b/src/jobspy/jobs/__init__.py
@@ -210,6 +210,11 @@ class Compensation(BaseModel):
     currency: Optional[str] = "USD"
 
 
+class DescriptionFormat(Enum):
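+    """Output format for scraped job descriptions."""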
+    MARKDOWN = "markdown"
+    HTML = "html"
+
+
 class JobPost(BaseModel):
     title: str
     company_name: str
diff --git a/src/jobspy/scrapers/__init__.py b/src/jobspy/scrapers/__init__.py
index f180d0d..0c14252 100644
--- a/src/jobspy/scrapers/__init__.py
+++ b/src/jobspy/scrapers/__init__.py
@@ -1,4 +1,11 @@
-from ..jobs import Enum, BaseModel, JobType, JobResponse, Country
+from ..jobs import (
+    Enum,
+    BaseModel,
+    JobType,
+    JobResponse,
+    Country,
+    DescriptionFormat
+)
 
 
 class Site(Enum):
@@ -18,9 +25,10 @@ class ScraperInput(BaseModel):
     is_remote: bool = False
     job_type: JobType | None = None
     easy_apply: bool | None = None
-    full_description: bool = False
     offset: int = 0
+    linkedin_fetch_description: bool = False
     linkedin_company_ids: list[int] | None = None
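+    # markdown (default) converts description HTML via html2text; html keeps it raw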
+    description_format: DescriptionFormat | None = DescriptionFormat.MARKDOWN
 
     results_wanted: int = 15
     hours_old: int | None = None
diff --git a/src/jobspy/scrapers/glassdoor/__init__.py b/src/jobspy/scrapers/glassdoor/__init__.py
index 3352a2a..9bffd16 100644
--- a/src/jobspy/scrapers/glassdoor/__init__.py
+++ b/src/jobspy/scrapers/glassdoor/__init__.py
@@ -13,7 +13,11 @@ from ..utils import count_urgent_words, extract_emails_from_text
 
 from .. import Scraper, ScraperInput, Site
 from ..exceptions import GlassdoorException
-from ..utils import create_session
+from ..utils import (
+    create_session,
+    markdown_converter,
+    logger
+)
 from ...jobs import (
     JobPost,
     Compensation,
@@ -21,6 +25,7 @@ from ...jobs import (
     Location,
     JobResponse,
     JobType,
+    DescriptionFormat
 )
 
 
@@ -32,13 +37,57 @@ class GlassdoorScraper(Scraper):
         site = Site(Site.GLASSDOOR)
         super().__init__(site, proxy=proxy)
 
-        self.url = None
+        self.base_url = None
         self.country = None
         self.session = None
+        self.scraper_input = None
         self.jobs_per_page = 30
         self.seen_urls = set()
 
-    def fetch_jobs_page(
+    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
+        """
+        Scrapes Glassdoor for jobs with scraper_input criteria.
+        :param scraper_input: Information about job search criteria.
+        :return: JobResponse containing a list of jobs.
+        """
+        self.scraper_input = scraper_input
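+        # Glassdoor serves at most ~900 results per search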
+        self.scraper_input.results_wanted = min(900, scraper_input.results_wanted)
+        self.base_url = self.scraper_input.country.get_url()
+
+        location_id, location_type = self._get_location(
+            scraper_input.location, scraper_input.is_remote
+        )
+        if location_type is None:
+            return JobResponse(jobs=[])
+        all_jobs: list[JobPost] = []
+        cursor = None
+        max_pages = 30
+        self.session = create_session(self.proxy, is_tls=False, has_retry=True)
+        self.session.get(self.base_url)
+
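+        # pages are 1-based; request enough pages to cover results_wanted, capped at max_pages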
+        try:
+            for page in range(
+                1 + (scraper_input.offset // self.jobs_per_page),
+                min(
+                    (scraper_input.results_wanted // self.jobs_per_page) + 2,
+                    max_pages + 1,
+                ),
+            ):
+                jobs, cursor = self._fetch_jobs_page(
+                    scraper_input, location_id, location_type, page, cursor
+                )
+                all_jobs.extend(jobs)
+                if len(all_jobs) >= scraper_input.results_wanted:
+                    all_jobs = all_jobs[: scraper_input.results_wanted]
+                    break
+        except Exception as e:
+            raise GlassdoorException(str(e))
+        return JobResponse(jobs=all_jobs)
+
+    def _fetch_jobs_page(
         self,
         scraper_input: ScraperInput,
         location_id: int,
@@ -49,12 +98,13 @@ class GlassdoorScraper(Scraper):
         """
         Scrapes a page of Glassdoor for jobs with scraper_input criteria
         """
+        self.scraper_input = scraper_input
         try:
-            payload = self.add_payload(
-                scraper_input, location_id, location_type, page_num, cursor
+            payload = self._add_payload(
+                location_id, location_type, page_num, cursor
             )
             response = self.session.post(
-                f"{self.url}/graph", headers=self.headers(), timeout=10, data=payload
+                f"{self.base_url}/graph", headers=self.headers, timeout=10, data=payload
             )
             if response.status_code != 200:
                 raise GlassdoorException(
@@ -70,7 +120,7 @@ class GlassdoorScraper(Scraper):
 
         jobs = []
         with ThreadPoolExecutor(max_workers=self.jobs_per_page) as executor:
-            future_to_job_data = {executor.submit(self.process_job, job): job for job in jobs_data}
+            future_to_job_data = {executor.submit(self._process_job, job): job for job in jobs_data}
             for future in as_completed(future_to_job_data):
                 try:
                     job_post = future.result()
@@ -83,10 +133,12 @@ class GlassdoorScraper(Scraper):
             res_json["data"]["jobListings"]["paginationCursors"], page_num + 1
         )
 
-    def process_job(self, job_data):
-        """Processes a single job and fetches its description."""
+    def _process_job(self, job_data):
+        """
+        Processes a single job and fetches its description.
+        """
         job_id = job_data["jobview"]["job"]["listingId"]
-        job_url = f'{self.url}job-listing/j?jl={job_id}'
+        job_url = f'{self.base_url}job-listing/j?jl={job_id}'
         if job_url in self.seen_urls:
             return None
         self.seen_urls.add(job_url)
@@ -106,15 +158,13 @@ class GlassdoorScraper(Scraper):
             location = self.parse_location(location_name)
 
         compensation = self.parse_compensation(job["header"])
-
         try:
-            description = self.fetch_job_description(job_id)
+            description = self._fetch_job_description(job_id)
         except:
             description = None
-
-        job_post = JobPost(
+        return JobPost(
             title=title,
-            company_url=f"{self.url}Overview/W-EI_IE{company_id}.htm" if company_id else None,
+            company_url=f"{self.base_url}Overview/W-EI_IE{company_id}.htm" if company_id else None,
             company_name=company_name,
             date_posted=date_posted,
             job_url=job_url,
@@ -125,53 +175,12 @@ class GlassdoorScraper(Scraper):
             emails=extract_emails_from_text(description) if description else None,
             num_urgent_words=count_urgent_words(description) if description else None,
         )
-        return job_post
 
-    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
+    def _fetch_job_description(self, job_id):
         """
-        Scrapes Glassdoor for jobs with scraper_input criteria.
-        :param scraper_input: Information about job search criteria.
-        :return: JobResponse containing a list of jobs.
+        Fetches the job description for a single job ID.
         """
-        scraper_input.results_wanted = min(900, scraper_input.results_wanted)
-        self.country = scraper_input.country
-        self.url = self.country.get_url()
-
-        location_id, location_type = self.get_location(
-            scraper_input.location, scraper_input.is_remote
-        )
-        all_jobs: list[JobPost] = []
-        cursor = None
-        max_pages = 30
-        self.session = create_session(self.proxy, is_tls=False, has_retry=True)
-        self.session.get(self.url)
-
-        try:
-            for page in range(
-                1 + (scraper_input.offset // self.jobs_per_page),
-                min(
-                    (scraper_input.results_wanted // self.jobs_per_page) + 2,
-                    max_pages + 1,
-                ),
-            ):
-                try:
-                    jobs, cursor = self.fetch_jobs_page(
-                        scraper_input, location_id, location_type, page, cursor
-                    )
-                    all_jobs.extend(jobs)
-                    if len(all_jobs) >= scraper_input.results_wanted:
-                        all_jobs = all_jobs[: scraper_input.results_wanted]
-                        break
-                except Exception as e:
-                    raise GlassdoorException(str(e))
-        except Exception as e:
-            raise GlassdoorException(str(e))
-
-        return JobResponse(jobs=all_jobs)
-
-    def fetch_job_description(self, job_id):
-        """Fetches the job description for a single job ID."""
-        url = f"{self.url}/graph"
+        url = f"{self.base_url}/graph"
         body = [
             {
                 "operationName": "JobDetailQuery",
@@ -196,48 +205,28 @@ class GlassdoorScraper(Scraper):
                 """
             }
         ]
-        response = requests.post(url, json=body, headers=GlassdoorScraper.headers())
-        if response.status_code != 200:
+        res = requests.post(url, json=body, headers=self.headers)
+        if res.status_code != 200:
             return None
-        data = response.json()[0]
+        data = res.json()[0]
         desc = data['data']['jobview']['job']['description']
-        return desc
+        if self.scraper_input.description_format == DescriptionFormat.MARKDOWN:
+            desc = markdown_converter(desc)
+        return desc
 
-    @staticmethod
-    def parse_compensation(data: dict) -> Optional[Compensation]:
-        pay_period = data.get("payPeriod")
-        adjusted_pay = data.get("payPeriodAdjustedPay")
-        currency = data.get("payCurrency", "USD")
-
-        if not pay_period or not adjusted_pay:
-            return None
-
-        interval = None
-        if pay_period == "ANNUAL":
-            interval = CompensationInterval.YEARLY
-        elif pay_period:
-            interval = CompensationInterval.get_interval(pay_period)
-        min_amount = int(adjusted_pay.get("p10") // 1)
-        max_amount = int(adjusted_pay.get("p90") // 1)
-
-        return Compensation(
-            interval=interval,
-            min_amount=min_amount,
-            max_amount=max_amount,
-            currency=currency,
-        )
-
-    def get_location(self, location: str, is_remote: bool) -> (int, str):
+    def _get_location(self, location: str, is_remote: bool) -> tuple[int | str | None, str | None]:
         if not location or is_remote:
             return "11047", "STATE"  # remote options
-        url = f"{self.url}/findPopularLocationAjax.htm?maxLocationsToReturn=10&term={location}"
+        url = f"{self.base_url}/findPopularLocationAjax.htm?maxLocationsToReturn=10&term={location}"
         session = create_session(self.proxy, has_retry=True)
-        response = session.get(url)
-        if response.status_code != 200:
-            raise GlassdoorException(
-                f"bad response status code: {response.status_code}"
-            )
-        items = response.json()
+        res = session.get(url)
+        if res.status_code != 200:
+            if res.status_code == 429:
+                logger.error('429 Response - Blocked by Glassdoor for too many requests')
+            else:
+                logger.error(f'Glassdoor response status code {res.status_code}')
+            return None, None
+        items = res.json()
+
         if not items:
             raise ValueError(f"Location '{location}' not found on Glassdoor")
         location_type = items[0]["locationType"]
@@ -249,18 +238,16 @@ class GlassdoorScraper(Scraper):
             location_type = "COUNTRY"
         return int(items[0]["locationId"]), location_type
 
-    @staticmethod
-    def add_payload(
-        scraper_input,
+    def _add_payload(
+        self,
         location_id: int,
         location_type: str,
         page_num: int,
         cursor: str | None = None,
     ) -> str:
-        # `fromage` is the posting time filter in days
-        fromage = max(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None
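+        # `fromage` is the posting time filter in days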
+        fromage = max(self.scraper_input.hours_old // 24, 1) if self.scraper_input.hours_old else None
         filter_params = []
-        if scraper_input.easy_apply:
+        if self.scraper_input.easy_apply:
             filter_params.append({"filterKey": "applicationType", "values": "1"})
         if fromage:
             filter_params.append({"filterKey": "fromAge", "values": str(fromage)})
@@ -269,7 +256,7 @@ class GlassdoorScraper(Scraper):
             "variables": {
                 "excludeJobListingIds": [],
                 "filterParams": filter_params,
-                "keyword": scraper_input.search_term,
+                "keyword": self.scraper_input.search_term,
                 "numJobsToShow": 30,
                 "locationType": location_type,
                 "locationId": int(location_id),
@@ -446,13 +433,34 @@ class GlassdoorScraper(Scraper):
             }
             """
         }
-
-        if scraper_input.job_type:
+        if self.scraper_input.job_type:
             payload["variables"]["filterParams"].append(
-                {"filterKey": "jobType", "values": scraper_input.job_type.value[0]}
+                {"filterKey": "jobType", "values": self.scraper_input.job_type.value[0]}
             )
         return json.dumps([payload])
 
+    @staticmethod
+    def parse_compensation(data: dict) -> Optional[Compensation]:
+        pay_period = data.get("payPeriod")
+        adjusted_pay = data.get("payPeriodAdjustedPay")
+        currency = data.get("payCurrency", "USD")
+        if not pay_period or not adjusted_pay:
+            return None
+
+        interval = None
+        if pay_period == "ANNUAL":
+            interval = CompensationInterval.YEARLY
+        elif pay_period:
+            interval = CompensationInterval.get_interval(pay_period)
+        min_amount = int(adjusted_pay.get("p10") // 1)
+        max_amount = int(adjusted_pay.get("p90") // 1)
+        return Compensation(
+            interval=interval,
+            min_amount=min_amount,
+            max_amount=max_amount,
+            currency=currency,
+        )
+
     @staticmethod
     def get_job_type_enum(job_type_str: str) -> list[JobType] | None:
         for job_type in JobType:
@@ -472,27 +480,21 @@ class GlassdoorScraper(Scraper):
             if cursor_data["pageNumber"] == page_num:
                 return cursor_data["cursor"]
 
-    @staticmethod
-    def headers() -> dict:
-        """
-        Returns headers needed for requests
-        :return: dict - Dictionary containing headers
-        """
-        return {
-            "authority": "www.glassdoor.com",
-            "accept": "*/*",
-            "accept-language": "en-US,en;q=0.9",
-            "apollographql-client-name": "job-search-next",
-            "apollographql-client-version": "4.65.5",
-            "content-type": "application/json",
-            "gd-csrf-token": "Ft6oHEWlRZrxDww95Cpazw:0pGUrkb2y3TyOpAIqF2vbPmUXoXVkD3oEGDVkvfeCerceQ5-n8mBg3BovySUIjmCPHCaW0H2nQVdqzbtsYqf4Q:wcqRqeegRUa9MVLJGyujVXB7vWFPjdaS1CtrrzJq-ok",
-            "origin": "https://www.glassdoor.com",
-            "referer": "https://www.glassdoor.com/",
-            "sec-ch-ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
-            "sec-ch-ua-mobile": "?0",
-            "sec-ch-ua-platform": '"macOS"',
-            "sec-fetch-dest": "empty",
-            "sec-fetch-mode": "cors",
-            "sec-fetch-site": "same-origin",
-            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
-        }
+    headers = {
+        "authority": "www.glassdoor.com",
+        "accept": "*/*",
+        "accept-language": "en-US,en;q=0.9",
+        "apollographql-client-name": "job-search-next",
+        "apollographql-client-version": "4.65.5",
+        "content-type": "application/json",
+        "gd-csrf-token": "Ft6oHEWlRZrxDww95Cpazw:0pGUrkb2y3TyOpAIqF2vbPmUXoXVkD3oEGDVkvfeCerceQ5-n8mBg3BovySUIjmCPHCaW0H2nQVdqzbtsYqf4Q:wcqRqeegRUa9MVLJGyujVXB7vWFPjdaS1CtrrzJq-ok",
+        "origin": "https://www.glassdoor.com",
+        "referer": "https://www.glassdoor.com/",
+        "sec-ch-ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
+        "sec-ch-ua-mobile": "?0",
+        "sec-ch-ua-platform": '"macOS"',
+        "sec-fetch-dest": "empty",
+        "sec-fetch-mode": "cors",
+        "sec-fetch-site": "same-origin",
+        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
+    }
diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py
index 5b05cbd..27c3d34 100644
--- a/src/jobspy/scrapers/indeed/__init__.py
+++ b/src/jobspy/scrapers/indeed/__init__.py
@@ -21,6 +21,7 @@ from ..utils import (
     extract_emails_from_text,
     create_session,
     get_enum_from_job_type,
+    markdown_converter,
     logger
 )
 from ...jobs import (
@@ -30,6 +31,7 @@ from ...jobs import (
     Location,
     JobResponse,
     JobType,
+    DescriptionFormat
 )
 from .. import Scraper, ScraperInput, Site
 
@@ -39,121 +41,23 @@ class IndeedScraper(Scraper):
         """
         Initializes IndeedScraper with the Indeed job search url
         """
-        self.url = None
-        self.country = None
+        self.scraper_input = None
+        self.jobs_per_page = 25
+        self.num_workers = 10
+        self.seen_urls = set()
+        self.base_url = None
+        self.api_url = "https://apis.indeed.com/graphql"
         site = Site(Site.INDEED)
         super().__init__(site, proxy=proxy)
 
-        self.jobs_per_page = 25
-        self.seen_urls = set()
-
-    def scrape_page(
-        self, scraper_input: ScraperInput, page: int
-    ) -> list[JobPost]:
-        """
-        Scrapes a page of Indeed for jobs with scraper_input criteria
-        :param scraper_input:
-        :param page:
-        :return: jobs found on page, total number of jobs found for search
-        """
-        job_list = []
-        self.country = scraper_input.country
-        domain = self.country.indeed_domain_value
-        self.url = f"https://{domain}.indeed.com"
-
-        try:
-            session = create_session(self.proxy)
-            response = session.get(
-                f"{self.url}/m/jobs",
-                headers=self.get_headers(),
-                params=self.add_params(scraper_input, page),
-                allow_redirects=True,
-                timeout_seconds=10,
-            )
-            if response.status_code not in range(200, 400):
-                raise IndeedException(
-                    f"bad response with status code: {response.status_code}"
-                )
-        except Exception as e:
-            if "Proxy responded with" in str(e):
-                logger.error(f'Indeed: Bad proxy')
-            else:
-                logger.error(f'Indeed: {str(e)}')
-            return job_list
-
-        soup = BeautifulSoup(response.content, "html.parser")
-        if "did not match any jobs" in response.text:
-            return job_list
-
-        jobs = IndeedScraper.parse_jobs(
-            soup
-        )  #: can raise exception, handled by main scrape function
-
-        if (
-            not jobs.get("metaData", {})
-            .get("mosaicProviderJobCardsModel", {})
-            .get("results")
-        ):
-            raise IndeedException("No jobs found.")
-
-        def process_job(job: dict, job_detailed: dict) -> JobPost | None:
-            job_url = f'{self.url}/m/jobs/viewjob?jk={job["jobkey"]}'
-            job_url_client = f'{self.url}/viewjob?jk={job["jobkey"]}'
-            if job_url in self.seen_urls:
-                return None
-            self.seen_urls.add(job_url)
-            description = job_detailed['description']['html']
-
-
-            job_type = IndeedScraper.get_job_type(job)
-            timestamp_seconds = job["pubDate"] / 1000
-            date_posted = datetime.fromtimestamp(timestamp_seconds)
-            date_posted = date_posted.strftime("%Y-%m-%d")
-
-            job_post = JobPost(
-                title=job["normTitle"],
-                description=description,
-                company_name=job["company"],
-                company_url=f"{self.url}{job_detailed['employer']['relativeCompanyPageUrl']}" if job_detailed['employer'] else None,
-                location=Location(
-                    city=job.get("jobLocationCity"),
-                    state=job.get("jobLocationState"),
-                    country=self.country,
-                ),
-                job_type=job_type,
-                compensation=self.get_compensation(job, job_detailed),
-                date_posted=date_posted,
-                job_url=job_url_client,
-                emails=extract_emails_from_text(description) if description else None,
-                num_urgent_words=count_urgent_words(description)
-                if description
-                else None,
-                is_remote=IndeedScraper.is_job_remote(job, job_detailed, description)
-
-            )
-            return job_post
-
-        workers = 10
-        jobs = jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
-        job_keys = [job['jobkey'] for job in jobs]
-        jobs_detailed = self.get_job_details(job_keys)
-
-        with ThreadPoolExecutor(max_workers=workers) as executor:
-            job_results: list[Future] = [
-                executor.submit(process_job, job, job_detailed['job']) for job, job_detailed in zip(jobs, jobs_detailed)
-            ]
-
-        job_list = [result.result() for result in job_results if result.result()]
-
-        return job_list
-
     def scrape(self, scraper_input: ScraperInput) -> JobResponse:
         """
         Scrapes Indeed for jobs with scraper_input criteria
         :param scraper_input:
         :return: job_response
         """
-        job_list = self.scrape_page(scraper_input, 0)
+        self.scraper_input = scraper_input
+        job_list = self._scrape_page()
         pages_processed = 1
 
         while len(self.seen_urls) < scraper_input.results_wanted:
@@ -162,7 +66,7 @@ class IndeedScraper(Scraper):
 
             with ThreadPoolExecutor(max_workers=10) as executor:
                 futures: list[Future] = [
-                    executor.submit(self.scrape_page, scraper_input, page + pages_processed)
+                    executor.submit(self._scrape_page, page + pages_processed)
                     for page in range(pages_to_process)
                 ]
 
@@ -184,8 +88,136 @@ class IndeedScraper(Scraper):
 
         return JobResponse(jobs=job_list)
 
+    def _scrape_page(self, page: int = 0) -> list[JobPost]:
+        """
+        Scrapes a page of Indeed for jobs with scraper_input criteria
+        :param page:
+        :return: jobs found on page
+        """
+        job_list = []
+        domain = self.scraper_input.country.indeed_domain_value
+        self.base_url = f"https://{domain}.indeed.com"
+
+        try:
+            session = create_session(self.proxy)
+            response = session.get(
+                f"{self.base_url}/m/jobs",
+                headers=self.headers,
+                params=self._add_params(page),
+            )
+            if response.status_code not in range(200, 400):
+                if response.status_code == 429:
+                    logger.error(f'429 Response - Blocked by Indeed for too many requests')
+                else:
+                    logger.error(f'Indeed response status code {response.status_code}')
+                return job_list
+
+        except Exception as e:
+            if "Proxy responded with" in str(e):
+                logger.error(f'Indeed: Bad proxy')
+            else:
+                logger.error(f'Indeed: {str(e)}')
+            return job_list
+
+        soup = BeautifulSoup(response.content, "html.parser")
+        if "did not match any jobs" in response.text:
+            return job_list
+
+        jobs = IndeedScraper._parse_jobs(soup)
+        if (
+            not jobs.get("metaData", {})
+            .get("mosaicProviderJobCardsModel", {})
+            .get("results")
+        ):
+            raise IndeedException("No jobs found.")
+
+        jobs = jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
+        job_keys = [job['jobkey'] for job in jobs]
+        jobs_detailed = self._get_job_details(job_keys)
+
+        with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
+            job_results: list[Future] = [
+                executor.submit(self._process_job, job, job_detailed['job']) for job, job_detailed in zip(jobs, jobs_detailed)
+            ]
+
+        job_list = [result.result() for result in job_results if result.result()]
+
+        return job_list
+
+    def _process_job(self, job: dict, job_detailed: dict) -> JobPost | None:
+        job_url = f'{self.base_url}/m/jobs/viewjob?jk={job["jobkey"]}'
+        job_url_client = f'{self.base_url}/viewjob?jk={job["jobkey"]}'
+        if job_url in self.seen_urls:
+            return None
+        self.seen_urls.add(job_url)
+        description = job_detailed['description']['html']
+        if self.scraper_input.description_format == DescriptionFormat.MARKDOWN:
+            description = markdown_converter(description)
+        job_type = self._get_job_type(job)
+        timestamp_seconds = job["pubDate"] / 1000
+        date_posted = datetime.fromtimestamp(timestamp_seconds)
+        date_posted = date_posted.strftime("%Y-%m-%d")
+        return JobPost(
+            title=job["normTitle"],
+            description=description,
+            company_name=job["company"],
+            company_url=f"{self.base_url}{job_detailed['employer']['relativeCompanyPageUrl']}" if job_detailed[
+                'employer'] else None,
+            location=Location(
+                city=job.get("jobLocationCity"),
+                state=job.get("jobLocationState"),
+                country=self.scraper_input.country,
+            ),
+            job_type=job_type,
+            compensation=self._get_compensation(job, job_detailed),
+            date_posted=date_posted,
+            job_url=job_url_client,
+            emails=extract_emails_from_text(description) if description else None,
+            num_urgent_words=count_urgent_words(description) if description else None,
+            is_remote=self._is_job_remote(job, job_detailed, description)
+        )
+
+    def _get_job_details(self, job_keys: list[str]) -> list:
+        """
+        Queries the GraphQL endpoint for detailed job information for the given job keys.
+        """
+        job_keys_gql = '[' + ', '.join(f'"{key}"' for key in job_keys) + ']'
+        payload = dict(self.api_payload)
+        payload["query"] = self.api_payload["query"].format(job_keys_gql=job_keys_gql)
+        response = requests.post(self.api_url, headers=self.api_headers, json=payload, proxies=self.proxy)
+        if response.status_code == 200:
+            return response.json()['data']['jobData']['results']
+        return []
+
+    def _add_params(self, page: int) -> dict[str, str | Any]:
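+        # `fromage` is the posting time filter in days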
+        fromage = max(self.scraper_input.hours_old // 24, 1) if self.scraper_input.hours_old else None
+        params = {
+            "q": self.scraper_input.search_term,
+            "l": self.scraper_input.location if self.scraper_input.location else self.scraper_input.country.value[0].split(',')[-1],
+            "filter": 0,
+            "start": self.scraper_input.offset + page * 10,
+            "sort": "date",
+            "fromage": fromage,
+        }
+        if self.scraper_input.distance:
+            params["radius"] = self.scraper_input.distance
+
+        sc_values = []
+        if self.scraper_input.is_remote:
+            sc_values.append("attr(DSQF7)")
+        if self.scraper_input.job_type:
+            sc_values.append("jt({})".format(self.scraper_input.job_type.value[0]))
+
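+        # "sc" packs the selected filters (remote, job type) into Indeed's search-criteria syntax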
+        if sc_values:
+            params["sc"] = "0kf:" + "".join(sc_values) + ";"
+
+        if self.scraper_input.easy_apply:
+            params['iafilter'] = 1
+
+        return params
+
     @staticmethod
-    def get_job_type(job: dict) -> list[JobType] | None:
+    def _get_job_type(job: dict) -> list[JobType] | None:
         """
         Parses the job to get list of job types
         :param job:
@@ -204,7 +236,7 @@ class IndeedScraper(Scraper):
         return job_types
 
     @staticmethod
-    def get_compensation(job: dict, job_detailed: dict) -> Compensation:
+    def _get_compensation(job: dict, job_detailed: dict) -> Compensation:
         """
         Parses the job to get compensation
         :param job:
@@ -213,7 +245,7 @@ class IndeedScraper(Scraper):
         """
         comp = job_detailed['compensation']['baseSalary']
         if comp:
-            interval = IndeedScraper.get_correct_interval(comp['unitOfWork'])
+            interval = IndeedScraper._get_correct_interval(comp['unitOfWork'])
             if interval:
                 return Compensation(
                     interval=interval,
@@ -242,18 +274,13 @@ class IndeedScraper(Scraper):
         return compensation
 
     @staticmethod
-    def parse_jobs(soup: BeautifulSoup) -> dict:
+    def _parse_jobs(soup: BeautifulSoup) -> dict:
         """
         Parses the jobs from the soup object
         :param soup:
         :return: jobs
         """
-
         def find_mosaic_script() -> Tag | None:
-            """
-            Finds jobcards script tag
-            :return: script_tag
-            """
             script_tags = soup.find_all("script")
 
             for tag in script_tags:
@@ -266,7 +293,6 @@ class IndeedScraper(Scraper):
             return None
 
         script_tag = find_mosaic_script()
-
         if script_tag:
             script_str = script_tag.string
             pattern = r'window.mosaic.providerData\["mosaic-provider-jobcards"\]\s*=\s*({.*?});'
@@ -283,49 +309,7 @@ class IndeedScraper(Scraper):
             )
 
     @staticmethod
-    def get_headers():
-        return {
-          'Host': 'www.indeed.com',
-          'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-          'sec-fetch-site': 'same-origin',
-          'sec-fetch-dest': 'document',
-          'accept-language': 'en-US,en;q=0.9',
-          'sec-fetch-mode': 'navigate',
-          'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Indeed App 192.0',
-          'referer': 'https://www.indeed.com/m/jobs?q=software%20intern&l=Dallas%2C%20TX&from=serpso&rq=1&rsIdx=3',
-        }
-
-    @staticmethod
-    def add_params(scraper_input: ScraperInput, page: int) -> dict[str, str | Any]:
-        # `fromage` is the posting time filter in days
-        fromage = max(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None
-        params = {
-            "q": scraper_input.search_term,
-            "l": scraper_input.location if scraper_input.location else scraper_input.country.value[0].split(',')[-1],
-            "filter": 0,
-            "start": scraper_input.offset + page * 10,
-            "sort": "date",
-            "fromage": fromage,
-        }
-        if scraper_input.distance:
-            params["radius"] = scraper_input.distance
-
-        sc_values = []
-        if scraper_input.is_remote:
-            sc_values.append("attr(DSQF7)")
-        if scraper_input.job_type:
-            sc_values.append("jt({})".format(scraper_input.job_type.value[0]))
-
-        if sc_values:
-            params["sc"] = "0kf:" + "".join(sc_values) + ";"
-
-        if scraper_input.easy_apply:
-            params['iafilter'] = 1
-
-        return params
-
-    @staticmethod
-    def is_job_remote(job: dict, job_detailed: dict, description: str) -> bool:
+    def _is_job_remote(job: dict, job_detailed: dict, description: str) -> bool:
         remote_keywords = ['remote', 'work from home', 'wfh']
         is_remote_in_attributes = any(
             any(keyword in attr['label'].lower() for keyword in remote_keywords)
@@ -342,86 +326,8 @@ class IndeedScraper(Scraper):
         )
         return is_remote_in_attributes or is_remote_in_description or is_remote_in_location or is_remote_in_taxonomy
 
-    def get_job_details(self, job_keys: list[str]) -> dict:
-        """
-        Queries the GraphQL endpoint for detailed job information for the given job keys.
-        """
-        url = "https://apis.indeed.com/graphql"
-        headers = {
-            'Host': 'apis.indeed.com',
-            'content-type': 'application/json',
-            'indeed-api-key': '161092c2017b5bbab13edb12461a62d5a833871e7cad6d9d475304573de67ac8',
-            'accept': 'application/json',
-            'indeed-locale': 'en-US',
-            'accept-language': 'en-US,en;q=0.9',
-            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Indeed App 193.1',
-            'indeed-app-info': 'appv=193.1; appid=com.indeed.jobsearch; osv=16.6.1; os=ios; dtype=phone',
-            'indeed-co': 'US',
-        }
-
-        job_keys_gql = '[' + ', '.join(f'"{key}"' for key in job_keys) + ']'
-
-        payload = {
-            "query": f"""
-            query GetJobData {{
-              jobData(input: {{
-                jobKeys: {job_keys_gql}
-              }}) {{
-                results {{
-                  job {{
-                    key
-                    title
-                    description {{
-                      html
-                    }}
-                    location {{
-                      countryName
-                      countryCode
-                      city
-                      postalCode
-                      streetAddress
-                      formatted {{
-                        short
-                        long
-                      }}
-                    }}
-                    compensation {{
-                      baseSalary {{
-                        unitOfWork
-                        range {{
-                          ... on Range {{
-                            min
-                            max
-                          }}
-                        }}
-                      }}
-                      currencyCode
-                    }}
-                    attributes {{
-                      label
-                    }}
-                    employer {{
-                      relativeCompanyPageUrl
-                    }}
-                    recruit {{
-                      viewJobUrl
-                      detailedSalary
-                      workSchedule
-                    }}
-                  }}
-                }}
-              }}
-            }}
-            """
-        }
-        response = requests.post(url, headers=headers, json=payload, proxies=self.proxy)
-        if response.status_code == 200:
-            return response.json()['data']['jobData']['results']
-        else:
-            return {}
-
     @staticmethod
-    def get_correct_interval(interval: str) -> CompensationInterval:
+    def _get_correct_interval(interval: str) -> CompensationInterval:
         interval_mapping = {
             "DAY": "DAILY",
             "YEAR": "YEARLY",
@@ -434,3 +340,78 @@ class IndeedScraper(Scraper):
             return CompensationInterval[mapped_interval]
         else:
             raise ValueError(f"Unsupported interval: {interval}")
+
+    headers = {
+        'Host': 'www.indeed.com',
+        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+        'sec-fetch-site': 'same-origin',
+        'sec-fetch-dest': 'document',
+        'accept-language': 'en-US,en;q=0.9',
+        'sec-fetch-mode': 'navigate',
+        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Indeed App 192.0',
+        'referer': 'https://www.indeed.com/m/jobs?q=software%20intern&l=Dallas%2C%20TX&from=serpso&rq=1&rsIdx=3',
+    }
+    api_headers = {
+        'Host': 'apis.indeed.com',
+        'content-type': 'application/json',
+        'indeed-api-key': '161092c2017b5bbab13edb12461a62d5a833871e7cad6d9d475304573de67ac8',
+        'accept': 'application/json',
+        'indeed-locale': 'en-US',
+        'accept-language': 'en-US,en;q=0.9',
+        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Indeed App 193.1',
+        'indeed-app-info': 'appv=193.1; appid=com.indeed.jobsearch; osv=16.6.1; os=ios; dtype=phone',
+        'indeed-co': 'US',
+    }
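+    # GraphQL query template; doubled braces are literal braces for str.format,
+    # and {job_keys_gql} is substituted in _get_job_details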
+    api_payload = {
+        "query": """
+        query GetJobData {{
+          jobData(input: {{
+            jobKeys: {job_keys_gql}
+          }}) {{
+            results {{
+              job {{
+                key
+                title
+                description {{
+                  html
+                }}
+                location {{
+                  countryName
+                  countryCode
+                  city
+                  postalCode
+                  streetAddress
+                  formatted {{
+                    short
+                    long
+                  }}
+                }}
+                compensation {{
+                  baseSalary {{
+                    unitOfWork
+                    range {{
+                      ... on Range {{
+                        min
+                        max
+                      }}
+                    }}
+                  }}
+                  currencyCode
+                }}
+                attributes {{
+                  label
+                }}
+                employer {{
+                  relativeCompanyPageUrl
+                }}
+                recruit {{
+                  viewJobUrl
+                  detailedSalary
+                  workSchedule
+                }}
+              }}
+            }}
+          }}
+        }}
+        """
+    }
diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/jobspy/scrapers/linkedin/__init__.py
index 4833337..ad17cd4 100644
--- a/src/jobspy/scrapers/linkedin/__init__.py
+++ b/src/jobspy/scrapers/linkedin/__init__.py
@@ -25,26 +25,30 @@ from ...jobs import (
     JobResponse,
     JobType,
     Country,
-    Compensation
+    Compensation,
+    DescriptionFormat
 )
 from ..utils import (
+    logger,
     count_urgent_words,
     extract_emails_from_text,
     get_enum_from_job_type,
-    currency_parser
+    currency_parser,
+    markdown_converter
 )
 
 
 class LinkedInScraper(Scraper):
-    DELAY = 3
+    base_url = "https://www.linkedin.com"
+    delay = 3
 
     def __init__(self, proxy: Optional[str] = None):
         """
         Initializes LinkedInScraper with the LinkedIn job search url
         """
+        self.scraper_input = None
         site = Site(Site.LINKEDIN)
         self.country = "worldwide"
-        self.url = "https://www.linkedin.com"
         super().__init__(site, proxy=proxy)
 
     def scrape(self, scraper_input: ScraperInput) -> JobResponse:
@@ -53,28 +57,16 @@ class LinkedInScraper(Scraper):
         :param scraper_input:
         :return: job_response
         """
+        self.scraper_input = scraper_input
         job_list: list[JobPost] = []
         seen_urls = set()
         url_lock = Lock()
         page = scraper_input.offset // 25 + 25 if scraper_input.offset else 0
-
         seconds_old = (
             scraper_input.hours_old * 3600
             if scraper_input.hours_old
             else None
         )
-
-        def job_type_code(job_type_enum):
-            mapping = {
-                JobType.FULL_TIME: "F",
-                JobType.PART_TIME: "P",
-                JobType.INTERNSHIP: "I",
-                JobType.CONTRACT: "C",
-                JobType.TEMPORARY: "T",
-            }
-
-            return mapping.get(job_type_enum, "")
-
         continue_search = lambda: len(job_list) < scraper_input.results_wanted and page < 1000
 
         while continue_search():
@@ -84,7 +76,7 @@ class LinkedInScraper(Scraper):
                 "location": scraper_input.location,
                 "distance": scraper_input.distance,
                 "f_WT": 2 if scraper_input.is_remote else None,
-                "f_JT": job_type_code(scraper_input.job_type)
+                "f_JT": self.job_type_code(scraper_input.job_type)
                 if scraper_input.job_type
                 else None,
                 "pageNum": 0,
@@ -97,23 +89,25 @@ class LinkedInScraper(Scraper):
             params = {k: v for k, v in params.items() if v is not None}
             try:
                 response = session.get(
-                    f"{self.url}/jobs-guest/jobs/api/seeMoreJobPostings/search?",
+                    f"{self.base_url}/jobs-guest/jobs/api/seeMoreJobPostings/search?",
                     params=params,
                     allow_redirects=True,
                     proxies=self.proxy,
-                    headers=self.headers(),
+                    headers=self.headers,
                     timeout=10,
                 )
-                response.raise_for_status()
-
-            except requests.HTTPError as e:
-                raise LinkedInException(
-                    f"bad response status code: {e.response.status_code}"
-                )
-            except ProxyError as e:
-                raise LinkedInException("bad proxy")
+                if response.status_code not in range(200, 400):
+                    if response.status_code == 429:
+                        logger.error('429 Response - Blocked by LinkedIn for too many requests')
+                    else:
+                        logger.error(f'LinkedIn response status code {response.status_code}')
+                    return JobResponse(jobs=job_list)
             except Exception as e:
-                raise LinkedInException(str(e))
+                if "Proxy responded with" in str(e):
+                    logger.error(f'Indeed: Bad proxy')
+                else:
+                    logger.error(f'Indeed: {str(e)}')
+                return JobResponse(job_list=job_list)
 
             soup = BeautifulSoup(response.text, "html.parser")
             job_cards = soup.find_all("div", class_="base-search-card")
@@ -126,29 +120,29 @@ class LinkedInScraper(Scraper):
                 if href_tag and "href" in href_tag.attrs:
                     href = href_tag.attrs["href"].split("?")[0]
                     job_id = href.split("-")[-1]
-                    job_url = f"{self.url}/jobs/view/{job_id}"
+                    job_url = f"{self.base_url}/jobs/view/{job_id}"
 
                 with url_lock:
                     if job_url in seen_urls:
                         continue
                     seen_urls.add(job_url)
-
-                # Call process_job directly without threading
                 try:
-                    job_post = self.process_job(job_card, job_url, scraper_input.full_description)
+                    job_post = self._process_job(job_card, job_url, scraper_input.linkedin_fetch_description)
                     if job_post:
                         job_list.append(job_post)
+                    if not continue_search():
+                        break
                 except Exception as e:
-                    raise LinkedInException("Exception occurred while processing jobs")
+                    raise LinkedInException(str(e))
 
             if continue_search():
-                time.sleep(random.uniform(LinkedInScraper.DELAY, LinkedInScraper.DELAY + 2))
+                time.sleep(random.uniform(self.delay, self.delay + 2))
                 page += 25
 
         job_list = job_list[: scraper_input.results_wanted]
         return JobResponse(jobs=job_list)
 
-    def process_job(self, job_card: Tag, job_url: str, full_descr: bool) -> Optional[JobPost]:
+    def _process_job(self, job_card: Tag, job_url: str, full_descr: bool) -> Optional[JobPost]:
         salary_tag = job_card.find('span', class_='job-search-card__salary-info')
 
         compensation = None
@@ -178,7 +172,7 @@ class LinkedInScraper(Scraper):
         company = company_a_tag.get_text(strip=True) if company_a_tag else "N/A"
 
         metadata_card = job_card.find("div", class_="base-search-card__metadata")
-        location = self.get_location(metadata_card)
+        location = self._get_location(metadata_card)
 
         datetime_tag = (
             metadata_card.find("time", class_="job-search-card__listdate")
@@ -190,12 +184,12 @@ class LinkedInScraper(Scraper):
             datetime_str = datetime_tag["datetime"]
             try:
                 date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
-            except Exception as e:
+            except Exception:
                 date_posted = None
         benefits_tag = job_card.find("span", class_="result-benefits__text")
         benefits = " ".join(benefits_tag.get_text().split()) if benefits_tag else None
         if full_descr:
-            description, job_type = self.get_job_description(job_url)
+            description, job_type = self._get_job_description(job_url)
 
         return JobPost(
             title=title,
@@ -212,7 +206,7 @@ class LinkedInScraper(Scraper):
             num_urgent_words=count_urgent_words(description) if description else None,
         )
 
-    def get_job_description(
+    def _get_job_description(
         self, job_page_url: str
     ) -> tuple[str | None, list[JobType] | None]:
         """
@@ -222,11 +216,9 @@ class LinkedInScraper(Scraper):
         """
         try:
             session = create_session(is_tls=False, has_retry=True)
-            response = session.get(job_page_url, timeout=5, proxies=self.proxy)
+            response = session.get(job_page_url, headers=self.headers, timeout=5, proxies=self.proxy)
             response.raise_for_status()
-        except requests.HTTPError as e:
-            return None, None
-        except Exception as e:
+        except Exception:
             return None, None
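+        # LinkedIn redirects blocked or unauthenticated requests to the signup page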
         if response.url == "https://www.linkedin.com/signup":
             return None, None
@@ -241,40 +233,13 @@ class LinkedInScraper(Scraper):
                 for attr in list(tag.attrs):
                     del tag[attr]
                 return tag
-
             div_content = remove_attributes(div_content)
             description = div_content.prettify(formatter="html")
+            if self.scraper_input.description_format == DescriptionFormat.MARKDOWN:
+                description = markdown_converter(description)
+        return description, self._parse_job_type(soup)
 
-        def get_job_type(
-            soup_job_type: BeautifulSoup,
-        ) -> list[JobType] | None:
-            """
-            Gets the job type from job page
-            :param soup_job_type:
-            :return: JobType
-            """
-            h3_tag = soup_job_type.find(
-                "h3",
-                class_="description__job-criteria-subheader",
-                string=lambda text: "Employment type" in text,
-            )
-
-            employment_type = None
-            if h3_tag:
-                employment_type_span = h3_tag.find_next_sibling(
-                    "span",
-                    class_="description__job-criteria-text description__job-criteria-text--criteria",
-                )
-                if employment_type_span:
-                    employment_type = employment_type_span.get_text(strip=True)
-                    employment_type = employment_type.lower()
-                    employment_type = employment_type.replace("-", "")
-
-            return [get_enum_from_job_type(employment_type)] if employment_type else []
-
-        return description, get_job_type(soup)
-
-    def get_location(self, metadata_card: Optional[Tag]) -> Location:
+    def _get_location(self, metadata_card: Optional[Tag]) -> Location:
         """
         Extracts the location data from the job metadata card.
         :param metadata_card
@@ -299,25 +264,50 @@ class LinkedInScraper(Scraper):
                 location = Location(
                     city=city,
                     state=state,
-                    country=Country.from_string(country),
+                    country=Country.from_string(country)
                 )
-
         return location
 
     @staticmethod
-    def headers() -> dict:
+    def _parse_job_type(soup_job_type: BeautifulSoup) -> list[JobType] | None:
+        """
+        Gets the job type from job page
+        :param soup_job_type:
+        :return: JobType
+        """
+        h3_tag = soup_job_type.find(
+            "h3",
+            class_="description__job-criteria-subheader",
+            string=lambda text: "Employment type" in text,
+        )
+        employment_type = None
+        if h3_tag:
+            employment_type_span = h3_tag.find_next_sibling(
+                "span",
+                class_="description__job-criteria-text description__job-criteria-text--criteria",
+            )
+            if employment_type_span:
+                employment_type = employment_type_span.get_text(strip=True)
+                employment_type = employment_type.lower()
+                employment_type = employment_type.replace("-", "")
+
+        return [get_enum_from_job_type(employment_type)] if employment_type else []
+
+    @staticmethod
+    def job_type_code(job_type_enum: JobType) -> str:
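+        # single-letter codes LinkedIn's job-type filter expects; unknown types map to ""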
         return {
-            "authority": "www.linkedin.com",
-            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "accept-language": "en-US,en;q=0.9",
-            "cache-control": "max-age=0",
-            "sec-ch-ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
-            # 'sec-ch-ua-mobile': '?0',
-            # 'sec-ch-ua-platform': '"macOS"',
-            # 'sec-fetch-dest': 'document',
-            # 'sec-fetch-mode': 'navigate',
-            # 'sec-fetch-site': 'none',
-            # 'sec-fetch-user': '?1',
-            "upgrade-insecure-requests": "1",
-            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-        }
+            JobType.FULL_TIME: "F",
+            JobType.PART_TIME: "P",
+            JobType.INTERNSHIP: "I",
+            JobType.CONTRACT: "C",
+            JobType.TEMPORARY: "T",
+        }.get(job_type_enum, "")
+
+    headers = {
+        "authority": "www.linkedin.com",
+        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+        "accept-language": "en-US,en;q=0.9",
+        "cache-control": "max-age=0",
+        "upgrade-insecure-requests": "1",
+        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+    }
diff --git a/src/jobspy/scrapers/utils.py b/src/jobspy/scrapers/utils.py
index 5366193..5f54569 100644
--- a/src/jobspy/scrapers/utils.py
+++ b/src/jobspy/scrapers/utils.py
@@ -2,13 +2,16 @@ import re
 import logging
 import numpy as np
 
+import html2text
 import tls_client
 import requests
 from requests.adapters import HTTPAdapter, Retry
 
 from ..jobs import JobType
 
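+# single shared html2text converter; note a shared instance may not be thread-safe when called from worker threads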
+text_maker = html2text.HTML2Text()
 logger = logging.getLogger("JobSpy")
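+# keep JobSpy records from also reaching the root logger's handlers (avoids duplicate output)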
+logger.propagate = False
 if not logger.handlers:
     logger.setLevel(logging.ERROR)
     console_handler = logging.StreamHandler()
@@ -32,6 +35,17 @@ def count_urgent_words(description: str) -> int:
     return count
 
 
+def markdown_converter(description_html: str) -> str:
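+    # e.g. markdown_converter("<h1>Role</h1><p>Remote</p>") -> "# Role\n\nRemote" (approximate output)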
+    if description_html is None:
+        return ""
+    text_maker.ignore_links = False
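+    # html2text can raise AssertionError on malformed HTML; degrade to "" rather than fail the scrape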
+    try:
+        markdown = text_maker.handle(description_html)
+        return markdown.strip()
+    except AssertionError:
+        return ""
+
+
 def extract_emails_from_text(text: str) -> list[str] | None:
     if not text:
         return None
@@ -42,14 +56,10 @@ def extract_emails_from_text(text: str) -> list[str] | None:
 def create_session(proxy: dict | None = None, is_tls: bool = True, has_retry: bool = False, delay: int = 1) -> requests.Session:
     """
     Creates a requests session with optional tls, proxy, and retry settings.
-
     :return: A session object
     """
     if is_tls:
-        session = tls_client.Session(
-            client_identifier="chrome112",
-            random_tls_extension_order=True,
-        )
+        session = tls_client.Session(random_tls_extension_order=True)
         session.proxies = proxy
     else:
         session = requests.Session()
@@ -66,7 +76,6 @@ def create_session(proxy: dict | None = None, is_tls: bool = True, has_retry: bo
 
             session.mount('http://', adapter)
             session.mount('https://', adapter)
-
     return session
 
 
diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/jobspy/scrapers/ziprecruiter/__init__.py
index fd7f25a..af3ca7b 100644
--- a/src/jobspy/scrapers/ziprecruiter/__init__.py
+++ b/src/jobspy/scrapers/ziprecruiter/__init__.py
@@ -6,33 +6,76 @@ This module contains routines to scrape ZipRecruiter.
 """
 import math
 import time
-from datetime import datetime, timezone
+from datetime import datetime
 from typing import Optional, Tuple, Any
 
 from concurrent.futures import ThreadPoolExecutor
 
 from .. import Scraper, ScraperInput, Site
-from ..exceptions import ZipRecruiterException
-from ...jobs import JobPost, Compensation, Location, JobResponse, JobType, Country
-from ..utils import count_urgent_words, extract_emails_from_text, create_session
+from ..utils import (
+    logger,
+    count_urgent_words,
+    extract_emails_from_text,
+    create_session,
+    markdown_converter
+)
+from ...jobs import (
+    JobPost,
+    Compensation,
+    Location,
+    JobResponse,
+    JobType,
+    Country,
+    DescriptionFormat
+)
 
 
 class ZipRecruiterScraper(Scraper):
+    base_url = "https://www.ziprecruiter.com"
+    api_url = "https://api.ziprecruiter.com"
+
     def __init__(self, proxy: Optional[str] = None):
         """
         Initializes ZipRecruiterScraper with the ZipRecruiter job search url
         """
-        site = Site(Site.ZIP_RECRUITER)
-        self.url = "https://www.ziprecruiter.com"
+        self.scraper_input = None
         self.session = create_session(proxy)
-        self.get_cookies()
-        super().__init__(site, proxy=proxy)
+        self._get_cookies()
+        super().__init__(Site.ZIP_RECRUITER, proxy=proxy)
 
+        self.delay = 5
         self.jobs_per_page = 20
         self.seen_urls = set()
-        self.delay = 5
 
-    def find_jobs_in_page(
+    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
+        """
+        Scrapes ZipRecruiter for jobs with scraper_input criteria.
+        :param scraper_input: Information about job search criteria.
+        :return: JobResponse containing a list of jobs.
+        """
+        self.scraper_input = scraper_input
+        job_list: list[JobPost] = []
+        continue_token = None
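+        # ZipRecruiter paginates with an opaque continue token rather than page offsets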
+
+        max_pages = math.ceil(scraper_input.results_wanted / self.jobs_per_page)
+        for page in range(1, max_pages + 1):
+            if len(job_list) >= scraper_input.results_wanted:
+                break
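+            # space out page requests; ZipRecruiter rate-limits aggressively (see the 429 handling in _find_jobs_in_page)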
+            if page > 1:
+                time.sleep(self.delay)
+
+            jobs_on_page, continue_token = self._find_jobs_in_page(
+                scraper_input, continue_token
+            )
+            if jobs_on_page:
+                job_list.extend(jobs_on_page)
+            else:
+                break
+            if not continue_token:
+                break
+        return JobResponse(jobs=job_list[: scraper_input.results_wanted])
+
+    def _find_jobs_in_page(
         self, scraper_input: ScraperInput, continue_token: str | None = None
     ) -> Tuple[list[JobPost], Optional[str]]:
         """
@@ -41,73 +84,51 @@ class ZipRecruiterScraper(Scraper):
         :param continue_token:
         :return: jobs found on page
         """
-        params = self.add_params(scraper_input)
+        jobs_list = []
+        params = self._add_params(scraper_input)
         if continue_token:
             params["continue_from"] = continue_token
         try:
-            response = self.session.get(
-                f"https://api.ziprecruiter.com/jobs-app/jobs",
-                headers=self.headers(),
+            res = self.session.get(
+                f"{self.api_url}/jobs-app/jobs",
+                headers=self.headers,
                 params=params
             )
-            if response.status_code != 200:
-                raise ZipRecruiterException(
-                    f"bad response status code: {response.status_code}"
-                )
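+            # anything outside 2xx/3xx counts as a failed page; log and bail rather than raise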
+            if res.status_code not in range(200, 400):
+                if res.status_code == 429:
+                    logger.error('429 Response - Blocked by ZipRecruiter for too many requests')
+                else:
+                    logger.error(f'ZipRecruiter response status code {res.status_code}')
+                return jobs_list, ""
         except Exception as e:
-            if "Proxy responded with non 200 code" in str(e):
-                raise ZipRecruiterException("bad proxy")
-            raise ZipRecruiterException(str(e))
+            if "Proxy responded with" in str(e):
+                logger.error('ZipRecruiter: Bad proxy')
+            else:
+                logger.error(f'ZipRecruiter: {str(e)}')
+            return jobs_list, ""
 
-        response_data = response.json()
-        jobs_list = response_data.get("jobs", [])
-        next_continue_token = response_data.get("continue", None)
+        res_data = res.json()
+        jobs_list = res_data.get("jobs", [])
+        next_continue_token = res_data.get("continue", None)
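+        # build JobPosts concurrently, one worker per job returned on the page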
         with ThreadPoolExecutor(max_workers=self.jobs_per_page) as executor:
-            job_results = [executor.submit(self.process_job, job) for job in jobs_list]
+            job_results = [executor.submit(self._process_job, job) for job in jobs_list]
 
         job_list = list(filter(None, (result.result() for result in job_results)))
         return job_list, next_continue_token
 
-    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
+    def _process_job(self, job: dict) -> JobPost | None:
         """
-        Scrapes ZipRecruiter for jobs with scraper_input criteria.
-        :param scraper_input: Information about job search criteria.
-        :return: JobResponse containing a list of jobs.
+        Processes an individual job dict from the response
         """
-        job_list: list[JobPost] = []
-        continue_token = None
-
-        max_pages = math.ceil(scraper_input.results_wanted / self.jobs_per_page)
-
-        for page in range(1, max_pages + 1):
-            if len(job_list) >= scraper_input.results_wanted:
-                break
-
-            if page > 1:
-                time.sleep(self.delay)
-
-            jobs_on_page, continue_token = self.find_jobs_in_page(
-                scraper_input, continue_token
-            )
-            if jobs_on_page:
-                job_list.extend(jobs_on_page)
-
-            if not continue_token:
-                break
-
-        return JobResponse(jobs=job_list[: scraper_input.results_wanted])
-
-    def process_job(self, job: dict) -> JobPost | None:
-        """Processes an individual job dict from the response"""
         title = job.get("name")
-        job_url = f"https://www.ziprecruiter.com/jobs//j?lvk={job['listing_key']}"
+        job_url = f"{self.base_url}/jobs//j?lvk={job['listing_key']}"
         if job_url in self.seen_urls:
             return
         self.seen_urls.add(job_url)
 
         description = job.get("job_description", "").strip()
-
+        if self.scraper_input.description_format == DescriptionFormat.MARKDOWN:
+            description = markdown_converter(description)
         company = job.get("hiring_company", {}).get("name")
         country_value = "usa" if job.get("job_country") == "US" else "canada"
         country_enum = Country.from_string(country_value)
@@ -115,11 +136,10 @@ class ZipRecruiterScraper(Scraper):
         location = Location(
             city=job.get("job_city"), state=job.get("job_state"), country=country_enum
         )
-        job_type = ZipRecruiterScraper.get_job_type_enum(
+        job_type = self._get_job_type_enum(
             job.get("employment_type", "").replace("_", "").lower()
         )
         date_posted = datetime.fromisoformat(job['posted_time'].rstrip("Z")).date()
-
         return JobPost(
             title=title,
             company_name=company,
@@ -144,20 +164,19 @@ class ZipRecruiterScraper(Scraper):
             num_urgent_words=count_urgent_words(description) if description else None,
         )
 
-    def get_cookies(self):
-        url="https://api.ziprecruiter.com/jobs-app/event"
+    def _get_cookies(self):
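+        # POSTing this canned mobile-app event appears to seed the session cookies the jobs API expects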
         data="event_type=session&logged_in=false&number_of_retry=1&property=model%3AiPhone&property=os%3AiOS&property=locale%3Aen_us&property=app_build_number%3A4734&property=app_version%3A91.0&property=manufacturer%3AApple&property=timestamp%3A2024-01-12T12%3A04%3A42-06%3A00&property=screen_height%3A852&property=os_version%3A16.6.1&property=source%3Ainstall&property=screen_width%3A393&property=device_model%3AiPhone%2014%20Pro&property=brand%3AApple"
-        self.session.post(url, data=data, headers=ZipRecruiterScraper.headers())
+        self.session.post(f"{self.api_url}/jobs-app/event", data=data, headers=self.headers)
 
     @staticmethod
-    def get_job_type_enum(job_type_str: str) -> list[JobType] | None:
+    def _get_job_type_enum(job_type_str: str) -> list[JobType] | None:
         for job_type in JobType:
             if job_type_str in job_type.value:
                 return [job_type]
         return None
 
     @staticmethod
-    def add_params(scraper_input) -> dict[str, str | Any]:
+    def _add_params(scraper_input) -> dict[str, str | Any]:
         params = {
             "search": scraper_input.search_term,
             "location": scraper_input.location,
@@ -177,24 +196,15 @@ class ZipRecruiterScraper(Scraper):
             params["remote"] = 1
         if scraper_input.distance:
             params["radius"] = scraper_input.distance
+        return {k: v for k, v in params.items() if v is not None}
 
-        params = {k: v for k, v in params.items() if v is not None}
-
-        return params
-
-    @staticmethod
-    def headers() -> dict:
-        """
-        Returns headers needed for requests
-        :return: dict - Dictionary containing headers
-        """
-        return {
-            "Host": "api.ziprecruiter.com",
-            "accept": "*/*",
-            "x-zr-zva-override": "100000000;vid:ZT1huzm_EQlDTVEc",
-            "x-pushnotificationid": "0ff4983d38d7fc5b3370297f2bcffcf4b3321c418f5c22dd152a0264707602a0",
-            "x-deviceid": "D77B3A92-E589-46A4-8A39-6EF6F1D86006",
-            "user-agent": "Job Search/87.0 (iPhone; CPU iOS 16_6_1 like Mac OS X)",
-            "authorization": "Basic YTBlZjMyZDYtN2I0Yy00MWVkLWEyODMtYTI1NDAzMzI0YTcyOg==",
-            "accept-language": "en-US,en;q=0.9",
-        }
+    headers = {
+        "Host": "api.ziprecruiter.com",
+        "accept": "*/*",
+        "x-zr-zva-override": "100000000;vid:ZT1huzm_EQlDTVEc",
+        "x-pushnotificationid": "0ff4983d38d7fc5b3370297f2bcffcf4b3321c418f5c22dd152a0264707602a0",
+        "x-deviceid": "D77B3A92-E589-46A4-8A39-6EF6F1D86006",
+        "user-agent": "Job Search/87.0 (iPhone; CPU iOS 16_6_1 like Mac OS X)",
+        "authorization": "Basic YTBlZjMyZDYtN2I0Yy00MWVkLWEyODMtYTI1NDAzMzI0YTcyOg==",
+        "accept-language": "en-US,en;q=0.9",
+    }