chore: readme

pull/56/head
Cullen Watson 2023-10-10 11:20:24 -05:00
parent 4aa832d3e2
commit 35bdff65a9
12 changed files with 225 additions and 214 deletions

View File

@ -33,37 +33,19 @@ _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/)
```python ```python
from jobspy import scrape_jobs from jobspy import scrape_jobs
import pandas as pd
jobs: pd.DataFrame = scrape_jobs( jobs = scrape_jobs(
site_name=["indeed", "linkedin", "zip_recruiter"], site_name=["indeed", "linkedin", "zip_recruiter"],
search_term="software engineer", search_term="software engineer",
location="Dallas, TX", location="Dallas, TX",
results_wanted=10, results_wanted=10,
country_indeed='USA' # only needed for indeed country_indeed='USA' # only needed for indeed
# use if you want to use a proxy
# proxy="http://jobspy:5a4vpWtj8EeJ2hoYzk@ca.smartproxy.com:20001",
# offset=25 # use if you want to start at a specific offset
) )
print(f"Found {len(jobs)} jobs")
print(jobs.head())
jobs.to_csv("jobs.csv", index=False)
# formatting for pandas # output to .xlsx
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc
# 1 output to console
print(jobs)
# 2 display in Jupyter Notebook (1. pip install jupyter 2. jupyter notebook)
# display(jobs)
# 3 output to .csv
# jobs.to_csv('jobs.csv', index=False)
# 4 output to .xlsx
# jobs.to_xlsx('jobs.xlsx', index=False) # jobs.to_xlsx('jobs.xlsx', index=False)
``` ```
@ -117,6 +99,9 @@ JobPost
│ ├── max_amount (int) │ ├── max_amount (int)
│ └── currency (enum) │ └── currency (enum)
└── date_posted (date) └── date_posted (date)
└── emails (str)
└── num_urgent_words (int)
└── is_remote (bool) - just for Indeed at the moment
``` ```
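For context, here is a minimal sketch of working with the new fields from the returned DataFrame; the column names are assumed to mirror the JobPost attributes above (`emails`, `num_urgent_words`, `is_remote`), with `is_remote` only populated for Indeed as noted in the schema.

```python
from jobspy import scrape_jobs

jobs = scrape_jobs(
    site_name=["indeed"],
    search_term="software engineer",
    location="Dallas, TX",
    results_wanted=10,
    country_indeed="USA",
)

# keep only remote postings that list a contact email
# (column names assumed to match the JobPost fields shown above)
mask = jobs["is_remote"].fillna(False).astype(bool) & jobs["emails"].notna()
print(jobs[mask][["title", "company", "emails", "num_urgent_words"]])
```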
### Exceptions ### Exceptions

View File

@ -6,23 +6,23 @@ jobs: pd.DataFrame = scrape_jobs(
search_term="software engineer", search_term="software engineer",
location="Dallas, TX", location="Dallas, TX",
results_wanted=50, # be wary: the higher it is, the more likely you'll get blocked (rotating proxy should work though) results_wanted=50, # be wary: the higher it is, the more likely you'll get blocked (rotating proxy should work though)
country_indeed='USA', country_indeed="USA",
offset=25 # start jobs from an offset (use if search failed and want to continue) offset=25 # start jobs from an offset (use if search failed and want to continue)
# proxy="http://jobspy:5a4vpWtj8EeJ2hoYzk@ca.smartproxy.com:20001", # proxy="http://jobspy:5a4vpWtj8EeJ2hoYzk@ca.smartproxy.com:20001",
) )
# formatting for pandas # formatting for pandas
pd.set_option('display.max_columns', None) pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', None) pd.set_option("display.max_rows", None)
pd.set_option('display.width', None) pd.set_option("display.width", None)
pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc pd.set_option("display.max_colwidth", 50) # set to 0 to see full job url / desc
# 1: output to console # 1: output to console
print(jobs) print(jobs)
# 2: output to .csv # 2: output to .csv
jobs.to_csv('./jobs.csv', index=False) jobs.to_csv("./jobs.csv", index=False)
print('outputted to jobs.csv') print("outputted to jobs.csv")
# 3: output to .xlsx # 3: output to .xlsx
# jobs.to_xlsx('jobs.xlsx', index=False) # jobs.to_xlsx('jobs.xlsx', index=False)

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "python-jobspy" name = "python-jobspy"
version = "1.1.12" version = "1.1.13"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter" description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"] authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
homepage = "https://github.com/cullenwatson/JobSpy" homepage = "https://github.com/cullenwatson/JobSpy"

View File

@ -37,7 +37,7 @@ def scrape_jobs(
country_indeed: str = "usa", country_indeed: str = "usa",
hyperlinks: bool = False, hyperlinks: bool = False,
proxy: Optional[str] = None, proxy: Optional[str] = None,
offset: Optional[int] = 0 offset: Optional[int] = 0,
) -> pd.DataFrame: ) -> pd.DataFrame:
""" """
Simultaneously scrapes job data from multiple job sites. Simultaneously scrapes job data from multiple job sites.
@ -72,7 +72,7 @@ def scrape_jobs(
job_type=job_type, job_type=job_type,
easy_apply=easy_apply, easy_apply=easy_apply,
results_wanted=results_wanted, results_wanted=results_wanted,
offset=offset offset=offset,
) )
def scrape_site(site: Site) -> Tuple[str, JobResponse]: def scrape_site(site: Site) -> Tuple[str, JobResponse]:
@ -120,9 +120,14 @@ def scrape_jobs(
] = f'<a href="{job_data["job_url"]}">{job_data["job_url"]}</a>' ] = f'<a href="{job_data["job_url"]}">{job_data["job_url"]}</a>'
job_data["site"] = site job_data["site"] = site
job_data["company"] = job_data["company_name"] job_data["company"] = job_data["company_name"]
job_data["job_type"] = ", ".join(job_type.value[0] for job_type in job_data["job_type"]) if job_data[ job_data["job_type"] = (
"job_type"] else None ", ".join(job_type.value[0] for job_type in job_data["job_type"])
job_data["emails"] = ", ".join(job_data["emails"]) if job_data["emails"] else None if job_data["job_type"]
else None
)
job_data["emails"] = (
", ".join(job_data["emails"]) if job_data["emails"] else None
)
job_data["location"] = Location(**job_data["location"]).display_location() job_data["location"] = Location(**job_data["location"]).display_location()
compensation_obj = job_data.get("compensation") compensation_obj = job_data.get("compensation")

View File

@ -189,7 +189,7 @@ class JobPost(BaseModel):
benefits: str | None = None benefits: str | None = None
emails: list[str] | None = None emails: list[str] | None = None
num_urgent_words: int | None = None num_urgent_words: int | None = None
# is_remote: bool | None = None is_remote: bool | None = None
# company_industry: str | None = None # company_industry: str | None = None

View File

@ -10,14 +10,13 @@ import io
import json import json
from datetime import datetime from datetime import datetime
import tls_client
import urllib.parse import urllib.parse
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.element import Tag from bs4.element import Tag
from concurrent.futures import ThreadPoolExecutor, Future from concurrent.futures import ThreadPoolExecutor, Future
from ..exceptions import IndeedException from ..exceptions import IndeedException
from ..utils import count_urgent_words, extract_emails_from_text from ..utils import count_urgent_words, extract_emails_from_text, create_session
from ...jobs import ( from ...jobs import (
JobPost, JobPost,
Compensation, Compensation,
@ -54,7 +53,7 @@ class IndeedScraper(Scraper):
self.country = scraper_input.country self.country = scraper_input.country
domain = self.country.domain_value domain = self.country.domain_value
self.url = f"https://{domain}.indeed.com" self.url = f"https://{domain}.indeed.com"
session = self.create_session() session = create_session(self.proxy)
params = { params = {
"q": scraper_input.search_term, "q": scraper_input.search_term,
@ -155,8 +154,11 @@ class IndeedScraper(Scraper):
compensation=compensation, compensation=compensation,
date_posted=date_posted, date_posted=date_posted,
job_url=job_url_client, job_url=job_url_client,
emails=extract_emails_from_text(description), emails=extract_emails_from_text(description) if description else None,
num_urgent_words=count_urgent_words(description) num_urgent_words=count_urgent_words(description)
if description
else None,
is_remote=self.is_remote_job(job),
) )
return job_post return job_post
@ -213,7 +215,7 @@ class IndeedScraper(Scraper):
params = urllib.parse.parse_qs(parsed_url.query) params = urllib.parse.parse_qs(parsed_url.query)
jk_value = params.get("jk", [None])[0] jk_value = params.get("jk", [None])[0]
formatted_url = f"{self.url}/viewjob?jk={jk_value}&spa=1" formatted_url = f"{self.url}/viewjob?jk={jk_value}&spa=1"
session = self.create_session() session = create_session(self.proxy)
try: try:
response = session.get( response = session.get(
@ -250,7 +252,9 @@ class IndeedScraper(Scraper):
label = taxonomy["attributes"][i].get("label") label = taxonomy["attributes"][i].get("label")
if label: if label:
job_type_str = label.replace("-", "").replace(" ", "").lower() job_type_str = label.replace("-", "").replace(" ", "").lower()
job_types.append(IndeedScraper.get_enum_from_job_type(job_type_str)) job_types.append(
IndeedScraper.get_enum_from_job_type(job_type_str)
)
return job_types return job_types
@staticmethod @staticmethod
@ -326,35 +330,26 @@ class IndeedScraper(Scraper):
@staticmethod @staticmethod
def get_headers(): def get_headers():
return { return {
'authority': 'www.indeed.com', "authority": "www.indeed.com",
'accept': '*/*', "accept": "*/*",
'accept-language': 'en-US,en;q=0.9', "accept-language": "en-US,en;q=0.9",
'referer': 'https://www.indeed.com/viewjob?jk=fe6182337d72c7b1&tk=1hcbfcmd0k62t802&from=serp&vjs=3&advn=8132938064490989&adid=408692607&ad=-6NYlbfkN0A3Osc99MJFDKjquSk4WOGT28ALb_ad4QMtrHreCb9ICg6MiSVy9oDAp3evvOrI7Q-O9qOtQTg1EPbthP9xWtBN2cOuVeHQijxHjHpJC65TjDtftH3AXeINjBvAyDrE8DrRaAXl8LD3Fs1e_xuDHQIssdZ2Mlzcav8m5jHrA0fA64ZaqJV77myldaNlM7-qyQpy4AsJQfvg9iR2MY7qeC5_FnjIgjKIy_lNi9OPMOjGRWXA94CuvC7zC6WeiJmBQCHISl8IOBxf7EdJZlYdtzgae3593TFxbkd6LUwbijAfjax39aAuuCXy3s9C4YgcEP3TwEFGQoTpYu9Pmle-Ae1tHGPgsjxwXkgMm7Cz5mBBdJioglRCj9pssn-1u1blHZM4uL1nK9p1Y6HoFgPUU9xvKQTHjKGdH8d4y4ETyCMoNF4hAIyUaysCKdJKitC8PXoYaWhDqFtSMR4Jys8UPqUV&xkcb=SoDD-_M3JLQfWnQTDh0LbzkdCdPP&xpse=SoBa6_I3JLW9FlWZlB0PbzkdCdPP&sjdu=i6xVERweJM_pVUvgf-MzuaunBTY7G71J5eEX6t4DrDs5EMPQdODrX7Nn-WIPMezoqr5wA_l7Of-3CtoiUawcHw', "referer": "https://www.indeed.com/viewjob?jk=fe6182337d72c7b1&tk=1hcbfcmd0k62t802&from=serp&vjs=3&advn=8132938064490989&adid=408692607&ad=-6NYlbfkN0A3Osc99MJFDKjquSk4WOGT28ALb_ad4QMtrHreCb9ICg6MiSVy9oDAp3evvOrI7Q-O9qOtQTg1EPbthP9xWtBN2cOuVeHQijxHjHpJC65TjDtftH3AXeINjBvAyDrE8DrRaAXl8LD3Fs1e_xuDHQIssdZ2Mlzcav8m5jHrA0fA64ZaqJV77myldaNlM7-qyQpy4AsJQfvg9iR2MY7qeC5_FnjIgjKIy_lNi9OPMOjGRWXA94CuvC7zC6WeiJmBQCHISl8IOBxf7EdJZlYdtzgae3593TFxbkd6LUwbijAfjax39aAuuCXy3s9C4YgcEP3TwEFGQoTpYu9Pmle-Ae1tHGPgsjxwXkgMm7Cz5mBBdJioglRCj9pssn-1u1blHZM4uL1nK9p1Y6HoFgPUU9xvKQTHjKGdH8d4y4ETyCMoNF4hAIyUaysCKdJKitC8PXoYaWhDqFtSMR4Jys8UPqUV&xkcb=SoDD-_M3JLQfWnQTDh0LbzkdCdPP&xpse=SoBa6_I3JLW9FlWZlB0PbzkdCdPP&sjdu=i6xVERweJM_pVUvgf-MzuaunBTY7G71J5eEX6t4DrDs5EMPQdODrX7Nn-WIPMezoqr5wA_l7Of-3CtoiUawcHw",
'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"', "sec-ch-ua": '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
'sec-ch-ua-mobile': '?0', "sec-ch-ua-mobile": "?0",
'sec-ch-ua-platform': '"Windows"', "sec-ch-ua-platform": '"Windows"',
'sec-fetch-dest': 'empty', "sec-fetch-dest": "empty",
'sec-fetch-mode': 'cors', "sec-fetch-mode": "cors",
'sec-fetch-site': 'same-origin', "sec-fetch-site": "same-origin",
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36' "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
} }
def create_session(self): @staticmethod
def is_remote_job(job: dict) -> bool:
""" """
Creates a session with specific client identifiers and assigns proxies if available. :param job:
:return: bool
:return: A session object with or without proxies.
""" """
session = tls_client.Session( for taxonomy in job.get("taxonomyAttributes", []):
client_identifier="chrome112", if taxonomy["label"] == "remote" and len(taxonomy["attributes"]) > 0:
random_tls_extension_order=True, return True
) return False
session.proxies = self.proxy
# TODO multiple proxies
# if self.proxies:
# session.proxies = {
# "http": random.choice(self.proxies),
# "https": random.choice(self.proxies),
# }
return session
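The per-scraper `create_session` above is replaced by a shared `create_session(proxy)` helper imported from `..utils`, whose implementation is not part of this diff. Judging from the removed method, it presumably looks something like the sketch below (an assumption, not the actual utils code):

```python
import tls_client


def create_session(proxy: str | None = None) -> tls_client.Session:
    """Assumed shape of the shared helper: a tls_client session with the proxy attached."""
    session = tls_client.Session(
        client_identifier="chrome112",
        random_tls_extension_order=True,
    )
    if proxy:
        session.proxies = proxy  # single proxy URL; multiple-proxy rotation is still a TODO
    return session
```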

View File

@ -93,13 +93,15 @@ class LinkedInScraper(Scraper):
break break
except requests.HTTPError as e: except requests.HTTPError as e:
if hasattr(e, 'response') and e.response is not None: if hasattr(e, "response") and e.response is not None:
if e.response.status_code == 429: if e.response.status_code == 429:
time.sleep(self.DELAY) time.sleep(self.DELAY)
retries += 1 retries += 1
continue continue
else: else:
raise LinkedInException(f"bad response status code: {e.response.status_code}") raise LinkedInException(
f"bad response status code: {e.response.status_code}"
)
else: else:
raise raise
except ProxyError as e: except ProxyError as e:
@ -108,7 +110,9 @@ class LinkedInScraper(Scraper):
raise LinkedInException(str(e)) raise LinkedInException(str(e))
else: else:
# Raise an exception if the maximum number of retries is reached # Raise an exception if the maximum number of retries is reached
raise LinkedInException("Max retries reached, failed to get a valid response") raise LinkedInException(
"Max retries reached, failed to get a valid response"
)
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(response.text, "html.parser")
@ -135,7 +139,9 @@ class LinkedInScraper(Scraper):
if job_post: if job_post:
job_list.append(job_post) job_list.append(job_post)
except Exception as e: except Exception as e:
raise LinkedInException("Exception occurred while processing jobs") raise LinkedInException(
"Exception occurred while processing jobs"
)
page += 25 page += 25
job_list = job_list[: scraper_input.results_wanted] job_list = job_list[: scraper_input.results_wanted]
@ -152,7 +158,11 @@ class LinkedInScraper(Scraper):
metadata_card = job_card.find("div", class_="base-search-card__metadata") metadata_card = job_card.find("div", class_="base-search-card__metadata")
location = self.get_location(metadata_card) location = self.get_location(metadata_card)
datetime_tag = metadata_card.find("time", class_="job-search-card__listdate") if metadata_card else None datetime_tag = (
metadata_card.find("time", class_="job-search-card__listdate")
if metadata_card
else None
)
date_posted = None date_posted = None
if datetime_tag and "datetime" in datetime_tag.attrs: if datetime_tag and "datetime" in datetime_tag.attrs:
datetime_str = datetime_tag["datetime"] datetime_str = datetime_tag["datetime"]
@ -172,14 +182,16 @@ class LinkedInScraper(Scraper):
location=location, location=location,
date_posted=date_posted, date_posted=date_posted,
job_url=job_url, job_url=job_url,
# job_type=[JobType.FULL_TIME],
job_type=job_type, job_type=job_type,
benefits=benefits, benefits=benefits,
emails=extract_emails_from_text(description), emails=extract_emails_from_text(description) if description else None,
num_urgent_words=count_urgent_words(description) num_urgent_words=count_urgent_words(description) if description else None,
) )
def get_job_description(self, job_page_url: str) -> tuple[None, None] | tuple[ def get_job_description(
str | None, tuple[str | None, JobType | None]]: self, job_page_url: str
) -> tuple[None, None] | tuple[str | None, tuple[str | None, JobType | None]]:
""" """
Retrieves job description by going to the job page url Retrieves job description by going to the job page url
:param job_page_url: :param job_page_url:
@ -233,7 +245,7 @@ class LinkedInScraper(Scraper):
def get_enum_from_value(value_str): def get_enum_from_value(value_str):
for job_type in JobType: for job_type in JobType:
if value_str in job_type.value: if value_str in job_type.value:
return list[job_type] return [job_type]
return None return None
def get_location(self, metadata_card: Optional[Tag]) -> Location: def get_location(self, metadata_card: Optional[Tag]) -> Location:

View File

@ -11,7 +11,6 @@ from datetime import datetime, date
from typing import Optional, Tuple, Any from typing import Optional, Tuple, Any
from urllib.parse import urlparse, parse_qs, urlunparse from urllib.parse import urlparse, parse_qs, urlunparse
import tls_client
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.element import Tag from bs4.element import Tag
@ -19,7 +18,7 @@ from concurrent.futures import ThreadPoolExecutor, Future
from .. import Scraper, ScraperInput, Site from .. import Scraper, ScraperInput, Site
from ..exceptions import ZipRecruiterException from ..exceptions import ZipRecruiterException
from ..utils import count_urgent_words, extract_emails_from_text from ..utils import count_urgent_words, extract_emails_from_text, create_session
from ...jobs import ( from ...jobs import (
JobPost, JobPost,
Compensation, Compensation,
@ -42,9 +41,6 @@ class ZipRecruiterScraper(Scraper):
self.jobs_per_page = 20 self.jobs_per_page = 20
self.seen_urls = set() self.seen_urls = set()
self.session = tls_client.Session(
client_identifier="chrome112", random_tls_extension_order=True
)
def find_jobs_in_page( def find_jobs_in_page(
self, scraper_input: ScraperInput, page: int self, scraper_input: ScraperInput, page: int
@ -55,14 +51,13 @@ class ZipRecruiterScraper(Scraper):
:param page: :param page:
:return: jobs found on page :return: jobs found on page
""" """
job_list: list[JobPost] = [] session = create_session(self.proxy)
try: try:
response = self.session.get( response = session.get(
f"{self.url}/jobs-search", f"{self.url}/jobs-search",
headers=ZipRecruiterScraper.headers(), headers=self.headers(),
params=ZipRecruiterScraper.add_params(scraper_input, page), params=self.add_params(scraper_input, page),
allow_redirects=True, allow_redirects=True,
proxy=self.proxy,
timeout_seconds=10, timeout_seconds=10,
) )
if response.status_code != 200: if response.status_code != 200:
@ -116,7 +111,11 @@ class ZipRecruiterScraper(Scraper):
:param scraper_input: :param scraper_input:
:return: job_response :return: job_response
""" """
start_page = (scraper_input.offset // self.jobs_per_page) + 1 if scraper_input.offset else 1 start_page = (
(scraper_input.offset // self.jobs_per_page) + 1
if scraper_input.offset
else 1
)
#: get first page to initialize session #: get first page to initialize session
job_list: list[JobPost] = self.find_jobs_in_page(scraper_input, start_page) job_list: list[JobPost] = self.find_jobs_in_page(scraper_input, start_page)
pages_to_process = max( pages_to_process = max(
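As a worked example of the `start_page` arithmetic above (using the scraper's `jobs_per_page = 20` shown earlier and the `offset=25` from the README example):

```python
jobs_per_page = 20
offset = 25

start_page = (offset // jobs_per_page) + 1 if offset else 1
print(start_page)  # (25 // 20) + 1 == 2, so scraping resumes on page 2
```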
@ -137,92 +136,10 @@ class ZipRecruiterScraper(Scraper):
job_list = job_list[: scraper_input.results_wanted] job_list = job_list[: scraper_input.results_wanted]
return JobResponse(jobs=job_list) return JobResponse(jobs=job_list)
def process_job_html_1(self, job: Tag) -> Optional[JobPost]:
"""
Parses a job from the job content tag
:param job: BeautifulSoup Tag for one job post
:return JobPost
TODO this method isn't finished due to not encountering this type of html often
"""
job_url = self.cleanurl(job.find("a", {"class": "job_link"})["href"])
if job_url in self.seen_urls:
return None
title = job.find("h2", {"class": "title"}).text
company = job.find("a", {"class": "company_name"}).text.strip()
description, updated_job_url = self.get_description(job_url)
# job_url = updated_job_url if updated_job_url else job_url
if description is None:
description = job.find("p", {"class": "job_snippet"}).text.strip()
job_type_element = job.find("li", {"class": "perk_item perk_type"})
job_type = None
if job_type_element:
job_type_text = (
job_type_element.text.strip().lower().replace("_", "").replace(" ", "")
)
job_type = ZipRecruiterScraper.get_job_type_enum(job_type_text)
date_posted = ZipRecruiterScraper.get_date_posted(job)
job_post = JobPost(
title=title,
description=description,
company_name=company,
location=ZipRecruiterScraper.get_location(job),
job_type=job_type,
compensation=ZipRecruiterScraper.get_compensation(job),
date_posted=date_posted,
job_url=job_url,
emails=extract_emails_from_text(description),
num_urgent_words=count_urgent_words(description)
)
return job_post
def process_job_html_2(self, job: Tag) -> Optional[JobPost]:
"""
Parses a job from the job content tag for a second variant of HTML that ZR uses
:param job: BeautifulSoup Tag for one job post
:return JobPost
"""
job_url = self.cleanurl(job.find("a", class_="job_link")["href"])
title = job.find("h2", class_="title").text
company = job.find("a", class_="company_name").text.strip()
description, updated_job_url = self.get_description(job_url)
# job_url = updated_job_url if updated_job_url else job_url
if description is None:
description = job.find("p", class_="job_snippet").get_text().strip()
job_type_text = job.find("li", class_="perk_item perk_type")
job_type = None
if job_type_text:
job_type_text = (
job_type_text.get_text()
.strip()
.lower()
.replace("-", "")
.replace(" ", "")
)
job_type = ZipRecruiterScraper.get_job_type_enum(job_type_text)
date_posted = ZipRecruiterScraper.get_date_posted(job)
job_post = JobPost(
title=title,
description=description,
company_name=company,
location=ZipRecruiterScraper.get_location(job),
job_type=job_type,
compensation=ZipRecruiterScraper.get_compensation(job),
date_posted=date_posted,
job_url=job_url,
)
return job_post
def process_job_javascript(self, job: dict) -> JobPost: def process_job_javascript(self, job: dict) -> JobPost:
"""the most common type of jobs page on ZR"""
title = job.get("Title") title = job.get("Title")
job_url = self.cleanurl(job.get("JobURL")) job_url = job.get("JobURL")
description, updated_job_url = self.get_description(job_url) description, updated_job_url = self.get_description(job_url)
# job_url = updated_job_url if updated_job_url else job_url # job_url = updated_job_url if updated_job_url else job_url
@ -276,37 +193,126 @@ class ZipRecruiterScraper(Scraper):
return JobPost( return JobPost(
title=title, title=title,
description=description,
company_name=company, company_name=company,
location=location, location=location,
job_type=job_type, job_type=job_type,
compensation=compensation, compensation=compensation,
date_posted=date_posted, date_posted=date_posted,
job_url=job_url, job_url=job_url,
description=description,
emails=extract_emails_from_text(description) if description else None,
num_urgent_words=count_urgent_words(description) if description else None,
)
def process_job_html_2(self, job: Tag) -> Optional[JobPost]:
"""
second most common type of jobs page on ZR after process_job_javascript()
Parses a job from the job content tag for a second variant of HTML that ZR uses Parses a job from the job content tag for a second variant of HTML that ZR uses
:param job: BeautifulSoup Tag for one job post
:return JobPost
"""
job_url = job.find("a", class_="job_link")["href"]
title = job.find("h2", class_="title").text
company = job.find("a", class_="company_name").text.strip()
description, updated_job_url = self.get_description(job_url)
# job_url = updated_job_url if updated_job_url else job_url
if description is None:
description = job.find("p", class_="job_snippet").get_text().strip()
job_type_text = job.find("li", class_="perk_item perk_type")
job_type = None
if job_type_text:
job_type_text = (
job_type_text.get_text()
.strip()
.lower()
.replace("-", "")
.replace(" ", "")
)
job_type = ZipRecruiterScraper.get_job_type_enum(job_type_text)
date_posted = ZipRecruiterScraper.get_date_posted(job)
job_post = JobPost(
title=title,
company_name=company,
location=ZipRecruiterScraper.get_location(job),
job_type=job_type,
compensation=ZipRecruiterScraper.get_compensation(job),
date_posted=date_posted,
job_url=job_url,
description=description,
emails=extract_emails_from_text(description) if description else None,
num_urgent_words=count_urgent_words(description) if description else None,
)
return job_post
def process_job_html_1(self, job: Tag) -> Optional[JobPost]:
"""
TODO this method isn't finished due to not encountering this type of html often
least common type of jobs page on ZR (rarely found)
Parses a job from the job content tag
:param job: BeautifulSoup Tag for one job post
:return JobPost
"""
job_url = job.find("a", {"class": "job_link"})["href"]
# job_url = self.cleanurl(job.find("a", {"class": "job_link"})["href"])
if job_url in self.seen_urls:
return None
title = job.find("h2", {"class": "title"}).text
company = job.find("a", {"class": "company_name"}).text.strip()
description, _ = self.get_description(job_url)
# job_url = updated_job_url if updated_job_url else job_url
# get description from jobs listing page if get_description from the specific job page fails
if description is None:
description = job.find("p", {"class": "job_snippet"}).text.strip()
job_type_element = job.find("li", {"class": "perk_item perk_type"})
job_type = None
if job_type_element:
job_type_text = (
job_type_element.text.strip().lower().replace("_", "").replace(" ", "")
)
job_type = ZipRecruiterScraper.get_job_type_enum(job_type_text)
date_posted = ZipRecruiterScraper.get_date_posted(job)
job_post = JobPost(
title=title,
description=description,
company_name=company,
location=ZipRecruiterScraper.get_location(job),
job_type=job_type,
compensation=ZipRecruiterScraper.get_compensation(job),
date_posted=date_posted,
job_url=job_url,
emails=extract_emails_from_text(description),
num_urgent_words=count_urgent_words(description),
) )
return job_post return job_post
@staticmethod @staticmethod
def get_job_type_enum(job_type_str: str) -> Optional[list[JobType]]: def get_job_type_enum(job_type_str: str) -> list[JobType] | None:
for job_type in JobType: for job_type in JobType:
if job_type_str in job_type.value: if job_type_str in job_type.value:
return [job_type] return [job_type]
return None return None
def get_description(self, job_page_url: str) -> Tuple[Optional[str], Optional[str]]: def get_description(self, job_page_url: str) -> Tuple[str | None, str | None]:
""" """
Retrieves job description by going to the job page url Retrieves job description by going to the job page url
:param job_page_url: :param job_page_url:
:param session:
:return: description or None, response url :return: description or None, response url
""" """
try: try:
response = requests.get( session = create_session(self.proxy)
response = session.get(
job_page_url, job_page_url,
headers=ZipRecruiterScraper.headers(), headers=self.headers(),
allow_redirects=True, allow_redirects=True,
timeout=5, timeout_seconds=5,
proxies=self.proxy,
) )
if response.status_code not in range(200, 400): if response.status_code not in range(200, 400):
return None, None return None, None
@ -462,8 +468,8 @@ class ZipRecruiterScraper(Scraper):
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36" "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"
} }
@staticmethod # @staticmethod
def cleanurl(url): # def cleanurl(url) -> str:
parsed_url = urlparse(url) # parsed_url = urlparse(url)
#
return urlunparse((parsed_url.scheme, parsed_url.netloc, parsed_url.path, parsed_url.params, '', '')) # return urlunparse((parsed_url.scheme, parsed_url.netloc, parsed_url.path, parsed_url.params, '', ''))

View File

@ -9,4 +9,6 @@ def test_all():
results_wanted=5, results_wanted=5,
) )
assert isinstance(result, pd.DataFrame) and not result.empty, "Result should be a non-empty DataFrame" assert (
isinstance(result, pd.DataFrame) and not result.empty
), "Result should be a non-empty DataFrame"

View File

@ -7,4 +7,6 @@ def test_indeed():
site_name="indeed", site_name="indeed",
search_term="software engineer", search_term="software engineer",
) )
assert isinstance(result, pd.DataFrame) and not result.empty, "Result should be a non-empty DataFrame" assert (
isinstance(result, pd.DataFrame) and not result.empty
), "Result should be a non-empty DataFrame"

View File

@ -7,4 +7,6 @@ def test_linkedin():
site_name="linkedin", site_name="linkedin",
search_term="software engineer", search_term="software engineer",
) )
assert isinstance(result, pd.DataFrame) and not result.empty, "Result should be a non-empty DataFrame" assert (
isinstance(result, pd.DataFrame) and not result.empty
), "Result should be a non-empty DataFrame"

View File

@ -8,4 +8,6 @@ def test_ziprecruiter():
search_term="software engineer", search_term="software engineer",
) )
assert isinstance(result, pd.DataFrame) and not result.empty, "Result should be a non-empty DataFrame" assert (
isinstance(result, pd.DataFrame) and not result.empty
), "Result should be a non-empty DataFrame"