mirror of https://github.com/Bunsly/JobSpy
parent cd29f79796
commit 5cb7ffe5fd
README.md

@@ -11,7 +11,7 @@ work with us.*

 - Scrapes job postings from **LinkedIn**, **Indeed**, **Glassdoor**, & **ZipRecruiter** simultaneously
 - Aggregates the job postings in a Pandas DataFrame
-- Proxy support
+- Proxies support

 [Video Guide for JobSpy](https://www.youtube.com/watch?v=RuP1HrAZnxs&pp=ygUgam9icyBzY3JhcGVyIGJvdCBsaW5rZWRpbiBpbmRlZWQ%3D) -
 Updated for release v1.1.3

@@ -39,7 +39,10 @@ jobs = scrape_jobs(
     results_wanted=20,
     hours_old=72, # (only Linkedin/Indeed is hour specific, others round up to days old)
     country_indeed='USA', # only needed for indeed / glassdoor

+    # linkedin_fetch_description=True # get full description and direct job url for linkedin (slower)
+    # proxies=["Efb5EA8OIk0BQb:wifi;us;@proxy.soax.com:9000", "localhost"],
+
 )
 print(f"Found {len(jobs)} jobs")
 print(jobs.head())

@@ -76,8 +79,9 @@ Optional
 ├── job_type (str):
 |  fulltime, parttime, internship, contract
 │
-├── proxy (str):
-|  in format 'http://user:pass@host:port'
+├── proxies (list):
+|  in format ['user:pass@host:port', 'localhost']
+|  each job board will round robin through the proxies
 │
 ├── is_remote (bool)
 │
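
To make the new `proxies` option concrete, here is a minimal sketch of passing a rotating list to `scrape_jobs`, based only on the parameters shown in this diff; the proxy addresses are placeholders, not working endpoints.

```python
from jobspy import scrape_jobs

# Placeholder proxies -- substitute real 'user:pass@host:port' entries.
# Each job board scraper round-robins through the list; a "localhost" entry
# means that turn is sent without a proxy.
jobs = scrape_jobs(
    site_name=["indeed", "linkedin"],
    search_term="software engineer",
    location="Dallas, TX",
    results_wanted=20,
    country_indeed="USA",
    proxies=["user:pass@203.0.113.10:8080", "user:pass@203.0.113.11:8080", "localhost"],
)
print(f"Found {len(jobs)} jobs")
```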

@@ -201,7 +205,7 @@ You can specify the following countries when searching on Indeed (use the exact
 ## Notes
 * Indeed is the best scraper currently with no rate limiting.
 * All the job board endpoints are capped at around 1000 jobs on a given search.
-* LinkedIn is the most restrictive and usually rate limits around the 10th page.
+* LinkedIn is the most restrictive and usually rate limits around the 10th page with one IP. Proxies are essentially a must.

 ## Frequently Asked Questions


@@ -216,7 +220,7 @@ persist, [submit an issue](https://github.com/Bunsly/JobSpy/issues).
 **Q: Received a response code 429?**
 **A:** This indicates that you have been blocked by the job board site for sending too many requests. All of the job board sites are aggressive with blocking. We recommend:

-- Waiting some time between scrapes (site-dependent).
-- Trying a VPN or proxy to change your IP address.
+- Wait some time between scrapes (site-dependent).
+- Try using the proxies param to change your IP address.

 ---
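
As a rough illustration of the first recommendation, a hedged sketch of pacing repeated scrapes; the delay, batch size, and search parameters are arbitrary placeholders.

```python
import time

from jobspy import scrape_jobs

# Hypothetical pacing loop: pull a small batch per call and sleep between calls
# so the job boards are less likely to respond with HTTP 429.
for offset in range(0, 100, 20):
    jobs = scrape_jobs(
        site_name=["indeed"],
        search_term="software engineer",
        country_indeed="USA",
        results_wanted=20,
        offset=offset,
    )
    print(f"offset {offset}: {len(jobs)} jobs")
    time.sleep(60)  # arbitrary cool-down between batches
```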

@@ -1,30 +0,0 @@
-from jobspy import scrape_jobs
-import pandas as pd
-
-jobs: pd.DataFrame = scrape_jobs(
-    site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor"],
-    search_term="software engineer",
-    location="Dallas, TX",
-    results_wanted=25, # be wary: the higher this is, the more likely you'll get blocked (a rotating proxy can help)
-    country_indeed="USA",
-    # proxy="http://jobspy:5a4vpWtj8EeJ2hoYzk@ca.smartproxy.com:20001",
-)
-
-# formatting for pandas
-pd.set_option("display.max_columns", None)
-pd.set_option("display.max_rows", None)
-pd.set_option("display.width", None)
-pd.set_option("display.max_colwidth", 50) # set to 0 to see full job url / desc
-
-# 1: output to console
-print(jobs)
-
-# 2: output to .csv
-jobs.to_csv("./jobs.csv", index=False)
-print("outputted to jobs.csv")
-
-# 3: output to .xlsx
-# jobs.to_xlsx('jobs.xlsx', index=False)
-
-# 4: display in Jupyter Notebook (1. pip install jupyter 2. jupyter notebook)
-# display(jobs)

@@ -1,167 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "00a94b47-f47b-420f-ba7e-714ef219c006",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from jobspy import scrape_jobs\n",
-    "import pandas as pd\n",
-    "from IPython.display import display, HTML"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9f773e6c-d9fc-42cc-b0ef-63b739e78435",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pd.set_option('display.max_columns', None)\n",
-    "pd.set_option('display.max_rows', None)\n",
-    "pd.set_option('display.width', None)\n",
-    "pd.set_option('display.max_colwidth', 50)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1253c1f8-9437-492e-9dd3-e7fe51099420",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# example 1 (no hyperlinks, USA)\n",
-    "jobs = scrape_jobs(\n",
-    "    site_name=[\"linkedin\"],\n",
-    "    location='san francisco',\n",
-    "    search_term=\"engineer\",\n",
-    "    results_wanted=5,\n",
-    "\n",
-    "    # use if you want to use a proxy\n",
-    "    # proxy=\"socks5://jobspy:5a4vpWtj4EeJ2hoYzk@us.smartproxy.com:10001\",\n",
-    "    proxy=\"http://jobspy:5a4vpWtj4EeJ2hoYzk@us.smartproxy.com:10001\",\n",
-    "    #proxy=\"https://jobspy:5a4vpWtj4EeJ2hoYzk@us.smartproxy.com:10001\",\n",
-    ")\n",
-    "display(jobs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6a581b2d-f7da-4fac-868d-9efe143ee20a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# example 2 - remote USA & hyperlinks\n",
-    "jobs = scrape_jobs(\n",
-    "    site_name=[\"linkedin\", \"zip_recruiter\", \"indeed\"],\n",
-    "    # location='san francisco',\n",
-    "    search_term=\"software engineer\",\n",
-    "    country_indeed=\"USA\",\n",
-    "    hyperlinks=True,\n",
-    "    is_remote=True,\n",
-    "    results_wanted=5, \n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fe8289bc-5b64-4202-9a64-7c117c83fd9a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# use if hyperlinks=True\n",
-    "html = jobs.to_html(escape=False)\n",
-    "# change max-width: 200px to show more or less of the content\n",
-    "truncate_width = f'<style>.dataframe td {{ max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }}</style>{html}'\n",
-    "display(HTML(truncate_width))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "951c2fe1-52ff-407d-8bb1-068049b36777",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# example 3 - with hyperlinks, international - linkedin (no zip_recruiter)\n",
-    "jobs = scrape_jobs(\n",
-    "    site_name=[\"linkedin\"],\n",
-    "    location='berlin',\n",
-    "    search_term=\"engineer\",\n",
-    "    hyperlinks=True,\n",
-    "    results_wanted=5,\n",
-    "    easy_apply=True\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1e37a521-caef-441c-8fc2-2eb5b2e7da62",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# use if hyperlinks=True\n",
-    "html = jobs.to_html(escape=False)\n",
-    "# change max-width: 200px to show more or less of the content\n",
-    "truncate_width = f'<style>.dataframe td {{ max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }}</style>{html}'\n",
-    "display(HTML(truncate_width))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0650e608-0b58-4bf5-ae86-68348035b16a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# example 4 - international indeed (no zip_recruiter)\n",
-    "jobs = scrape_jobs(\n",
-    "    site_name=[\"indeed\"],\n",
-    "    search_term=\"engineer\",\n",
-    "    country_indeed = \"China\",\n",
-    "    hyperlinks=True\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "40913ac8-3f8a-4d7e-ac47-afb88316432b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# use if hyperlinks=True\n",
-    "html = jobs.to_html(escape=False)\n",
-    "# change max-width: 200px to show more or less of the content\n",
-    "truncate_width = f'<style>.dataframe td {{ max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }}</style>{html}'\n",
-    "display(HTML(truncate_width))"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

@@ -1,78 +0,0 @@
-from jobspy import scrape_jobs
-import pandas as pd
-import os
-import time
-
-# creates a new csv filename if jobs.csv already exists
-csv_filename = "jobs.csv"
-counter = 1
-while os.path.exists(csv_filename):
-    csv_filename = f"jobs_{counter}.csv"
-    counter += 1
-
-# results wanted and offset
-results_wanted = 1000
-offset = 0
-
-all_jobs = []
-
-# max retries
-max_retries = 3
-
-# number of results at each iteration
-results_in_each_iteration = 30
-
-while len(all_jobs) < results_wanted:
-    retry_count = 0
-    while retry_count < max_retries:
-        print("Doing from", offset, "to", offset + results_in_each_iteration, "jobs")
-        try:
-            jobs = scrape_jobs(
-                site_name=["indeed"],
-                search_term="software engineer",
-                # New York, NY
-                # Dallas, TX
-                # Los Angeles, CA
-                location="Los Angeles, CA",
-                results_wanted=min(
-                    results_in_each_iteration, results_wanted - len(all_jobs)
-                ),
-                country_indeed="USA",
-                offset=offset,
-                # proxy="http://jobspy:5a4vpWtj8EeJ2hoYzk@ca.smartproxy.com:20001",
-            )
-
-            # Add the scraped jobs to the list
-            all_jobs.extend(jobs.to_dict("records"))
-
-            # Increment the offset for the next page of results
-            offset += results_in_each_iteration
-
-            # Add a delay to avoid rate limiting (you can adjust the delay time as needed)
-            print(f"Scraped {len(all_jobs)} jobs")
-            print("Sleeping secs", 100 * (retry_count + 1))
-            time.sleep(100 * (retry_count + 1))  # sleep between requests to avoid rate limiting
-
-            break  # Break out of the retry loop if successful
-        except Exception as e:
-            print(f"Error: {e}")
-            retry_count += 1
-            print("Sleeping secs before retry", 100 * (retry_count + 1))
-            time.sleep(100 * (retry_count + 1))
-            if retry_count >= max_retries:
-                print("Max retries reached. Exiting.")
-                break
-
-# DataFrame from the collected job data
-jobs_df = pd.DataFrame(all_jobs)
-
-# Formatting
-pd.set_option("display.max_columns", None)
-pd.set_option("display.max_rows", None)
-pd.set_option("display.width", None)
-pd.set_option("display.max_colwidth", 50)
-
-print(jobs_df)
-
-jobs_df.to_csv(csv_filename, index=False)
-print(f"Outputted to {csv_filename}")

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.53"
+version = "1.1.54"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/JobSpy"

@@ -30,7 +30,7 @@ def scrape_jobs(
     results_wanted: int = 15,
     country_indeed: str = "usa",
     hyperlinks: bool = False,
-    proxy: str | None = None,
+    proxies: list[str] | str | None = None,
     description_format: str = "markdown",
     linkedin_fetch_description: bool | None = False,
     linkedin_company_ids: list[int] | None = None,

@@ -96,7 +96,7 @@ def scrape_jobs(

     def scrape_site(site: Site) -> Tuple[str, JobResponse]:
         scraper_class = SCRAPER_MAPPING[site]
-        scraper = scraper_class(proxy=proxy)
+        scraper = scraper_class(proxies=proxies)
         scraped_data: JobResponse = scraper.scrape(scraper_input)
         cap_name = site.value.capitalize()
         site_name = "ZipRecruiter" if cap_name == "Zip_recruiter" else cap_name

@@ -39,9 +39,9 @@ class ScraperInput(BaseModel):


 class Scraper(ABC):
-    def __init__(self, site: Site, proxy: list[str] | None = None):
+    def __init__(self, site: Site, proxies: list[str] | None = None):
+        self.proxies = proxies
         self.site = site
-        self.proxy = (lambda p: {"http": p, "https": p} if p else None)(proxy)

     @abstractmethod
     def scrape(self, scraper_input: ScraperInput) -> JobResponse: ...
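
To show what the new constructor contract implies for a concrete scraper, here is a hedged sketch of a subclass; the class name and body are illustrative only and simply mirror the imports and calls used by the real scrapers in this diff.

```python
from .. import Scraper, ScraperInput, Site
from ..utils import create_session
from ...jobs import JobResponse


class ExampleBoardScraper(Scraper):
    def __init__(self, proxies: list[str] | None = None):
        # The base class now stores self.proxies; the subclass just forwards them.
        super().__init__(Site.INDEED, proxies=proxies)
        self.session = create_session(proxies=self.proxies, is_tls=False, has_retry=True)

    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
        # A real scraper would issue requests through self.session here;
        # returning an empty JobResponse is assumed valid for illustration.
        return JobResponse(jobs=[])
```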

@@ -34,12 +34,12 @@ from ...jobs import (


 class GlassdoorScraper(Scraper):
-    def __init__(self, proxy: Optional[str] = None):
+    def __init__(self, proxies: list[str] | str | None = None):
         """
         Initializes GlassdoorScraper with the Glassdoor job search url
         """
         site = Site(Site.GLASSDOOR)
-        super().__init__(site, proxy=proxy)
+        super().__init__(site, proxies=proxies)

         self.base_url = None
         self.country = None

@@ -59,7 +59,7 @@ class GlassdoorScraper(Scraper):
         self.scraper_input.results_wanted = min(900, scraper_input.results_wanted)
         self.base_url = self.scraper_input.country.get_glassdoor_url()

-        self.session = create_session(self.proxy, is_tls=True, has_retry=True)
+        self.session = create_session(proxies=self.proxies, is_tls=True, has_retry=True)
         token = self._get_csrf_token()
         self.headers["gd-csrf-token"] = token if token else self.fallback_token


@@ -245,7 +245,6 @@ class GlassdoorScraper(Scraper):
         if not location or is_remote:
             return "11047", "STATE"  # remote options
         url = f"{self.base_url}/findPopularLocationAjax.htm?maxLocationsToReturn=10&term={location}"
-        session = create_session(self.proxy, has_retry=True)
         res = self.session.get(url, headers=self.headers)
         if res.status_code != 200:
             if res.status_code == 429:

@@ -12,14 +12,13 @@ from typing import Tuple
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, Future

-import requests
-
 from .. import Scraper, ScraperInput, Site
 from ..utils import (
     extract_emails_from_text,
     get_enum_from_job_type,
     markdown_converter,
     logger,
+    create_session,
 )
 from ...jobs import (
     JobPost,

@@ -33,10 +32,13 @@ from ...jobs import (


 class IndeedScraper(Scraper):
-    def __init__(self, proxy: str | None = None):
+    def __init__(self, proxies: list[str] | str | None = None):
         """
         Initializes IndeedScraper with the Indeed API url
         """
+        super().__init__(Site.INDEED, proxies=proxies)
+
+        self.session = create_session(proxies=self.proxies, is_tls=False)
         self.scraper_input = None
         self.jobs_per_page = 100
         self.num_workers = 10

@@ -45,8 +47,6 @@ class IndeedScraper(Scraper):
         self.api_country_code = None
         self.base_url = None
         self.api_url = "https://apis.indeed.com/graphql"
-        site = Site(Site.INDEED)
-        super().__init__(site, proxy=proxy)

     def scrape(self, scraper_input: ScraperInput) -> JobResponse:
         """

@@ -90,13 +90,13 @@ class IndeedScraper(Scraper):
         jobs = []
         new_cursor = None
         filters = self._build_filters()
-        search_term = self.scraper_input.search_term.replace('"', '\\"') if self.scraper_input.search_term else ""
-        query = self.job_search_query.format(
-            what=(
-                f'what: "{search_term}"'
-                if search_term
-                else ""
-            ),
+        search_term = (
+            self.scraper_input.search_term.replace('"', '\\"')
+            if self.scraper_input.search_term
+            else ""
+        )
+        query = self.job_search_query.format(
+            what=(f'what: "{search_term}"' if search_term else ""),
             location=(
                 f'location: {{where: "{self.scraper_input.location}", radius: {self.scraper_input.distance}, radiusUnit: MILES}}'
                 if self.scraper_input.location

@@ -111,11 +111,10 @@ class IndeedScraper(Scraper):
         }
         api_headers = self.api_headers.copy()
         api_headers["indeed-co"] = self.api_country_code
-        response = requests.post(
+        response = self.session.post(
             self.api_url,
             headers=api_headers,
             json=payload,
-            proxies=self.proxy,
             timeout=10,
         )
         if response.status_code != 200:

@@ -10,14 +10,13 @@ from __future__ import annotations
 import time
 import random
 import regex as re
-import urllib.parse
 from typing import Optional
 from datetime import datetime

 from threading import Lock
 from bs4.element import Tag
 from bs4 import BeautifulSoup
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urlparse, urlunparse, unquote

 from .. import Scraper, ScraperInput, Site
 from ..exceptions import LinkedInException

@@ -46,11 +45,19 @@ class LinkedInScraper(Scraper):
     band_delay = 4
     jobs_per_page = 25

-    def __init__(self, proxy: Optional[str] = None):
+    def __init__(self, proxies: list[str] | str | None = None):
         """
         Initializes LinkedInScraper with the LinkedIn job search url
         """
-        super().__init__(Site(Site.LINKEDIN), proxy=proxy)
+        super().__init__(Site.LINKEDIN, proxies=proxies)
+        self.session = create_session(
+            proxies=self.proxies,
+            is_tls=False,
+            has_retry=True,
+            delay=5,
+            clear_cookies=True,
+        )
+        self.session.headers.update(self.headers)
         self.scraper_input = None
         self.country = "worldwide"
         self.job_url_direct_regex = re.compile(r'(?<=\?url=)[^"]+')

@@ -74,7 +81,6 @@ class LinkedInScraper(Scraper):
         )
         while continue_search():
             logger.info(f"LinkedIn search page: {page // 25 + 1}")
-            session = create_session(is_tls=False, has_retry=True, delay=5)
             params = {
                 "keywords": scraper_input.search_term,
                 "location": scraper_input.location,

@@ -99,12 +105,9 @@ class LinkedInScraper(Scraper):

             params = {k: v for k, v in params.items() if v is not None}
             try:
-                response = session.get(
+                response = self.session.get(
                     f"{self.base_url}/jobs-guest/jobs/api/seeMoreJobPostings/search?",
                     params=params,
-                    allow_redirects=True,
-                    proxies=self.proxy,
-                    headers=self.headers,
                     timeout=10,
                 )
                 if response.status_code not in range(200, 400):

@@ -241,10 +244,7 @@ class LinkedInScraper(Scraper):
         :return: dict
         """
         try:
-            session = create_session(is_tls=False, has_retry=True)
-            response = session.get(
-                job_page_url, headers=self.headers, timeout=5, proxies=self.proxy
-            )
+            response = self.session.get(job_page_url, timeout=5)
             response.raise_for_status()
         except:
             return {}

@@ -340,7 +340,7 @@ class LinkedInScraper(Scraper):
                 job_url_direct_content.decode_contents().strip()
             )
             if job_url_direct_match:
-                job_url_direct = urllib.parse.unquote(job_url_direct_match.group())
+                job_url_direct = unquote(job_url_direct_match.group())

         return job_url_direct


@@ -2,6 +2,8 @@ from __future__ import annotations

 import re
 import logging
+from itertools import cycle

 import requests
 import tls_client
+import numpy as np

@@ -21,6 +23,104 @@ if not logger.handlers:
     logger.addHandler(console_handler)


+class RotatingProxySession:
+    def __init__(self, proxies=None):
+        if isinstance(proxies, str):
+            self.proxy_cycle = cycle([self.format_proxy(proxies)])
+        elif isinstance(proxies, list):
+            self.proxy_cycle = (
+                cycle([self.format_proxy(proxy) for proxy in proxies])
+                if proxies
+                else None
+            )
+        else:
+            self.proxy_cycle = None
+
+    @staticmethod
+    def format_proxy(proxy):
+        """Utility method to format a proxy string into a dictionary."""
+        if proxy.startswith("http://") or proxy.startswith("https://"):
+            return {"http": proxy, "https": proxy}
+        return {"http": f"http://{proxy}", "https": f"http://{proxy}"}
+
+
+class RequestsRotating(RotatingProxySession, requests.Session):
+
+    def __init__(self, proxies=None, has_retry=False, delay=1, clear_cookies=False):
+        RotatingProxySession.__init__(self, proxies=proxies)
+        requests.Session.__init__(self)
+        self.clear_cookies = clear_cookies
+        self.allow_redirects = True
+        self.setup_session(has_retry, delay)
+
+    def setup_session(self, has_retry, delay):
+        if has_retry:
+            retries = Retry(
+                total=3,
+                connect=3,
+                status=3,
+                status_forcelist=[500, 502, 503, 504, 429],
+                backoff_factor=delay,
+            )
+            adapter = HTTPAdapter(max_retries=retries)
+            self.mount("http://", adapter)
+            self.mount("https://", adapter)
+
+    def request(self, method, url, **kwargs):
+        if self.clear_cookies:
+            self.cookies.clear()
+
+        if self.proxy_cycle:
+            next_proxy = next(self.proxy_cycle)
+            if next_proxy["http"] != "http://localhost":
+                self.proxies = next_proxy
+            else:
+                self.proxies = {}
+        return requests.Session.request(self, method, url, **kwargs)
+
+
+class TLSRotating(RotatingProxySession, tls_client.Session):
+
+    def __init__(self, proxies=None):
+        RotatingProxySession.__init__(self, proxies=proxies)
+        tls_client.Session.__init__(self, random_tls_extension_order=True)
+
+    def execute_request(self, *args, **kwargs):
+        if self.proxy_cycle:
+            next_proxy = next(self.proxy_cycle)
+            if next_proxy["http"] != "http://localhost":
+                self.proxies = next_proxy
+            else:
+                self.proxies = {}
+        response = tls_client.Session.execute_request(self, *args, **kwargs)
+        return response
+
+
+def create_session(
+    *,
+    proxies: dict | str | None = None,
+    is_tls: bool = True,
+    has_retry: bool = False,
+    delay: int = 1,
+    clear_cookies: bool = False,
+) -> requests.Session:
+    """
+    Creates a requests session with optional tls, proxy, and retry settings.
+    :return: A session object
+    """
+    if is_tls:
+        session = TLSRotating(proxies=proxies)
+    else:
+        session = RequestsRotating(
+            proxies=proxies,
+            has_retry=has_retry,
+            delay=delay,
+            clear_cookies=clear_cookies,
+        )
+
+    return session
+
+
 def set_logger_level(verbose: int = 2):
     """
     Adjusts the logger's level. This function allows the logging level to be changed at runtime.

@@ -52,39 +152,6 @@ def extract_emails_from_text(text: str) -> list[str] | None:
     return email_regex.findall(text)


-def create_session(
-    proxy: dict | None = None,
-    is_tls: bool = True,
-    has_retry: bool = False,
-    delay: int = 1,
-) -> requests.Session:
-    """
-    Creates a requests session with optional tls, proxy, and retry settings.
-    :return: A session object
-    """
-    if is_tls:
-        session = tls_client.Session(random_tls_extension_order=True)
-        session.proxies = proxy
-    else:
-        session = requests.Session()
-        session.allow_redirects = True
-        if proxy:
-            session.proxies.update(proxy)
-        if has_retry:
-            retries = Retry(
-                total=3,
-                connect=3,
-                status=3,
-                status_forcelist=[500, 502, 503, 504, 429],
-                backoff_factor=delay,
-            )
-            adapter = HTTPAdapter(max_retries=retries)
-
-            session.mount("http://", adapter)
-            session.mount("https://", adapter)
-    return session
-
-
 def get_enum_from_job_type(job_type_str: str) -> JobType | None:
     """
     Given a string, returns the corresponding JobType enum member if a match is found.
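
A hedged sketch of exercising the rotating-proxy session introduced above; the import path, target URL, and proxy endpoints are assumptions for illustration, not taken from the diff.

```python
from jobspy.scrapers.utils import create_session

# create_session is keyword-only; is_tls=False returns the RequestsRotating
# (requests.Session) variant, here with retry/backoff on 429 and 5xx responses.
session = create_session(
    proxies=["user:pass@203.0.113.10:8080", "user:pass@203.0.113.11:8080"],
    is_tls=False,
    has_retry=True,
)

# Each request pulls the next proxy from the cycle, so consecutive calls
# alternate between the two placeholder endpoints.
for _ in range(4):
    print(session.get("https://httpbin.org/ip", timeout=10).text)
```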

@@ -36,14 +36,15 @@ class ZipRecruiterScraper(Scraper):
     base_url = "https://www.ziprecruiter.com"
     api_url = "https://api.ziprecruiter.com"

-    def __init__(self, proxy: Optional[str] = None):
+    def __init__(self, proxies: list[str] | str | None = None):
         """
         Initializes ZipRecruiterScraper with the ZipRecruiter job search url
         """
+        super().__init__(Site.ZIP_RECRUITER, proxies=proxies)
+
         self.scraper_input = None
-        self.session = create_session(proxy)
+        self.session = create_session(proxies=proxies)
         self._get_cookies()
-        super().__init__(Site.ZIP_RECRUITER, proxy=proxy)

         self.delay = 5
         self.jobs_per_page = 20

@@ -151,7 +152,7 @@ class ZipRecruiterScraper(Scraper):
         comp_max = int(job["compensation_max"]) if "compensation_max" in job else None
         comp_currency = job.get("compensation_currency")
         return JobPost(
-            id=str(job['listing_key']),
+            id=str(job["listing_key"]),
             title=title,
             company_name=company,
             location=location,