chore:version

FEATURE: Add the "ca_cert" setting for providing a Certification Authority certificate in order to use proxies requiring it. (#204 )
fix:turkey
2026-03-05 03:54:31 -08:00 · 2024-10-08 17:49:06 -05:00 · 2024-10-08 17:46:46 -05:00 · 2024-10-02 01:31:00 -05:00 · 2024-09-19 20:41:22 -05:00 · 2024-09-18 18:49:41 -05:00
10 changed files with 40 additions and 22 deletions
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ jobs = scrape_jobs(
    hours_old=72, # (only Linkedin/Indeed is hour specific, others round up to days old)
    country_indeed='USA',  # only needed for indeed / glassdoor
-    # linkedin_fetch_description=True # get full description , direct job url , company industry and job level (seniority level) for linkedin (slower)
+    # linkedin_fetch_description=True # get more info, such as the full description and direct job url, for linkedin (slower)
    # proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"],
 )
@@ -79,6 +79,9 @@ Optional
 ├── proxies (list): 
 |    in format ['user:pass@host:port', 'localhost']
 |    each job board scraper will round robin through the proxies
 |
 ├── ca_cert (str)
 |    path to CA Certificate file for proxies
 │
 ├── is_remote (bool)
 │
@@ -216,10 +219,8 @@ You can specify the following countries when searching on Indeed (use the exact
 ## Frequently Asked Questions
 ---
-
+**Q: Why is Indeed giving unrelated roles?**  
-**Q: Encountering issues with your queries?**  
+**A:** Indeed searches for each of your terms separately, e.g. for software intern it searches software OR intern. Try search_term='"software intern"' in quotes for stricter searching.
 **A:** Try reducing the number of `results_wanted` and/or broadening the filters. If problems
 persist, [submit an issue](https://github.com/Bunsly/JobSpy/issues).
 ---
@@ -230,3 +231,9 @@ persist, [submit an issue](https://github.com/Bunsly/JobSpy/issues).
 - Try using the proxies param to change your IP address.
 ---
 **Q: Encountering issues with your queries?**  
 **A:** Try reducing the number of `results_wanted` and/or broadening the filters. If problems
 persist, [submit an issue](https://github.com/Bunsly/JobSpy/issues).
 ---
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.62"
+version = "1.1.69"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/JobSpy"
--- a/src/jobspy/init.py
+++ b/src/jobspy/init.py
@@ -31,6 +31,7 @@ def scrape_jobs(
    country_indeed: str = "usa",
    hyperlinks: bool = False,
    proxies: list[str] | str | None = None,
    ca_cert: str | None = None,
    description_format: str = "markdown",
    linkedin_fetch_description: bool | None = False,
    linkedin_company_ids: list[int] | None = None,
@@ -97,7 +98,7 @@ def scrape_jobs(
    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
        scraper_class = SCRAPER_MAPPING[site]
-        scraper = scraper_class(proxies=proxies)
+        scraper = scraper_class(proxies=proxies, ca_cert=ca_cert)
        scraped_data: JobResponse = scraper.scrape(scraper_input)
        cap_name = site.value.capitalize()
        site_name = "ZipRecruiter" if cap_name == "Zip_recruiter" else cap_name
--- a/src/jobspy/jobs/init.py
+++ b/src/jobspy/jobs/init.py
@@ -93,6 +93,7 @@ class Country(Enum):
    KUWAIT = ("kuwait", "kw")
    LUXEMBOURG = ("luxembourg", "lu")
    MALAYSIA = ("malaysia", "malaysia:my", "com")
    MALTA = ("malta", "malta:mt", "mt")
    MEXICO = ("mexico", "mx", "com.mx")
    MOROCCO = ("morocco", "ma")
    NETHERLANDS = ("netherlands", "nl", "nl")
@@ -117,7 +118,7 @@ class Country(Enum):
    SWITZERLAND = ("switzerland", "ch", "de:ch")
    TAIWAN = ("taiwan", "tw")
    THAILAND = ("thailand", "th")
-    TURKEY = ("turkey", "tr")
+    TURKEY = ("türkiye,turkey", "tr")
    UKRAINE = ("ukraine", "ua")
    UNITEDARABEMIRATES = ("united arab emirates", "ae")
    UK = ("uk,united kingdom", "uk:gb", "co.uk")
--- a/src/jobspy/scrapers/init.py
+++ b/src/jobspy/scrapers/init.py
@@ -42,9 +42,10 @@ class ScraperInput(BaseModel):
 class Scraper(ABC):
-    def __init__(self, site: Site, proxies: list[str] | None = None):
+    def __init__(self, site: Site, proxies: list[str] | None = None, ca_cert: str | None = None):
        self.proxies = proxies
        self.site = site
        self.proxies = proxies
        self.ca_cert = ca_cert
    @abstractmethod
    def scrape(self, scraper_input: ScraperInput) -> JobResponse: ...
--- a/src/jobspy/scrapers/glassdoor/init.py
+++ b/src/jobspy/scrapers/glassdoor/init.py
@@ -34,12 +34,12 @@ from ...jobs import (
 class GlassdoorScraper(Scraper):
-    def __init__(self, proxies: list[str] | str | None = None):
+    def __init__(self, proxies: list[str] | str | None = None, ca_cert: str | None = None):
        """
        Initializes GlassdoorScraper with the Glassdoor job search url
        """
        site = Site(Site.GLASSDOOR)
-        super().__init__(site, proxies=proxies)
+        super().__init__(site, proxies=proxies, ca_cert=ca_cert)
        self.base_url = None
        self.country = None
@@ -59,7 +59,7 @@ class GlassdoorScraper(Scraper):
        self.scraper_input.results_wanted = min(900, scraper_input.results_wanted)
        self.base_url = self.scraper_input.country.get_glassdoor_url()
-        self.session = create_session(proxies=self.proxies, is_tls=True, has_retry=True)
+        self.session = create_session(proxies=self.proxies, ca_cert=self.ca_cert, is_tls=True, has_retry=True)
        token = self._get_csrf_token()
        self.headers["gd-csrf-token"] = token if token else self.fallback_token
--- a/src/jobspy/scrapers/indeed/init.py
+++ b/src/jobspy/scrapers/indeed/init.py
@@ -32,13 +32,13 @@ from ...jobs import (
 class IndeedScraper(Scraper):
-    def __init__(self, proxies: list[str] | str | None = None):
+    def __init__(self, proxies: list[str] | str | None = None, ca_cert: str | None = None):
        """
        Initializes IndeedScraper with the Indeed API url
        """
        super().__init__(Site.INDEED, proxies=proxies)
-        self.session = create_session(proxies=self.proxies, is_tls=False)
+        self.session = create_session(proxies=self.proxies, ca_cert=ca_cert, is_tls=False)
        self.scraper_input = None
        self.jobs_per_page = 100
        self.num_workers = 10
@@ -364,8 +364,8 @@ class IndeedScraper(Scraper):
            {what}
            {location}
            limit: 100
            sort: DATE
            {cursor}
            sort: RELEVANCE
            {filters}
          ) {{
            pageInfo {{
--- a/src/jobspy/scrapers/linkedin/init.py
+++ b/src/jobspy/scrapers/linkedin/init.py
@@ -44,13 +44,14 @@ class LinkedInScraper(Scraper):
    band_delay = 4
    jobs_per_page = 25
-    def __init__(self, proxies: list[str] | str | None = None):
+    def __init__(self, proxies: list[str] | str | None = None, ca_cert: str | None = None):
        """
        Initializes LinkedInScraper with the LinkedIn job search url
        """
-        super().__init__(Site.LINKEDIN, proxies=proxies)
+        super().__init__(Site.LINKEDIN, proxies=proxies, ca_cert=ca_cert)
        self.session = create_session(
            proxies=self.proxies,
            ca_cert=ca_cert,
            is_tls=False,
            has_retry=True,
            delay=5,
@@ -236,7 +237,7 @@ class LinkedInScraper(Scraper):
        """
        try:
            response = self.session.get(
-                f"{self.base_url}/jobs-guest/jobs/api/jobPosting/{job_id}", timeout=5
+                f"{self.base_url}/jobs/view/{job_id}", timeout=5
            )
            response.raise_for_status()
        except:
--- a/src/jobspy/scrapers/utils.py
+++ b/src/jobspy/scrapers/utils.py
@@ -100,6 +100,7 @@ class TLSRotating(RotatingProxySession, tls_client.Session):
 def create_session(
    *,
    proxies: dict | str | None = None,
    ca_cert: str | None = None,
    is_tls: bool = True,
    has_retry: bool = False,
    delay: int = 1,
@@ -119,6 +120,9 @@ def create_session(
            clear_cookies=clear_cookies,
        )
    if ca_cert:
        session.verify = ca_cert
    return session
@@ -198,6 +202,7 @@ def extract_salary(
    if not salary_str:
        return None, None, None, None
    annual_max_salary = None
    min_max_pattern = r"\$(\d+(?:,\d+)?(?:\.\d+)?)([kK]?)\s*[-—–]\s*(?:\$)?(\d+(?:,\d+)?(?:\.\d+)?)([kK]?)"
    def to_int(s):
@@ -238,6 +243,8 @@ def extract_salary(
            annual_max_salary = max_salary
        # Ensure salary range is within specified limits
        if not annual_max_salary:
            return None, None, None, None
        if (
            lower_limit <= annual_min_salary <= upper_limit
            and lower_limit <= annual_max_salary <= upper_limit
--- a/src/jobspy/scrapers/ziprecruiter/init.py
+++ b/src/jobspy/scrapers/ziprecruiter/init.py
@@ -41,14 +41,14 @@ class ZipRecruiterScraper(Scraper):
    base_url = "https://www.ziprecruiter.com"
    api_url = "https://api.ziprecruiter.com"
-    def __init__(self, proxies: list[str] | str | None = None):
+    def __init__(self, proxies: list[str] | str | None = None, ca_cert: str | None = None):
        """
        Initializes ZipRecruiterScraper with the ZipRecruiter job search url
        """
        super().__init__(Site.ZIP_RECRUITER, proxies=proxies)
        self.scraper_input = None
-        self.session = create_session(proxies=proxies)
+        self.session = create_session(proxies=proxies, ca_cert=ca_cert)
        self._get_cookies()
        self.delay = 5
@@ -200,7 +200,7 @@ class ZipRecruiterScraper(Scraper):
            script_tag = soup.find("script", type="application/json")
            if script_tag:
                job_json = json.loads(script_tag.string)
-                job_url_val = job_json["model"]["saveJobURL"]
+                job_url_val = job_json["model"].get("saveJobURL", "")
                m = re.search(r"job_url=(.+)", job_url_val)
                if m:
                    job_url_direct = m.group(1)
Author	SHA1	Message	Date
Cullen Watson	757a94853e	chore:version	2024-10-08 17:49:06 -05:00
Marcel Gozalbo Baró	6bc191d5c7	FEATURE: Add the "ca_cert" setting for providing a Certification Authority certificate in order to use proxies requiring it. (#204 )	2024-10-08 17:46:46 -05:00
Cullen Watson	0cc34287f7	fix:turkey	2024-10-02 01:31:00 -05:00
Anton Pikhteryev	923979093b	Add Malta for linkedin country support (#198 )	2024-09-19 20:41:22 -05:00
Cullen Watson	286f0e4487	docs:readme	2024-09-18 18:49:41 -05:00
Cullen Watson	f7b29d43a2	fix(indeed):sort relevance not date (#197 )	2024-09-18 18:42:25 -05:00
Cullen Watson	6f1490458c	fix key error (#186 )	2024-08-14 02:54:40 -05:00
Cullen Watson	6bb7d81ba8	change linkedin ep (#185 )	2024-08-14 02:39:43 -05:00
Cullen Watson	0e046432d1	fix:variable bug (#181 )	2024-08-05 12:47:55 -05:00