Get full indeed description (#70 )

include location with 3 parts (#69 )
added salary data for linkedin (#68 )
2026-03-05 12:04:33 -08:00 · 2023-11-27 15:00:36 -06:00 · 2023-11-10 16:59:42 -06:00 · 2023-11-09 14:57:15 -06:00
5 changed files with 47 additions and 21 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.26"
+version = "1.1.29"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/JobSpy"
--- a/src/jobspy/jobs/init.py
+++ b/src/jobspy/jobs/init.py
@@ -121,7 +121,7 @@ class Country(Enum):
    # internal for ziprecruiter
    US_CANADA = ("usa/ca", "www")
-    # internal for linkeind
+    # internal for linkedin
    WORLDWIDE = ("worldwide", "www")
    @property
--- a/src/jobspy/scrapers/indeed/init.py
+++ b/src/jobspy/scrapers/indeed/init.py
@@ -235,24 +235,9 @@ class IndeedScraper(Scraper):
        if response.status_code not in range(200, 400):
            return None
        soup = BeautifulSoup(response.text, "html.parser")
        script_tag = soup.find(
            "script", text=lambda x: x and "window._initialData" in x
        )
        if not script_tag:
            return None
        script_code = script_tag.string
        match = re.search(r"window\._initialData\s*=\s*({.*?})\s*;", script_code, re.S)
        if not match:
            return None
        json_string = match.group(1)
        data = json.loads(json_string)
        try:
-            job_description = data["jobInfoWrapperModel"]["jobInfoModel"][
+            data = json.loads(response.text)
            job_description = data["body"]["jobInfoWrapperModel"]["jobInfoModel"][
                "sanitizedJobDescription"
            ]
        except (KeyError, TypeError, IndexError):
--- a/src/jobspy/scrapers/linkedin/init.py
+++ b/src/jobspy/scrapers/linkedin/init.py
@@ -16,9 +16,9 @@ from threading import Lock
 from urllib.parse import urlparse, urlunparse
 from .. import Scraper, ScraperInput, Site
-from ..utils import count_urgent_words, extract_emails_from_text, get_enum_from_job_type
+from ..utils import count_urgent_words, extract_emails_from_text, get_enum_from_job_type, currency_parser
 from ..exceptions import LinkedInException
-from ...jobs import JobPost, Location, JobResponse, JobType, Country
+from ...jobs import JobPost, Location, JobResponse, JobType, Country, Compensation
 class LinkedInScraper(Scraper):
@@ -135,6 +135,22 @@ class LinkedInScraper(Scraper):
        return JobResponse(jobs=job_list)
    def process_job(self, job_card: Tag, job_url: str) -> Optional[JobPost]:
        salary_tag = job_card.find('span', class_='job-search-card__salary-info')
        compensation = None
        if salary_tag:
            salary_text = salary_tag.get_text(separator=' ').strip()
            salary_values = [currency_parser(value) for value in salary_text.split('-')]
            salary_min = salary_values[0]
            salary_max = salary_values[1]
            currency = salary_text[0] if salary_text[0] != '$' else 'USD'
            compensation = Compensation(
                min_amount=int(salary_min),
                max_amount=int(salary_max),
                currency=currency,
            )
        title_tag = job_card.find("span", class_="sr-only")
        title = title_tag.get_text(strip=True) if title_tag else "N/A"
@@ -177,6 +193,7 @@ class LinkedInScraper(Scraper):
            date_posted=date_posted,
            job_url=job_url,
            job_type=job_type,
            compensation=compensation,
            benefits=benefits,
            emails=extract_emails_from_text(description) if description else None,
            num_urgent_words=count_urgent_words(description) if description else None,
@@ -261,5 +278,12 @@ class LinkedInScraper(Scraper):
                    state=state,
                    country=Country.from_string(self.country),
                )
            elif len(parts) == 3:
                city, state, country = parts
                location = Location(
                    city=city,
                    state=state,
                    country=Country.from_string(country),
                )
        return location
--- a/src/jobspy/scrapers/utils.py
+++ b/src/jobspy/scrapers/utils.py
@@ -1,4 +1,5 @@
 import re
 import numpy as np
 import requests
 import tls_client
@@ -62,3 +63,19 @@ def get_enum_from_job_type(job_type_str: str) -> JobType | None:
        if job_type_str in job_type.value:
            res = job_type
    return res
 def currency_parser(cur_str):
    """Convert a formatted money string into a number rounded to 2 places.

    Every character other than digits, '-', '.' and ',' is discarded first
    (currency symbols, letters, whitespace). Separators found anywhere
    before the final three characters are treated as thousands grouping and
    dropped. A ',' inside the final three characters is read as a decimal
    mark when no '.' is present there.

    :param cur_str: text such as "$100,000" or "€1.000,50"
    :return: the parsed amount, as returned by ``np.round(value, 2)``
    :raises ValueError: if what remains cannot be parsed by ``float``
    """
    cleaned = re.sub("[^-0-9.,]", '', cur_str)

    # Only the last three characters can carry a decimal mark; anything
    # earlier that looks like a separator is a grouping mark.
    head, tail = cleaned[:-3], cleaned[-3:]
    normalized = head.replace('.', '').replace(',', '') + tail

    # European-style decimal comma -> decimal point.
    if '.' not in tail and ',' in tail:
        normalized = normalized.replace(',', '.')

    return np.round(float(normalized), 2)
Author	SHA1	Message	Date
Vincent Yan	eed7fca300	Get full indeed description (#70 )	2023-11-27 15:00:36 -06:00
Faraz Khan	dfb8c18c51	include location with 3 parts (#69 )	2023-11-10 16:59:42 -06:00
Faraz Khan	81f70ff8a5	added salary data for linkedin (#68 )	2023-11-09 14:57:15 -06:00