Get full indeed description (#70)

2023-11-27 16:00:36 -05:00 · 2023-11-27 16:00:36 -05:00 · eed7fca300
parent dfb8c18c51
commit eed7fca300
3 changed files with 4 additions and 19 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.28"
+version = "1.1.29"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/JobSpy"
--- a/src/jobspy/jobs/__init__.py
+++ b/src/jobspy/jobs/__init__.py
@@ -121,7 +121,7 @@ class Country(Enum):
    # internal for ziprecruiter
    US_CANADA = ("usa/ca", "www")

-    # internal for linkeind
+    # internal for linkedin
    WORLDWIDE = ("worldwide", "www")

    @property
--- a/src/jobspy/scrapers/indeed/__init__.py
+++ b/src/jobspy/scrapers/indeed/__init__.py
@@ -235,24 +235,9 @@ class IndeedScraper(Scraper):
        if response.status_code not in range(200, 400):
            return None

-        soup = BeautifulSoup(response.text, "html.parser")
-        script_tag = soup.find(
-            "script", text=lambda x: x and "window._initialData" in x
-        )
-
-        if not script_tag:
-            return None
-
-        script_code = script_tag.string
-        match = re.search(r"window\._initialData\s*=\s*({.*?})\s*;", script_code, re.S)
-
-        if not match:
-            return None
-
-        json_string = match.group(1)
-        data = json.loads(json_string)
        try:
-            job_description = data["jobInfoWrapperModel"]["jobInfoModel"][
+            data = json.loads(response.text)
+            job_description = data["body"]["jobInfoWrapperModel"]["jobInfoModel"][
                "sanitizedJobDescription"
            ]
        except (KeyError, TypeError, IndexError):