From be090de499c44f32422f6dac6b126d1bbc448201 Mon Sep 17 00:00:00 2001
From: Vincent Yan
Date: Sun, 26 Nov 2023 13:56:00 -0500
Subject: [PATCH] fix: get full indeed description from json

---
 src/jobspy/scrapers/indeed/__init__.py | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py
index 61c19f7..c758043 100644
--- a/src/jobspy/scrapers/indeed/__init__.py
+++ b/src/jobspy/scrapers/indeed/__init__.py
@@ -235,24 +235,9 @@ class IndeedScraper(Scraper):
         if response.status_code not in range(200, 400):
             return None
 
-        soup = BeautifulSoup(response.text, "html.parser")
-        script_tag = soup.find(
-            "script", text=lambda x: x and "window._initialData" in x
-        )
-
-        if not script_tag:
-            return None
-
-        script_code = script_tag.string
-        match = re.search(r"window\._initialData\s*=\s*({.*?})\s*;", script_code, re.S)
-
-        if not match:
-            return None
-
-        json_string = match.group(1)
-        data = json.loads(json_string)
         try:
-            job_description = data["jobInfoWrapperModel"]["jobInfoModel"][
+            data = json.loads(response.text)
+            job_description = data["body"]["jobInfoWrapperModel"]["jobInfoModel"][
                 "sanitizedJobDescription"
             ]
         except (KeyError, TypeError, IndexError):