Get full Indeed description (#70)

pull/77/head v1.1.29
Vincent Yan 2023-11-27 16:00:36 -05:00 committed by GitHub
parent dfb8c18c51
commit eed7fca300
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 4 additions and 19 deletions

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "python-jobspy" name = "python-jobspy"
version = "1.1.28" version = "1.1.29"
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter" description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"] authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/JobSpy" homepage = "https://github.com/Bunsly/JobSpy"

View File

@@ -121,7 +121,7 @@ class Country(Enum):
# internal for ziprecruiter # internal for ziprecruiter
US_CANADA = ("usa/ca", "www") US_CANADA = ("usa/ca", "www")
# internal for linkeind # internal for linkedin
WORLDWIDE = ("worldwide", "www") WORLDWIDE = ("worldwide", "www")
@property @property

View File

@@ -235,24 +235,9 @@ class IndeedScraper(Scraper):
if response.status_code not in range(200, 400): if response.status_code not in range(200, 400):
return None return None
soup = BeautifulSoup(response.text, "html.parser")
script_tag = soup.find(
"script", text=lambda x: x and "window._initialData" in x
)
if not script_tag:
return None
script_code = script_tag.string
match = re.search(r"window\._initialData\s*=\s*({.*?})\s*;", script_code, re.S)
if not match:
return None
json_string = match.group(1)
data = json.loads(json_string)
try: try:
job_description = data["jobInfoWrapperModel"]["jobInfoModel"][ data = json.loads(response.text)
job_description = data["body"]["jobInfoWrapperModel"]["jobInfoModel"][
"sanitizedJobDescription" "sanitizedJobDescription"
] ]
except (KeyError, TypeError, IndexError): except (KeyError, TypeError, IndexError):