mirror of https://github.com/Bunsly/JobSpy
parent
dfb8c18c51
commit
eed7fca300
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "python-jobspy"
|
name = "python-jobspy"
|
||||||
version = "1.1.28"
|
version = "1.1.29"
|
||||||
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
|
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
|
||||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||||
homepage = "https://github.com/Bunsly/JobSpy"
|
homepage = "https://github.com/Bunsly/JobSpy"
|
||||||
|
|
|
@ -121,7 +121,7 @@ class Country(Enum):
|
||||||
# internal for ziprecruiter
|
# internal for ziprecruiter
|
||||||
US_CANADA = ("usa/ca", "www")
|
US_CANADA = ("usa/ca", "www")
|
||||||
|
|
||||||
# internal for linkeind
|
# internal for linkedin
|
||||||
WORLDWIDE = ("worldwide", "www")
|
WORLDWIDE = ("worldwide", "www")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -235,24 +235,9 @@ class IndeedScraper(Scraper):
|
||||||
if response.status_code not in range(200, 400):
|
if response.status_code not in range(200, 400):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
|
||||||
script_tag = soup.find(
|
|
||||||
"script", text=lambda x: x and "window._initialData" in x
|
|
||||||
)
|
|
||||||
|
|
||||||
if not script_tag:
|
|
||||||
return None
|
|
||||||
|
|
||||||
script_code = script_tag.string
|
|
||||||
match = re.search(r"window\._initialData\s*=\s*({.*?})\s*;", script_code, re.S)
|
|
||||||
|
|
||||||
if not match:
|
|
||||||
return None
|
|
||||||
|
|
||||||
json_string = match.group(1)
|
|
||||||
data = json.loads(json_string)
|
|
||||||
try:
|
try:
|
||||||
job_description = data["jobInfoWrapperModel"]["jobInfoModel"][
|
data = json.loads(response.text)
|
||||||
|
job_description = data["body"]["jobInfoWrapperModel"]["jobInfoModel"][
|
||||||
"sanitizedJobDescription"
|
"sanitizedJobDescription"
|
||||||
]
|
]
|
||||||
except (KeyError, TypeError, IndexError):
|
except (KeyError, TypeError, IndexError):
|
||||||
|
|
Loading…
Reference in New Issue