mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-11 07:54:33 -07:00
enh: full description param (#85)
This commit is contained in:
@@ -78,7 +78,7 @@ class IndeedScraper(Scraper):
|
||||
if sc_values:
|
||||
params["sc"] = "0kf:" + "".join(sc_values) + ";"
|
||||
try:
|
||||
session = create_session(self.proxy, is_tls=True)
|
||||
session = create_session(self.proxy)
|
||||
response = session.get(
|
||||
f"{self.url}/jobs",
|
||||
headers=self.get_headers(),
|
||||
@@ -140,7 +140,8 @@ class IndeedScraper(Scraper):
|
||||
date_posted = datetime.fromtimestamp(timestamp_seconds)
|
||||
date_posted = date_posted.strftime("%Y-%m-%d")
|
||||
|
||||
description = self.get_description(job_url)
|
||||
description = self.get_description(job_url) if scraper_input.full_description else None
|
||||
|
||||
with io.StringIO(job["snippet"]) as f:
|
||||
soup_io = BeautifulSoup(f, "html.parser")
|
||||
li_elements = soup_io.find_all("li")
|
||||
@@ -246,7 +247,7 @@ class IndeedScraper(Scraper):
|
||||
return None
|
||||
|
||||
soup = BeautifulSoup(job_description, "html.parser")
|
||||
text_content = " ".join(soup.get_text(separator=" ").split()).strip()
|
||||
text_content = "\n".join(soup.stripped_strings)
|
||||
|
||||
return text_content
|
||||
|
||||
|
||||
Reference in New Issue
Block a user