enh: full description param (#85)

This commit is contained in:
Cullen Watson
2024-01-22 20:22:32 -06:00
committed by GitHub
parent 2ec3b04777
commit 5b3627b244
8 changed files with 115 additions and 50 deletions

View File

@@ -78,7 +78,7 @@ class IndeedScraper(Scraper):
if sc_values:
params["sc"] = "0kf:" + "".join(sc_values) + ";"
try:
-session = create_session(self.proxy, is_tls=True)
+session = create_session(self.proxy)
response = session.get(
f"{self.url}/jobs",
headers=self.get_headers(),
@@ -140,7 +140,8 @@ class IndeedScraper(Scraper):
date_posted = datetime.fromtimestamp(timestamp_seconds)
date_posted = date_posted.strftime("%Y-%m-%d")
-description = self.get_description(job_url)
+description = self.get_description(job_url) if scraper_input.full_description else None
with io.StringIO(job["snippet"]) as f:
soup_io = BeautifulSoup(f, "html.parser")
li_elements = soup_io.find_all("li")
@@ -246,7 +247,7 @@ class IndeedScraper(Scraper):
return None
soup = BeautifulSoup(job_description, "html.parser")
-text_content = " ".join(soup.get_text(separator=" ").split()).strip()
+text_content = "\n".join(soup.stripped_strings)
return text_content