enh: source

pull/168/head
Cullen Watson 2024-07-15 20:28:37 -05:00
parent 0988230a24
commit eeddbc9ae7
5 changed files with 18 additions and 3 deletions

View File

@@ -209,6 +209,7 @@ def scrape_jobs(
 "currency",
 "is_remote",
 "job_function",
+"listing_type",
 "emails",
 "description",
 "company_url",

View File

@@ -242,6 +242,7 @@ class JobPost(BaseModel):
 date_posted: date | None = None
 emails: list[str] | None = None
 is_remote: bool | None = None
+listing_type: str | None = None
 # indeed specific
 company_addresses: str | None = None

View File

@@ -189,7 +189,15 @@ class GlassdoorScraper(Scraper):
 except:
 description = None
 company_url = f"{self.base_url}Overview/W-EI_IE{company_id}.htm"
-company_logo = job_data["jobview"].get("overview", {}).get("squareLogoUrl", None)
+company_logo = (
+job_data["jobview"].get("overview", {}).get("squareLogoUrl", None)
+)
+listing_type = (
+job_data["jobview"]
+.get("header", {})
+.get("adOrderSponsorshipLevel", "")
+.lower()
+)
 return JobPost(
 id=str(job_id),
 title=title,
@@ -203,6 +211,7 @@ class GlassdoorScraper(Scraper):
 description=description,
 emails=extract_emails_from_text(description) if description else None,
 logo_photo_url=company_logo,
+listing_type=listing_type,
 )
 def _fetch_job_description(self, job_id):

View File

@@ -176,7 +176,7 @@ class IndeedScraper(Scraper):
 keys.append("DSQF7")
 if keys:
-keys_str = '", "'.join(keys) # Prepare your keys string
+keys_str = '", "'.join(keys)
 filters_str = f"""
 filters: {{
 composite: {{
@@ -353,7 +353,6 @@ class IndeedScraper(Scraper):
 jobSearch(
 {what}
 {location}
-includeSponsoredResults: NONE
 limit: 100
 sort: DATE
 {cursor}
@@ -365,6 +364,9 @@ class IndeedScraper(Scraper):
 results {{
 trackingKey
 job {{
+source {{
+name
+}}
 key
 title
 datePublished

View File

@@ -135,6 +135,7 @@ class ZipRecruiterScraper(Scraper):
 self.seen_urls.add(job_url)
 description = job.get("job_description", "").strip()
+listing_type = job.get("buyer_type", "")
 description = (
 markdown_converter(description)
 if self.scraper_input.description_format == DescriptionFormat.MARKDOWN
@@ -175,6 +176,7 @@ class ZipRecruiterScraper(Scraper):
 description=description_full if description_full else description,
 emails=extract_emails_from_text(description) if description else None,
 job_url_direct=job_url_direct,
+listing_type=listing_type,
 )
 def _get_descr(self, job_url):