enh: listing source (#168)

pull/169/head
Cullen Watson 2024-07-15 20:30:04 -05:00 committed by GitHub
parent 0988230a24
commit edffe18e65
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 19 additions and 4 deletions

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "python-jobspy"
-version = "1.1.57"
+version = "1.1.58"
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/JobSpy"

View File

@@ -209,6 +209,7 @@ def scrape_jobs(
"currency",
"is_remote",
"job_function",
+"listing_type",
"emails",
"description",
"company_url",

View File

@@ -242,6 +242,7 @@ class JobPost(BaseModel):
date_posted: date | None = None
emails: list[str] | None = None
is_remote: bool | None = None
+listing_type: str | None = None
# indeed specific
company_addresses: str | None = None

View File

@@ -189,7 +189,15 @@ class GlassdoorScraper(Scraper):
except:
description = None
company_url = f"{self.base_url}Overview/W-EI_IE{company_id}.htm"
-company_logo = job_data["jobview"].get("overview", {}).get("squareLogoUrl", None)
+company_logo = (
+job_data["jobview"].get("overview", {}).get("squareLogoUrl", None)
+)
+listing_type = (
+job_data["jobview"]
+.get("header", {})
+.get("adOrderSponsorshipLevel", "")
+.lower()
+)
return JobPost(
id=str(job_id),
title=title,
@@ -203,6 +211,7 @@ class GlassdoorScraper(Scraper):
description=description,
emails=extract_emails_from_text(description) if description else None,
logo_photo_url=company_logo,
+listing_type=listing_type,
)
def _fetch_job_description(self, job_id):

View File

@@ -176,7 +176,7 @@ class IndeedScraper(Scraper):
keys.append("DSQF7")
if keys:
-keys_str = '", "'.join(keys) # Prepare your keys string
+keys_str = '", "'.join(keys)
filters_str = f"""
filters: {{
composite: {{
@@ -353,7 +353,6 @@ class IndeedScraper(Scraper):
jobSearch(
{what}
{location}
-includeSponsoredResults: NONE
limit: 100
sort: DATE
{cursor}
@@ -365,6 +364,9 @@ class IndeedScraper(Scraper):
results {{
trackingKey
job {{
+source {{
+name
+}}
key
title
datePublished

View File

@@ -135,6 +135,7 @@ class ZipRecruiterScraper(Scraper):
self.seen_urls.add(job_url)
description = job.get("job_description", "").strip()
+listing_type = job.get("buyer_type", "")
description = (
markdown_converter(description)
if self.scraper_input.description_format == DescriptionFormat.MARKDOWN
@@ -175,6 +176,7 @@ class ZipRecruiterScraper(Scraper):
description=description_full if description_full else description,
emails=extract_emails_from_text(description) if description else None,
job_url_direct=job_url_direct,
+listing_type=listing_type,
)
def _get_descr(self, job_url):