mirror of https://github.com/Bunsly/JobSpy
enh: listing source (#168)
parent
0988230a24
commit
edffe18e65
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "python-jobspy"
|
name = "python-jobspy"
|
||||||
version = "1.1.57"
|
version = "1.1.58"
|
||||||
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
|
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
|
||||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||||
homepage = "https://github.com/Bunsly/JobSpy"
|
homepage = "https://github.com/Bunsly/JobSpy"
|
||||||
|
|
|
@ -209,6 +209,7 @@ def scrape_jobs(
|
||||||
"currency",
|
"currency",
|
||||||
"is_remote",
|
"is_remote",
|
||||||
"job_function",
|
"job_function",
|
||||||
|
"listing_type",
|
||||||
"emails",
|
"emails",
|
||||||
"description",
|
"description",
|
||||||
"company_url",
|
"company_url",
|
||||||
|
|
|
@ -242,6 +242,7 @@ class JobPost(BaseModel):
|
||||||
date_posted: date | None = None
|
date_posted: date | None = None
|
||||||
emails: list[str] | None = None
|
emails: list[str] | None = None
|
||||||
is_remote: bool | None = None
|
is_remote: bool | None = None
|
||||||
|
listing_type: str | None = None
|
||||||
|
|
||||||
# indeed specific
|
# indeed specific
|
||||||
company_addresses: str | None = None
|
company_addresses: str | None = None
|
||||||
|
|
|
@ -189,7 +189,15 @@ class GlassdoorScraper(Scraper):
|
||||||
except:
|
except:
|
||||||
description = None
|
description = None
|
||||||
company_url = f"{self.base_url}Overview/W-EI_IE{company_id}.htm"
|
company_url = f"{self.base_url}Overview/W-EI_IE{company_id}.htm"
|
||||||
company_logo = job_data["jobview"].get("overview", {}).get("squareLogoUrl", None)
|
company_logo = (
|
||||||
|
job_data["jobview"].get("overview", {}).get("squareLogoUrl", None)
|
||||||
|
)
|
||||||
|
listing_type = (
|
||||||
|
job_data["jobview"]
|
||||||
|
.get("header", {})
|
||||||
|
.get("adOrderSponsorshipLevel", "")
|
||||||
|
.lower()
|
||||||
|
)
|
||||||
return JobPost(
|
return JobPost(
|
||||||
id=str(job_id),
|
id=str(job_id),
|
||||||
title=title,
|
title=title,
|
||||||
|
@ -203,6 +211,7 @@ class GlassdoorScraper(Scraper):
|
||||||
description=description,
|
description=description,
|
||||||
emails=extract_emails_from_text(description) if description else None,
|
emails=extract_emails_from_text(description) if description else None,
|
||||||
logo_photo_url=company_logo,
|
logo_photo_url=company_logo,
|
||||||
|
listing_type=listing_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _fetch_job_description(self, job_id):
|
def _fetch_job_description(self, job_id):
|
||||||
|
|
|
@ -176,7 +176,7 @@ class IndeedScraper(Scraper):
|
||||||
keys.append("DSQF7")
|
keys.append("DSQF7")
|
||||||
|
|
||||||
if keys:
|
if keys:
|
||||||
keys_str = '", "'.join(keys) # Prepare your keys string
|
keys_str = '", "'.join(keys)
|
||||||
filters_str = f"""
|
filters_str = f"""
|
||||||
filters: {{
|
filters: {{
|
||||||
composite: {{
|
composite: {{
|
||||||
|
@ -353,7 +353,6 @@ class IndeedScraper(Scraper):
|
||||||
jobSearch(
|
jobSearch(
|
||||||
{what}
|
{what}
|
||||||
{location}
|
{location}
|
||||||
includeSponsoredResults: NONE
|
|
||||||
limit: 100
|
limit: 100
|
||||||
sort: DATE
|
sort: DATE
|
||||||
{cursor}
|
{cursor}
|
||||||
|
@ -365,6 +364,9 @@ class IndeedScraper(Scraper):
|
||||||
results {{
|
results {{
|
||||||
trackingKey
|
trackingKey
|
||||||
job {{
|
job {{
|
||||||
|
source {{
|
||||||
|
name
|
||||||
|
}}
|
||||||
key
|
key
|
||||||
title
|
title
|
||||||
datePublished
|
datePublished
|
||||||
|
|
|
@ -135,6 +135,7 @@ class ZipRecruiterScraper(Scraper):
|
||||||
self.seen_urls.add(job_url)
|
self.seen_urls.add(job_url)
|
||||||
|
|
||||||
description = job.get("job_description", "").strip()
|
description = job.get("job_description", "").strip()
|
||||||
|
listing_type = job.get("buyer_type", "")
|
||||||
description = (
|
description = (
|
||||||
markdown_converter(description)
|
markdown_converter(description)
|
||||||
if self.scraper_input.description_format == DescriptionFormat.MARKDOWN
|
if self.scraper_input.description_format == DescriptionFormat.MARKDOWN
|
||||||
|
@ -175,6 +176,7 @@ class ZipRecruiterScraper(Scraper):
|
||||||
description=description_full if description_full else description,
|
description=description_full if description_full else description,
|
||||||
emails=extract_emails_from_text(description) if description else None,
|
emails=extract_emails_from_text(description) if description else None,
|
||||||
job_url_direct=job_url_direct,
|
job_url_direct=job_url_direct,
|
||||||
|
listing_type=listing_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_descr(self, job_url):
|
def _get_descr(self, job_url):
|
||||||
|
|
Loading…
Reference in New Issue