mirror of https://github.com/Bunsly/JobSpy
chore: id added for JobPost schema (#152)
parent
1ffdb1756f
commit
08d63a87a2
|
@ -168,6 +168,7 @@ def scrape_jobs(
|
||||||
|
|
||||||
# Desired column order
|
# Desired column order
|
||||||
desired_order = [
|
desired_order = [
|
||||||
|
"id",
|
||||||
"site",
|
"site",
|
||||||
"job_url_hyper" if hyperlinks else "job_url",
|
"job_url_hyper" if hyperlinks else "job_url",
|
||||||
"job_url_direct",
|
"job_url_direct",
|
||||||
|
|
|
@ -226,6 +226,7 @@ class DescriptionFormat(Enum):
|
||||||
|
|
||||||
|
|
||||||
class JobPost(BaseModel):
|
class JobPost(BaseModel):
|
||||||
|
id: str | None = None
|
||||||
title: str
|
title: str
|
||||||
company_name: str | None
|
company_name: str | None
|
||||||
job_url: str
|
job_url: str
|
||||||
|
|
|
@ -190,6 +190,7 @@ class GlassdoorScraper(Scraper):
|
||||||
description = None
|
description = None
|
||||||
company_url = f"{self.base_url}Overview/W-EI_IE{company_id}.htm"
|
company_url = f"{self.base_url}Overview/W-EI_IE{company_id}.htm"
|
||||||
return JobPost(
|
return JobPost(
|
||||||
|
id=str(job_id),
|
||||||
title=title,
|
title=title,
|
||||||
company_url=company_url if company_id else None,
|
company_url=company_url if company_id else None,
|
||||||
company_name=company_name,
|
company_name=company_name,
|
||||||
|
|
|
@ -213,6 +213,7 @@ class IndeedScraper(Scraper):
|
||||||
employer_details = employer.get("employerDetails", {}) if employer else {}
|
employer_details = employer.get("employerDetails", {}) if employer else {}
|
||||||
rel_url = job["employer"]["relativeCompanyPageUrl"] if job["employer"] else None
|
rel_url = job["employer"]["relativeCompanyPageUrl"] if job["employer"] else None
|
||||||
return JobPost(
|
return JobPost(
|
||||||
|
id=str(job["key"]),
|
||||||
title=job["title"],
|
title=job["title"],
|
||||||
description=description,
|
description=description,
|
||||||
company_name=job["employer"].get("name") if job.get("employer") else None,
|
company_name=job["employer"].get("name") if job.get("employer") else None,
|
||||||
|
|
|
@ -209,6 +209,7 @@ class LinkedInScraper(Scraper):
|
||||||
job_details = self._get_job_details(job_url)
|
job_details = self._get_job_details(job_url)
|
||||||
|
|
||||||
return JobPost(
|
return JobPost(
|
||||||
|
id=self._get_id(job_url),
|
||||||
title=title,
|
title=title,
|
||||||
company_name=company,
|
company_name=company,
|
||||||
company_url=company_url,
|
company_url=company_url,
|
||||||
|
@ -223,6 +224,16 @@ class LinkedInScraper(Scraper):
|
||||||
logo_photo_url=job_details.get("logo_photo_url"),
|
logo_photo_url=job_details.get("logo_photo_url"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _get_id(self, url: str):
|
||||||
|
"""
|
||||||
|
Extracts the job id from the job url
|
||||||
|
:param url:
|
||||||
|
:return: str
|
||||||
|
"""
|
||||||
|
if not url:
|
||||||
|
return None
|
||||||
|
return url.split("/")[-1]
|
||||||
|
|
||||||
def _get_job_details(self, job_page_url: str) -> dict:
|
def _get_job_details(self, job_page_url: str) -> dict:
|
||||||
"""
|
"""
|
||||||
Retrieves job description and other job details by going to the job page url
|
Retrieves job description and other job details by going to the job page url
|
||||||
|
|
|
@ -151,6 +151,7 @@ class ZipRecruiterScraper(Scraper):
|
||||||
comp_max = int(job["compensation_max"]) if "compensation_max" in job else None
|
comp_max = int(job["compensation_max"]) if "compensation_max" in job else None
|
||||||
comp_currency = job.get("compensation_currency")
|
comp_currency = job.get("compensation_currency")
|
||||||
return JobPost(
|
return JobPost(
|
||||||
|
id=str(job['listing_key']),
|
||||||
title=title,
|
title=title,
|
||||||
company_name=company,
|
company_name=company,
|
||||||
location=location,
|
location=location,
|
||||||
|
|
Loading…
Reference in New Issue