mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-05 12:04:33 -08:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
65d2e5e707 | ||
|
|
08d63a87a2 |
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "python-jobspy"
|
||||
version = "1.1.52"
|
||||
version = "1.1.53"
|
||||
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
|
||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||
homepage = "https://github.com/Bunsly/JobSpy"
|
||||
|
||||
@@ -168,6 +168,7 @@ def scrape_jobs(
|
||||
|
||||
# Desired column order
|
||||
desired_order = [
|
||||
"id",
|
||||
"site",
|
||||
"job_url_hyper" if hyperlinks else "job_url",
|
||||
"job_url_direct",
|
||||
|
||||
@@ -226,6 +226,7 @@ class DescriptionFormat(Enum):
|
||||
|
||||
|
||||
class JobPost(BaseModel):
|
||||
id: str | None = None
|
||||
title: str
|
||||
company_name: str | None
|
||||
job_url: str
|
||||
|
||||
@@ -190,6 +190,7 @@ class GlassdoorScraper(Scraper):
|
||||
description = None
|
||||
company_url = f"{self.base_url}Overview/W-EI_IE{company_id}.htm"
|
||||
return JobPost(
|
||||
id=str(job_id),
|
||||
title=title,
|
||||
company_url=company_url if company_id else None,
|
||||
company_name=company_name,
|
||||
|
||||
@@ -213,6 +213,7 @@ class IndeedScraper(Scraper):
|
||||
employer_details = employer.get("employerDetails", {}) if employer else {}
|
||||
rel_url = job["employer"]["relativeCompanyPageUrl"] if job["employer"] else None
|
||||
return JobPost(
|
||||
id=str(job["key"]),
|
||||
title=job["title"],
|
||||
description=description,
|
||||
company_name=job["employer"].get("name") if job.get("employer") else None,
|
||||
|
||||
@@ -209,6 +209,7 @@ class LinkedInScraper(Scraper):
|
||||
job_details = self._get_job_details(job_url)
|
||||
|
||||
return JobPost(
|
||||
id=self._get_id(job_url),
|
||||
title=title,
|
||||
company_name=company,
|
||||
company_url=company_url,
|
||||
@@ -223,6 +224,16 @@ class LinkedInScraper(Scraper):
|
||||
logo_photo_url=job_details.get("logo_photo_url"),
|
||||
)
|
||||
|
||||
def _get_id(self, url: str):
|
||||
"""
|
||||
Extracts the job id from the job url
|
||||
:param url:
|
||||
:return: str
|
||||
"""
|
||||
if not url:
|
||||
return None
|
||||
return url.split("/")[-1]
|
||||
|
||||
def _get_job_details(self, job_page_url: str) -> dict:
|
||||
"""
|
||||
Retrieves job description and other job details by going to the job page url
|
||||
|
||||
@@ -151,6 +151,7 @@ class ZipRecruiterScraper(Scraper):
|
||||
comp_max = int(job["compensation_max"]) if "compensation_max" in job else None
|
||||
comp_currency = job.get("compensation_currency")
|
||||
return JobPost(
|
||||
id=str(job['listing_key']),
|
||||
title=title,
|
||||
company_name=company,
|
||||
location=location,
|
||||
|
||||
Reference in New Issue
Block a user