mirror of https://github.com/Bunsly/JobSpy
fix(indeed): check both dicts for compensation
parent
5e21774b05
commit
2b723819f2
|
@ -103,19 +103,6 @@ class IndeedScraper(Scraper):
|
||||||
self.seen_urls.add(job_url)
|
self.seen_urls.add(job_url)
|
||||||
description = job_detailed['description']['html']
|
description = job_detailed['description']['html']
|
||||||
|
|
||||||
compensation = None
|
|
||||||
comp = job_detailed['compensation']['baseSalary']
|
|
||||||
if comp:
|
|
||||||
interval = CompensationInterval.get_interval(comp['unitOfWork'])
|
|
||||||
interval = self.get_correct_interval(comp['unitOfWork'])
|
|
||||||
if interval:
|
|
||||||
compensation = Compensation(
|
|
||||||
interval=interval,
|
|
||||||
min_amount=round(comp['range'].get('min', 0), 2) if comp['range'].get(
|
|
||||||
'min') is not None else None,
|
|
||||||
max_amount = round(comp['range'].get('max', 0), 2) if comp['range'].get('max') is not None else None,
|
|
||||||
currency=job_detailed['compensation']['currencyCode']
|
|
||||||
)
|
|
||||||
|
|
||||||
job_type = IndeedScraper.get_job_type(job)
|
job_type = IndeedScraper.get_job_type(job)
|
||||||
timestamp_seconds = job["pubDate"] / 1000
|
timestamp_seconds = job["pubDate"] / 1000
|
||||||
|
@ -133,7 +120,7 @@ class IndeedScraper(Scraper):
|
||||||
country=self.country,
|
country=self.country,
|
||||||
),
|
),
|
||||||
job_type=job_type,
|
job_type=job_type,
|
||||||
compensation=compensation,
|
compensation=self.get_compensation(job, job_detailed),
|
||||||
date_posted=date_posted,
|
date_posted=date_posted,
|
||||||
job_url=job_url_client,
|
job_url=job_url_client,
|
||||||
emails=extract_emails_from_text(description) if description else None,
|
emails=extract_emails_from_text(description) if description else None,
|
||||||
|
@ -255,6 +242,44 @@ class IndeedScraper(Scraper):
|
||||||
job_types.append(job_type)
|
job_types.append(job_type)
|
||||||
return job_types
|
return job_types
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_compensation(job: dict, job_detailed: dict) -> Compensation:
|
||||||
|
"""
|
||||||
|
Parses the job to get
|
||||||
|
:param job:
|
||||||
|
:param job_detailed:
|
||||||
|
:return: compensation object
|
||||||
|
"""
|
||||||
|
comp = job_detailed['compensation']['baseSalary']
|
||||||
|
if comp:
|
||||||
|
interval = IndeedScraper.get_correct_interval(comp['unitOfWork'])
|
||||||
|
if interval:
|
||||||
|
return Compensation(
|
||||||
|
interval=interval,
|
||||||
|
min_amount=round(comp['range'].get('min'), 2) if comp['range'].get('min') is not None else None,
|
||||||
|
max_amount=round(comp['range'].get('max'), 2) if comp['range'].get('max') is not None else None,
|
||||||
|
currency=job_detailed['compensation']['currencyCode']
|
||||||
|
)
|
||||||
|
|
||||||
|
extracted_salary = job.get("extractedSalary")
|
||||||
|
compensation = None
|
||||||
|
if extracted_salary:
|
||||||
|
salary_snippet = job.get("salarySnippet")
|
||||||
|
currency = salary_snippet.get("currency") if salary_snippet else None
|
||||||
|
interval = (extracted_salary.get("type"),)
|
||||||
|
if isinstance(interval, tuple):
|
||||||
|
interval = interval[0]
|
||||||
|
|
||||||
|
interval = interval.upper()
|
||||||
|
if interval in CompensationInterval.__members__:
|
||||||
|
compensation = Compensation(
|
||||||
|
interval=CompensationInterval[interval],
|
||||||
|
min_amount=int(extracted_salary.get("min")),
|
||||||
|
max_amount=int(extracted_salary.get("max")),
|
||||||
|
currency=currency,
|
||||||
|
)
|
||||||
|
return compensation
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse_jobs(soup: BeautifulSoup) -> dict:
|
def parse_jobs(soup: BeautifulSoup) -> dict:
|
||||||
"""
|
"""
|
||||||
|
@ -453,8 +478,10 @@ class IndeedScraper(Scraper):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_correct_interval(interval: str) -> CompensationInterval:
|
def get_correct_interval(interval: str) -> CompensationInterval:
|
||||||
interval_mapping = {
|
interval_mapping = {
|
||||||
|
"DAY": "DAILY",
|
||||||
"YEAR": "YEARLY",
|
"YEAR": "YEARLY",
|
||||||
"HOUR": "HOURLY",
|
"HOUR": "HOURLY",
|
||||||
|
"WEEK": "WEEKLY",
|
||||||
"MONTH": "MONTHLY"
|
"MONTH": "MONTHLY"
|
||||||
}
|
}
|
||||||
mapped_interval = interval_mapping.get(interval.upper(), None)
|
mapped_interval = interval_mapping.get(interval.upper(), None)
|
||||||
|
|
Loading…
Reference in New Issue