fix(indeed): check both dicts for compensation

pull/104/head
Cullen Watson 2024-02-09 10:39:19 -06:00
parent 5e21774b05
commit 2b723819f2
1 changed files with 41 additions and 14 deletions

View File

@ -103,19 +103,6 @@ class IndeedScraper(Scraper):
self.seen_urls.add(job_url) self.seen_urls.add(job_url)
description = job_detailed['description']['html'] description = job_detailed['description']['html']
compensation = None
comp = job_detailed['compensation']['baseSalary']
if comp:
interval = CompensationInterval.get_interval(comp['unitOfWork'])
interval = self.get_correct_interval(comp['unitOfWork'])
if interval:
compensation = Compensation(
interval=interval,
min_amount=round(comp['range'].get('min', 0), 2) if comp['range'].get(
'min') is not None else None,
max_amount = round(comp['range'].get('max', 0), 2) if comp['range'].get('max') is not None else None,
currency=job_detailed['compensation']['currencyCode']
)
job_type = IndeedScraper.get_job_type(job) job_type = IndeedScraper.get_job_type(job)
timestamp_seconds = job["pubDate"] / 1000 timestamp_seconds = job["pubDate"] / 1000
@ -133,7 +120,7 @@ class IndeedScraper(Scraper):
country=self.country, country=self.country,
), ),
job_type=job_type, job_type=job_type,
compensation=compensation, compensation=self.get_compensation(job, job_detailed),
date_posted=date_posted, date_posted=date_posted,
job_url=job_url_client, job_url=job_url_client,
emails=extract_emails_from_text(description) if description else None, emails=extract_emails_from_text(description) if description else None,
@ -255,6 +242,44 @@ class IndeedScraper(Scraper):
job_types.append(job_type) job_types.append(job_type)
return job_types return job_types
@staticmethod
def get_compensation(job: dict, job_detailed: dict) -> Compensation:
"""
Parses the job to get
:param job:
:param job_detailed:
:return: compensation object
"""
comp = job_detailed['compensation']['baseSalary']
if comp:
interval = IndeedScraper.get_correct_interval(comp['unitOfWork'])
if interval:
return Compensation(
interval=interval,
min_amount=round(comp['range'].get('min'), 2) if comp['range'].get('min') is not None else None,
max_amount=round(comp['range'].get('max'), 2) if comp['range'].get('max') is not None else None,
currency=job_detailed['compensation']['currencyCode']
)
extracted_salary = job.get("extractedSalary")
compensation = None
if extracted_salary:
salary_snippet = job.get("salarySnippet")
currency = salary_snippet.get("currency") if salary_snippet else None
interval = (extracted_salary.get("type"),)
if isinstance(interval, tuple):
interval = interval[0]
interval = interval.upper()
if interval in CompensationInterval.__members__:
compensation = Compensation(
interval=CompensationInterval[interval],
min_amount=int(extracted_salary.get("min")),
max_amount=int(extracted_salary.get("max")),
currency=currency,
)
return compensation
@staticmethod @staticmethod
def parse_jobs(soup: BeautifulSoup) -> dict: def parse_jobs(soup: BeautifulSoup) -> dict:
""" """
@ -453,8 +478,10 @@ class IndeedScraper(Scraper):
@staticmethod @staticmethod
def get_correct_interval(interval: str) -> CompensationInterval: def get_correct_interval(interval: str) -> CompensationInterval:
interval_mapping = { interval_mapping = {
"DAY": "DAILY",
"YEAR": "YEARLY", "YEAR": "YEARLY",
"HOUR": "HOURLY", "HOUR": "HOURLY",
"WEEK": "WEEKLY",
"MONTH": "MONTHLY" "MONTH": "MONTHLY"
} }
mapped_interval = interval_mapping.get(interval.upper(), None) mapped_interval = interval_mapping.get(interval.upper(), None)