enh(indeed): multiple job types

pull/56/head
Cullen Watson 2023-10-10 09:56:11 -05:00
parent 2e59ab03e3
commit ecdbf69f94
3 changed files with 13 additions and 13 deletions

View File

@ -120,11 +120,8 @@ def scrape_jobs(
] = f'<a href="{job_data["job_url"]}">{job_data["job_url"]}</a>'
job_data["site"] = site
job_data["company"] = job_data["company_name"]
if job_data["job_type"]:
# Take the first value from the job type tuple
job_data["job_type"] = job_data["job_type"].value[0]
else:
job_data["job_type"] = None
job_data["job_type"] = ", ".join(job_type.value[0] for job_type in job_data["job_type"]) if job_data[
"job_type"] else None
job_data["location"] = Location(**job_data["location"]).display_location()
@ -163,7 +160,6 @@ def scrape_jobs(
"max_amount",
"currency",
"emails",
"job_url_hyper" if hyperlinks else "job_url",
"description",
]
jobs_formatted_df = jobs_df[desired_order]

View File

@ -183,11 +183,14 @@ class JobPost(BaseModel):
location: Optional[Location]
description: Optional[str] = None
job_type: Optional[JobType] = None
job_type: Optional[list[JobType]] = None
compensation: Optional[Compensation] = None
date_posted: Optional[date] = None
benefits: Optional[str] = None
emails: Optional[list[str]] = None
num_urgent_words: int | None = None
# is_remote: bool | None = None
# company_industry: str | None = None
class JobResponse(BaseModel):

View File

@ -246,20 +246,21 @@ class IndeedScraper(Scraper):
return text_content
@staticmethod
def get_job_type(job: dict) -> Optional[list[JobType]]:
    """
    Parses the job to get list of job types

    Every entry of ``job["taxonomyAttributes"]`` whose ``"label"`` equals
    ``"job-types"`` is scanned, and each of its attributes that carries a
    non-empty ``"label"`` is converted through
    ``IndeedScraper.get_enum_from_job_type``.

    :param job: job dict; it is indexed with ``"taxonomyAttributes"``, and
        each taxonomy entry is indexed with ``"label"`` and ``"attributes"``
        (a missing key raises ``KeyError``)
    :return: the job types found, in the order they were listed; an empty
        list when nothing matched
    """
    job_types: list[JobType] = []
    for taxonomy in job["taxonomyAttributes"]:
        if taxonomy["label"] != "job-types":
            continue
        for attribute in taxonomy["attributes"]:
            label = attribute.get("label")
            if not label:
                continue
            # Normalise the label (e.g. "Full-time" -> "fulltime") before
            # the enum lookup.
            job_type_str = label.replace("-", "").replace(" ", "").lower()
            job_type = IndeedScraper.get_enum_from_job_type(job_type_str)
            # NOTE(review): the helper's body is not visible here; if it can
            # return None for an unrecognised label, skipping that result
            # keeps the list limited to real JobType members - confirm.
            if job_type is not None:
                job_types.append(job_type)
    return job_types
@staticmethod
def get_enum_from_job_type(job_type_str):