mirror of https://github.com/Bunsly/JobSpy
enh(indeed): multiple job types
parent
2e59ab03e3
commit
ecdbf69f94
|
@ -120,11 +120,8 @@ def scrape_jobs(
|
||||||
] = f'<a href="{job_data["job_url"]}">{job_data["job_url"]}</a>'
|
] = f'<a href="{job_data["job_url"]}">{job_data["job_url"]}</a>'
|
||||||
job_data["site"] = site
|
job_data["site"] = site
|
||||||
job_data["company"] = job_data["company_name"]
|
job_data["company"] = job_data["company_name"]
|
||||||
if job_data["job_type"]:
|
job_data["job_type"] = ", ".join(job_type.value[0] for job_type in job_data["job_type"]) if job_data[
|
||||||
# Take the first value from the job type tuple
|
"job_type"] else None
|
||||||
job_data["job_type"] = job_data["job_type"].value[0]
|
|
||||||
else:
|
|
||||||
job_data["job_type"] = None
|
|
||||||
|
|
||||||
job_data["location"] = Location(**job_data["location"]).display_location()
|
job_data["location"] = Location(**job_data["location"]).display_location()
|
||||||
|
|
||||||
|
@ -163,7 +160,6 @@ def scrape_jobs(
|
||||||
"max_amount",
|
"max_amount",
|
||||||
"currency",
|
"currency",
|
||||||
"emails",
|
"emails",
|
||||||
"job_url_hyper" if hyperlinks else "job_url",
|
|
||||||
"description",
|
"description",
|
||||||
]
|
]
|
||||||
jobs_formatted_df = jobs_df[desired_order]
|
jobs_formatted_df = jobs_df[desired_order]
|
||||||
|
|
|
@ -183,11 +183,14 @@ class JobPost(BaseModel):
|
||||||
location: Optional[Location]
|
location: Optional[Location]
|
||||||
|
|
||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
job_type: Optional[JobType] = None
|
job_type: Optional[list[JobType]] = None
|
||||||
compensation: Optional[Compensation] = None
|
compensation: Optional[Compensation] = None
|
||||||
date_posted: Optional[date] = None
|
date_posted: Optional[date] = None
|
||||||
benefits: Optional[str] = None
|
benefits: Optional[str] = None
|
||||||
emails: Optional[list[str]] = None
|
emails: Optional[list[str]] = None
|
||||||
|
num_urgent_words: int | None = None
|
||||||
|
# is_remote: bool | None = None
|
||||||
|
# company_industry: str | None = None
|
||||||
|
|
||||||
|
|
||||||
class JobResponse(BaseModel):
|
class JobResponse(BaseModel):
|
||||||
|
|
|
@ -246,20 +246,21 @@ class IndeedScraper(Scraper):
|
||||||
return text_content
|
return text_content
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_job_type(job: dict) -> Optional[JobType]:
|
def get_job_type(job: dict) -> Optional[list[JobType]]:
|
||||||
"""
|
"""
|
||||||
Parses the job to get JobTypeIndeed
|
Parses the job to get list of job types
|
||||||
:param job:
|
:param job:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
job_types: list[JobType] = []
|
||||||
for taxonomy in job["taxonomyAttributes"]:
|
for taxonomy in job["taxonomyAttributes"]:
|
||||||
if taxonomy["label"] == "job-types":
|
if taxonomy["label"] == "job-types":
|
||||||
if len(taxonomy["attributes"]) > 0:
|
for i in range(len(taxonomy["attributes"])):
|
||||||
label = taxonomy["attributes"][0].get("label")
|
label = taxonomy["attributes"][i].get("label")
|
||||||
if label:
|
if label:
|
||||||
job_type_str = label.replace("-", "").replace(" ", "").lower()
|
job_type_str = label.replace("-", "").replace(" ", "").lower()
|
||||||
return IndeedScraper.get_enum_from_job_type(job_type_str)
|
job_types.append(IndeedScraper.get_enum_from_job_type(job_type_str))
|
||||||
return None
|
return job_types
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_enum_from_job_type(job_type_str):
|
def get_enum_from_job_type(job_type_str):
|
||||||
|
|
Loading…
Reference in New Issue