feat: Add job_level to linkedin scraper

pull/166/head
Ali Bakhshi Ilani 2024-07-06 20:00:45 +03:30
parent 3eef5cbe72
commit dda1f53d23
3 changed files with 29 additions and 0 deletions

View File

@ -208,6 +208,7 @@ def scrape_jobs(
"max_amount", "max_amount",
"currency", "currency",
"is_remote", "is_remote",
"job_level",
"job_function", "job_function",
"emails", "emails",
"description", "description",

View File

@ -243,6 +243,9 @@ class JobPost(BaseModel):
emails: list[str] | None = None emails: list[str] | None = None
is_remote: bool | None = None is_remote: bool | None = None
# linkedin specific
job_level: str | None = None
# linkedin and indeed specific # linkedin and indeed specific
company_industry: str | None = None company_industry: str | None = None

View File

@ -219,6 +219,7 @@ class LinkedInScraper(Scraper):
job_url=f"{self.base_url}/jobs/view/{job_id}", job_url=f"{self.base_url}/jobs/view/{job_id}",
compensation=compensation, compensation=compensation,
job_type=job_details.get("job_type"), job_type=job_details.get("job_type"),
job_level=job_details.get("job_level"),
company_industry=job_details.get("company_industry"), company_industry=job_details.get("company_industry"),
description=job_details.get("description"), description=job_details.get("description"),
job_url_direct=job_details.get("job_url_direct"), job_url_direct=job_details.get("job_url_direct"),
@ -267,6 +268,7 @@ class LinkedInScraper(Scraper):
job_function = job_function_span.text.strip() job_function = job_function_span.text.strip()
return { return {
"description": description, "description": description,
"job_level": self._parse_job_level(soup),
"company_industry": self._parse_company_industry(soup), "company_industry": self._parse_company_industry(soup),
"job_type": self._parse_job_type(soup), "job_type": self._parse_job_type(soup),
"job_url_direct": self._parse_job_url_direct(soup), "job_url_direct": self._parse_job_url_direct(soup),
@ -327,6 +329,29 @@ class LinkedInScraper(Scraper):
return [get_enum_from_job_type(employment_type)] if employment_type else [] return [get_enum_from_job_type(employment_type)] if employment_type else []
@staticmethod
def _parse_job_level(soup_job_level: BeautifulSoup) -> str | None:
    """
    Gets the job level (the "Seniority level" criterion) from job page
    :param soup_job_level: parsed job page to search
    :return: text of the span following the "Seniority level" subheader,
        or None when the subheader or its span is not found
    """
    h3_tag = soup_job_level.find(
        "h3",
        class_="description__job-criteria-subheader",
        # The callable can be handed None instead of a string (a tag's
        # ``.string`` is None when it has more than one child), so guard
        # before the membership test to avoid a TypeError.
        string=lambda text: text is not None and "Seniority level" in text,
    )
    if not h3_tag:
        return None

    job_level_span = h3_tag.find_next_sibling(
        "span",
        class_="description__job-criteria-text description__job-criteria-text--criteria",
    )
    if not job_level_span:
        return None
    return job_level_span.get_text(strip=True)
@staticmethod @staticmethod
def _parse_company_industry(soup_industry: BeautifulSoup) -> str | None: def _parse_company_industry(soup_industry: BeautifulSoup) -> str | None:
""" """