diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py
index e2956f5..b74ab72 100644
--- a/src/jobspy/__init__.py
+++ b/src/jobspy/__init__.py
@@ -208,6 +208,7 @@ def scrape_jobs(
             "max_amount",
             "currency",
             "is_remote",
+            "job_level",
             "job_function",
             "emails",
             "description",
diff --git a/src/jobspy/jobs/__init__.py b/src/jobspy/jobs/__init__.py
index 25a056f..7563f2b 100644
--- a/src/jobspy/jobs/__init__.py
+++ b/src/jobspy/jobs/__init__.py
@@ -243,6 +243,9 @@ class JobPost(BaseModel):
     emails: list[str] | None = None
     is_remote: bool | None = None
 
+    # linkedin specific
+    job_level: str | None = None
+
     # linkedin and indeed specific
     company_industry: str | None = None
 
diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/jobspy/scrapers/linkedin/__init__.py
index 8e57e10..45bf31a 100644
--- a/src/jobspy/scrapers/linkedin/__init__.py
+++ b/src/jobspy/scrapers/linkedin/__init__.py
@@ -219,6 +219,7 @@ class LinkedInScraper(Scraper):
             job_url=f"{self.base_url}/jobs/view/{job_id}",
             compensation=compensation,
             job_type=job_details.get("job_type"),
+            job_level=job_details.get("job_level"),
             company_industry=job_details.get("company_industry"),
             description=job_details.get("description"),
             job_url_direct=job_details.get("job_url_direct"),
@@ -267,6 +268,7 @@ class LinkedInScraper(Scraper):
                 job_function = job_function_span.text.strip()
         return {
             "description": description,
+            "job_level": self._parse_job_level(soup),
             "company_industry": self._parse_company_industry(soup),
             "job_type": self._parse_job_type(soup),
             "job_url_direct": self._parse_job_url_direct(soup),
@@ -327,6 +329,31 @@ class LinkedInScraper(Scraper):
 
         return [get_enum_from_job_type(employment_type)] if employment_type else []
 
+    @staticmethod
+    def _parse_job_level(soup_job_level: BeautifulSoup) -> str | None:
+        """
+        Gets the job level (the "Seniority level" criteria item) from job page
+        :param soup_job_level: parsed job page
+        :return: the stripped seniority level text, or None when the page has no such item
+        """
+        h3_tag = soup_job_level.find(
+            "h3",
+            class_="description__job-criteria-subheader",
+            # a tag's .string may be None (empty header or several children);
+            # guard so such a header is skipped instead of raising TypeError
+            string=lambda text: text is not None and "Seniority level" in text,
+        )
+        job_level = None
+        if h3_tag:
+            job_level_span = h3_tag.find_next_sibling(
+                "span",
+                class_="description__job-criteria-text description__job-criteria-text--criteria",
+            )
+            if job_level_span:
+                job_level = job_level_span.get_text(strip=True)
+
+        return job_level
+
     @staticmethod
     def _parse_company_industry(soup_industry: BeautifulSoup) -> str | None:
         """