fix(naukri): prevent str.find error by normalizing input and parsing before Markdown (#300)

This commit is contained in:
Kaushik H S
2025-08-24 02:08:26 +05:30
committed by GitHub
parent 51888004b7
commit 6e8576f8a8
2 changed files with 12 additions and 5 deletions

View File

@@ -5,10 +5,12 @@ from jobspy.model import JobType, Location
from jobspy.util import get_enum_from_job_type
def parse_job_type(soup: BeautifulSoup) -> list[JobType] | None:
def parse_job_type(soup: BeautifulSoup |str) -> list[JobType] | None:
"""
Gets the job type from the job page
"""
if isinstance(soup, str):
soup = BeautifulSoup(soup, "html.parser")
job_type_tag = soup.find("span", class_="job-type")
if job_type_tag:
job_type_str = job_type_tag.get_text(strip=True).lower().replace("-", "")
@@ -16,10 +18,12 @@ def parse_job_type(soup: BeautifulSoup) -> list[JobType] | None:
return None
def parse_company_industry(soup: BeautifulSoup) -> str | None:
def parse_company_industry(soup: BeautifulSoup | str) -> str | None:
    """
    Gets the company industry from the job page.

    `soup` may be an already-parsed BeautifulSoup tree or a raw HTML string;
    a string is parsed with the "html.parser" backend before the lookup.
    Returns the whitespace-stripped text of the `span` element with class
    "industry" located by `find`, or None when no such element is found.
    """
    # Normalize the input so the lookup below always runs on a parsed tree.
    page = BeautifulSoup(soup, "html.parser") if isinstance(soup, str) else soup
    industry_span = page.find("span", class_="industry")
    if not industry_span:
        return None
    return industry_span.get_text(strip=True)