From 6e8576f8a8f5e28762399ac1236d81f260fa1331 Mon Sep 17 00:00:00 2001
From: Kaushik H S <kaushik0h0s@gmail.com>
Date: Sun, 24 Aug 2025 02:08:26 +0530
Subject: [PATCH] fix(naukri): prevent str.find error by normalizing input and
 parsing before Markdown (#300)

---
 jobspy/naukri/__init__.py | 9 ++++++---
 jobspy/naukri/util.py     | 8 ++++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/jobspy/naukri/__init__.py b/jobspy/naukri/__init__.py
index d456c85..43d4828 100644
--- a/jobspy/naukri/__init__.py
+++ b/jobspy/naukri/__init__.py
@@ -164,12 +164,15 @@ class Naukri(Scraper):
         date_posted = self._parse_date(job.get("footerPlaceholderLabel"), job.get("createdDate"))
 
         job_url = f"https://www.naukri.com{job.get('jdURL', f'/job/{job_id}')}"
-        description = job.get("jobDescription") if full_descr else None
+        raw_description = job.get("jobDescription") if full_descr else None
+
+        job_type = parse_job_type(raw_description) if raw_description else None
+        company_industry = parse_company_industry(raw_description) if raw_description else None
+
+        description = raw_description
         if description and self.scraper_input.description_format == DescriptionFormat.MARKDOWN:
             description = markdown_converter(description)
 
-        job_type = parse_job_type(description) if description else None
-        company_industry = parse_company_industry(description) if description else None
         is_remote = is_job_remote(title, description or "", location)
         company_logo = job.get("logoPathV3") or job.get("logoPath")
 
diff --git a/jobspy/naukri/util.py b/jobspy/naukri/util.py
index f363c9a..c1b0d7a 100644
--- a/jobspy/naukri/util.py
+++ b/jobspy/naukri/util.py
@@ -5,10 +5,12 @@ from jobspy.model import JobType, Location
 from jobspy.util import get_enum_from_job_type
 
 
-def parse_job_type(soup: BeautifulSoup) -> list[JobType] | None:
+def parse_job_type(soup: BeautifulSoup | str) -> list[JobType] | None:
     """
     Gets the job type from the job page
     """
+    if isinstance(soup, str):
+        soup = BeautifulSoup(soup, "html.parser")
     job_type_tag = soup.find("span", class_="job-type")
     if job_type_tag:
         job_type_str = job_type_tag.get_text(strip=True).lower().replace("-", "")
@@ -16,10 +18,12 @@ def parse_job_type(soup: BeautifulSoup) -> list[JobType] | None:
     return None
 
 
-def parse_company_industry(soup: BeautifulSoup) -> str | None:
+def parse_company_industry(soup: BeautifulSoup | str) -> str | None:
     """
     Gets the company industry from the job page
     """
+    if isinstance(soup, str):
+        soup = BeautifulSoup(soup, "html.parser")
     industry_tag = soup.find("span", class_="industry")
     return industry_tag.get_text(strip=True) if industry_tag else None