Fixed Bayt scraper integration

pull/246/head
Abdulrahman Al Muaitah 2025-02-21 20:10:02 +04:00
parent c6ade14784
commit 11a9e9a56a
1 changed file with 19 additions and 15 deletions

View File

@@ -77,7 +77,8 @@ class BaytScraper(Scraper):
         Grabs the job results for the given query and page number.
         """
         try:
-            url = f"{self.base_url}/en/jobs/{query}-jobs/?page={page}"
+            # Updated URL to include the "international" segment as per the original code.
+            url = f"{self.base_url}/en/international/jobs/{query}-jobs/?page={page}"
             logger.info(f"Constructed URL: {url}")
             headers = {
                 "User-Agent": (
@@ -89,7 +90,8 @@ class BaytScraper(Scraper):
             response = requests.get(url, headers=headers, timeout=10)
             response.raise_for_status()
             soup = BeautifulSoup(response.text, "html.parser")
-            job_listings = soup.find_all("li", class_="has-pointer-d")
+            # Use the attribute selector as in the original code.
+            job_listings = soup.find_all("li", attrs={"data-js-job": ""})
             logger.info(f"Found {len(job_listings)} job listing elements")
             return job_listings
         except Exception as e:
@@ -98,28 +100,30 @@ class BaytScraper(Scraper):
     def _extract_job_info(self, job: BeautifulSoup) -> Optional[JobPost]:
         """
-        Extracts the job information from a single job listing,
-        mirroring your original code's logic for company and location.
+        Extracts the job information from a single job listing.
         """
-        # The h2 with class jb-title holds the title and link
-        job_general_information = job.find("h2", class_="jb-title")
+        # Find the h2 element holding the title and link (no class filtering)
+        job_general_information = job.find("h2")
         if not job_general_information:
             return None
-        job_title = job_general_information.text.strip()
+        job_title = job_general_information.get_text(strip=True)
         job_url = self._extract_job_url(job_general_information)
         if not job_url:
             return None
-        # --- Company Name (original approach) ---
-        company_tag = job.find("b", class_="jb-company")
-        company_name = company_tag.text.strip() if company_tag else None
+        # Extract company name using the original approach:
+        company_tag = job.find("div", class_="t-nowrap p10l")
+        company_name = (
+            company_tag.find("span").get_text(strip=True)
+            if company_tag and company_tag.find("span")
+            else None
+        )
-        # --- Location (original approach) ---
-        location_tag = job.find("span", class_="jb-loc")
-        location = location_tag.text.strip() if location_tag else None
+        # Extract location using the original approach:
+        location_tag = job.find("div", class_="t-mute t-small")
+        location = location_tag.get_text(strip=True) if location_tag else None
-        # Build our JobPost object
         job_id = f"bayt-{abs(hash(job_url))}"
         location_obj = Location(
             city=location,
@@ -147,7 +151,7 @@ class BaytScraper(Scraper):
     def _extract_job_url(self, job_general_information: BeautifulSoup) -> Optional[str]:
         """
-        Pulls the job URL from the 'a' within h2.jb-title.
+        Pulls the job URL from the 'a' within the h2 element.
         """
         a_tag = job_general_information.find("a")
         if a_tag and a_tag.has_attr("href"):