From 11a9e9a56ab953a13556c110b74838a57b0c3733 Mon Sep 17 00:00:00 2001
From: Abdulrahman Al Muaitah
Date: Fri, 21 Feb 2025 20:10:02 +0400
Subject: [PATCH] Fixed Bayt scraper integration

---
 src/jobspy/scrapers/bayt/__init__.py | 34 ++++++++++++++++------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/src/jobspy/scrapers/bayt/__init__.py b/src/jobspy/scrapers/bayt/__init__.py
index 6d3b6b5..d5c9ddd 100644
--- a/src/jobspy/scrapers/bayt/__init__.py
+++ b/src/jobspy/scrapers/bayt/__init__.py
@@ -77,7 +77,8 @@ class BaytScraper(Scraper):
         Grabs the job results for the given query and page number.
         """
         try:
-            url = f"{self.base_url}/en/jobs/{query}-jobs/?page={page}"
+            # Updated URL to include the "international" segment as per the original code.
+            url = f"{self.base_url}/en/international/jobs/{query}-jobs/?page={page}"
             logger.info(f"Constructed URL: {url}")
             headers = {
                 "User-Agent": (
@@ -89,7 +90,8 @@ class BaytScraper(Scraper):
             response = requests.get(url, headers=headers, timeout=10)
             response.raise_for_status()
             soup = BeautifulSoup(response.text, "html.parser")
-            job_listings = soup.find_all("li", class_="has-pointer-d")
+            # Use the attribute selector as in the original code.
+            job_listings = soup.find_all("li", attrs={"data-js-job": ""})
             logger.info(f"Found {len(job_listings)} job listing elements")
             return job_listings
         except Exception as e:
@@ -98,28 +100,30 @@ class BaytScraper(Scraper):
 
     def _extract_job_info(self, job: BeautifulSoup) -> Optional[JobPost]:
         """
-        Extracts the job information from a single job listing,
-        mirroring your original code's logic for company and location.
+        Extracts the job information from a single job listing.
         """
-        # The h2 with class jb-title holds the title and link
-        job_general_information = job.find("h2", class_="jb-title")
+        # Find the h2 element holding the title and link (no class filtering)
+        job_general_information = job.find("h2")
         if not job_general_information:
             return None
-        job_title = job_general_information.text.strip()
+        job_title = job_general_information.get_text(strip=True)
         job_url = self._extract_job_url(job_general_information)
         if not job_url:
             return None
 
-        # --- Company Name (original approach) ---
-        company_tag = job.find("b", class_="jb-company")
-        company_name = company_tag.text.strip() if company_tag else None
+        # Extract company name using the original approach:
+        company_tag = job.find("div", class_="t-nowrap p10l")
+        company_name = (
+            company_tag.find("span").get_text(strip=True)
+            if company_tag and company_tag.find("span")
+            else None
+        )
 
-        # --- Location (original approach) ---
-        location_tag = job.find("span", class_="jb-loc")
-        location = location_tag.text.strip() if location_tag else None
+        # Extract location using the original approach:
+        location_tag = job.find("div", class_="t-mute t-small")
+        location = location_tag.get_text(strip=True) if location_tag else None
 
-        # Build our JobPost object
         job_id = f"bayt-{abs(hash(job_url))}"
         location_obj = Location(
             city=location,
@@ -147,7 +151,7 @@ class BaytScraper(Scraper):
 
     def _extract_job_url(self, job_general_information: BeautifulSoup) -> Optional[str]:
         """
-        Pulls the job URL from the 'a' within h2.jb-title.
+        Pulls the job URL from the 'a' within the h2 element.
         """
         a_tag = job_general_information.find("a")
         if a_tag and a_tag.has_attr("href"):
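
Note (not part of the patch): a minimal standalone sketch of how the new URL pattern and the "data-js-job" attribute selector introduced by this diff could be exercised in isolation. The base URL, query value, and User-Agent string below are assumptions for illustration, not values confirmed by the patch.

# Illustrative usage sketch only; mirrors the selectors the patch switches to.
import requests
from bs4 import BeautifulSoup

BASE_URL = "https://www.bayt.com"  # assumed value of self.base_url

def fetch_listing_elements(query: str, page: int = 1) -> list:
    """Fetch one Bayt search page and return the <li data-js-job> elements."""
    url = f"{BASE_URL}/en/international/jobs/{query}-jobs/?page={page}"
    headers = {"User-Agent": "Mozilla/5.0"}  # placeholder UA string
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    # Same selector as the patched scraper: <li> elements carrying a
    # bare data-js-job attribute (parsed by html.parser as an empty value).
    return soup.find_all("li", attrs={"data-js-job": ""})

if __name__ == "__main__":
    listings = fetch_listing_elements("python")
    print(f"Found {len(listings)} job listing elements")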