mirror of https://github.com/Bunsly/JobSpy
Fixed Bayt scraper integration
parent: c6ade14784
commit: 11a9e9a56a
|
@@ -77,7 +77,8 @@ class BaytScraper(Scraper):
|
||||||
Grabs the job results for the given query and page number.
|
Grabs the job results for the given query and page number.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
url = f"{self.base_url}/en/jobs/{query}-jobs/?page={page}"
|
# Updated URL to include the "international" segment as per the original code.
|
||||||
|
url = f"{self.base_url}/en/international/jobs/{query}-jobs/?page={page}"
|
||||||
logger.info(f"Constructed URL: {url}")
|
logger.info(f"Constructed URL: {url}")
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent": (
|
"User-Agent": (
|
||||||
|
@@ -89,7 +90,8 @@ class BaytScraper(Scraper):
|
||||||
response = requests.get(url, headers=headers, timeout=10)
|
response = requests.get(url, headers=headers, timeout=10)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
soup = BeautifulSoup(response.text, "html.parser")
|
||||||
job_listings = soup.find_all("li", class_="has-pointer-d")
|
# Use the attribute selector as in the original code.
|
||||||
|
job_listings = soup.find_all("li", attrs={"data-js-job": ""})
|
||||||
logger.info(f"Found {len(job_listings)} job listing elements")
|
logger.info(f"Found {len(job_listings)} job listing elements")
|
||||||
return job_listings
|
return job_listings
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@@ -98,28 +100,30 @@ class BaytScraper(Scraper):
|
||||||
|
|
||||||
def _extract_job_info(self, job: BeautifulSoup) -> Optional[JobPost]:
|
def _extract_job_info(self, job: BeautifulSoup) -> Optional[JobPost]:
|
||||||
"""
|
"""
|
||||||
Extracts the job information from a single job listing,
|
Extracts the job information from a single job listing.
|
||||||
mirroring your original code's logic for company and location.
|
|
||||||
"""
|
"""
|
||||||
# The h2 with class jb-title holds the title and link
|
# Find the h2 element holding the title and link (no class filtering)
|
||||||
job_general_information = job.find("h2", class_="jb-title")
|
job_general_information = job.find("h2")
|
||||||
if not job_general_information:
|
if not job_general_information:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
job_title = job_general_information.text.strip()
|
job_title = job_general_information.get_text(strip=True)
|
||||||
job_url = self._extract_job_url(job_general_information)
|
job_url = self._extract_job_url(job_general_information)
|
||||||
if not job_url:
|
if not job_url:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# --- Company Name (original approach) ---
|
# Extract company name using the original approach:
|
||||||
company_tag = job.find("b", class_="jb-company")
|
company_tag = job.find("div", class_="t-nowrap p10l")
|
||||||
company_name = company_tag.text.strip() if company_tag else None
|
company_name = (
|
||||||
|
company_tag.find("span").get_text(strip=True)
|
||||||
|
if company_tag and company_tag.find("span")
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
|
||||||
# --- Location (original approach) ---
|
# Extract location using the original approach:
|
||||||
location_tag = job.find("span", class_="jb-loc")
|
location_tag = job.find("div", class_="t-mute t-small")
|
||||||
location = location_tag.text.strip() if location_tag else None
|
location = location_tag.get_text(strip=True) if location_tag else None
|
||||||
|
|
||||||
# Build our JobPost object
|
|
||||||
job_id = f"bayt-{abs(hash(job_url))}"
|
job_id = f"bayt-{abs(hash(job_url))}"
|
||||||
location_obj = Location(
|
location_obj = Location(
|
||||||
city=location,
|
city=location,
|
||||||
|
@@ -147,7 +151,7 @@ class BaytScraper(Scraper):
|
||||||
|
|
||||||
def _extract_job_url(self, job_general_information: BeautifulSoup) -> Optional[str]:
|
def _extract_job_url(self, job_general_information: BeautifulSoup) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Pulls the job URL from the 'a' within h2.jb-title.
|
Pulls the job URL from the 'a' within the h2 element.
|
||||||
"""
|
"""
|
||||||
a_tag = job_general_information.find("a")
|
a_tag = job_general_information.find("a")
|
||||||
if a_tag and a_tag.has_attr("href"):
|
if a_tag and a_tag.has_attr("href"):
|
||||||
|
|
Loading…
Reference in New Issue