mirror of https://github.com/Bunsly/JobSpy
format: jobspy/scrapers/ziprecruiter
parent 2e421ff4e2
commit 289f626e79
@@ -4,6 +4,7 @@ jobspy.scrapers.ziprecruiter
 
 This module contains routines to scrape ZipRecruiter.
 """
+
 from __future__ import annotations
 
 import math
@@ -18,7 +19,7 @@ from ..utils import (
     logger,
     extract_emails_from_text,
     create_session,
-    markdown_converter
+    markdown_converter,
 )
 from ...jobs import (
     JobPost,
@@ -27,7 +28,7 @@ from ...jobs import (
     JobResponse,
     JobType,
     Country,
-    DescriptionFormat
+    DescriptionFormat,
 )
 
 
@@ -64,7 +65,7 @@ class ZipRecruiterScraper(Scraper):
                 break
             if page > 1:
                 time.sleep(self.delay)
-            logger.info(f'ZipRecruiter search page: {page}')
+            logger.info(f"ZipRecruiter search page: {page}")
             jobs_on_page, continue_token = self._find_jobs_in_page(
                 scraper_input, continue_token
             )
@@ -90,25 +91,24 @@ class ZipRecruiterScraper(Scraper):
         if continue_token:
             params["continue_from"] = continue_token
         try:
-            res= self.session.get(
-                f"{self.api_url}/jobs-app/jobs",
-                headers=self.headers,
-                params=params
+            res = self.session.get(
+                f"{self.api_url}/jobs-app/jobs", headers=self.headers, params=params
             )
             if res.status_code not in range(200, 400):
                 if res.status_code == 429:
-                    logger.error(f'429 Response - Blocked by ZipRecruiter for too many requests')
+                    err = "429 Response - Blocked by ZipRecruiter for too many requests"
                 else:
-                    logger.error(f'ZipRecruiter response status code {res.status_code}')
+                    err = f"ZipRecruiter response status code {res.status_code}"
+                err += f" with response: {res.text}"  # ZipRecruiter likely not available in EU
+                logger.error(err)
                 return jobs_list, ""
         except Exception as e:
             if "Proxy responded with" in str(e):
-                logger.error(f'Indeed: Bad proxy')
+                logger.error(f"Indeed: Bad proxy")
             else:
-                logger.error(f'Indeed: {str(e)}')
+                logger.error(f"Indeed: {str(e)}")
             return jobs_list, ""
 
-
         res_data = res.json()
         jobs_list = res_data.get("jobs", [])
         next_continue_token = res_data.get("continue", None)
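Note on the hunk above: instead of calling logger.error inside each status-code branch, the new code builds a single err message, appends the response body, and logs once. A minimal sketch (not from the repo) of that consolidated path, with a hypothetical FakeResponse standing in for the real requests response object:

# Sketch only: FakeResponse is a stand-in, not the real requests.Response.
class FakeResponse:
    status_code = 429
    text = '{"error": "rate limited"}'

res = FakeResponse()
if res.status_code not in range(200, 400):
    if res.status_code == 429:
        err = "429 Response - Blocked by ZipRecruiter for too many requests"
    else:
        err = f"ZipRecruiter response status code {res.status_code}"
    err += f" with response: {res.text}"
    print(err)  # one log call instead of one per branch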
@@ -129,7 +129,11 @@ class ZipRecruiterScraper(Scraper):
         self.seen_urls.add(job_url)
 
         description = job.get("job_description", "").strip()
-        description = markdown_converter(description) if self.scraper_input.description_format == DescriptionFormat.MARKDOWN else description
+        description = (
+            markdown_converter(description)
+            if self.scraper_input.description_format == DescriptionFormat.MARKDOWN
+            else description
+        )
         company = job.get("hiring_company", {}).get("name")
         country_value = "usa" if job.get("job_country") == "US" else "canada"
         country_enum = Country.from_string(country_value)
@@ -140,23 +144,22 @@ class ZipRecruiterScraper(Scraper):
         job_type = self._get_job_type_enum(
             job.get("employment_type", "").replace("_", "").lower()
         )
-        date_posted = datetime.fromisoformat(job['posted_time'].rstrip("Z")).date()
+        date_posted = datetime.fromisoformat(job["posted_time"].rstrip("Z")).date()
+        comp_interval = job.get("compensation_interval")
+        comp_interval = "yearly" if comp_interval == "annual" else comp_interval
+        comp_min = int(job["compensation_min"]) if "compensation_min" in job else None
+        comp_max = int(job["compensation_max"]) if "compensation_max" in job else None
+        comp_currency = job.get("compensation_currency")
         return JobPost(
             title=title,
             company_name=company,
             location=location,
             job_type=job_type,
             compensation=Compensation(
-                interval="yearly"
-                if job.get("compensation_interval") == "annual"
-                else job.get("compensation_interval"),
-                min_amount=int(job["compensation_min"])
-                if "compensation_min" in job
-                else None,
-                max_amount=int(job["compensation_max"])
-                if "compensation_max" in job
-                else None,
-                currency=job.get("compensation_currency"),
+                interval=comp_interval,
+                min_amount=comp_min,
+                max_amount=comp_max,
+                currency=comp_currency,
             ),
             date_posted=date_posted,
             job_url=job_url,
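The compensation change in the hunk above is a behavior-preserving refactor: the interval normalization and the min/max/currency lookups move into comp_* locals computed before JobPost is built, but each value is derived exactly as before. A standalone sketch of that extraction, using a made-up job dict (field names taken from the diff, values invented for illustration):

# Made-up payload for illustration only; real data comes from the jobs-app API.
job = {
    "compensation_interval": "annual",
    "compensation_min": 90000.0,
    "compensation_currency": "USD",
}

comp_interval = job.get("compensation_interval")
comp_interval = "yearly" if comp_interval == "annual" else comp_interval
comp_min = int(job["compensation_min"]) if "compensation_min" in job else None
comp_max = int(job["compensation_max"]) if "compensation_max" in job else None
comp_currency = job.get("compensation_currency")

print(comp_interval, comp_min, comp_max, comp_currency)  # yearly 90000 None USD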
@@ -165,8 +168,9 @@ class ZipRecruiterScraper(Scraper):
         )
 
     def _get_cookies(self):
-        data="event_type=session&logged_in=false&number_of_retry=1&property=model%3AiPhone&property=os%3AiOS&property=locale%3Aen_us&property=app_build_number%3A4734&property=app_version%3A91.0&property=manufacturer%3AApple&property=timestamp%3A2024-01-12T12%3A04%3A42-06%3A00&property=screen_height%3A852&property=os_version%3A16.6.1&property=source%3Ainstall&property=screen_width%3A393&property=device_model%3AiPhone%2014%20Pro&property=brand%3AApple"
-        self.session.post(f"{self.api_url}/jobs-app/event", data=data, headers=self.headers)
+        data = "event_type=session&logged_in=false&number_of_retry=1&property=model%3AiPhone&property=os%3AiOS&property=locale%3Aen_us&property=app_build_number%3A4734&property=app_version%3A91.0&property=manufacturer%3AApple&property=timestamp%3A2024-01-12T12%3A04%3A42-06%3A00&property=screen_height%3A852&property=os_version%3A16.6.1&property=source%3Ainstall&property=screen_width%3A393&property=device_model%3AiPhone%2014%20Pro&property=brand%3AApple"
+        url = f"{self.api_url}/jobs-app/event"
+        self.session.post(url, data=data, headers=self.headers)
 
     @staticmethod
     def _get_job_type_enum(job_type_str: str) -> list[JobType] | None:
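The _get_cookies change above only pulls the event URL into its own variable; the payload itself is unchanged. For the curious, that URL-encoded blob decodes with the standard library (a shortened sample is used here; the full string is in the diff):

from urllib.parse import parse_qs

# Shortened sample of the payload above, not the full string from the diff.
data = "event_type=session&logged_in=false&property=model%3AiPhone&property=brand%3AApple"
decoded = parse_qs(data)
print(decoded["event_type"])  # ['session']
print(decoded["property"])    # ['model:iPhone', 'brand:Apple']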
@@ -182,16 +186,13 @@ class ZipRecruiterScraper(Scraper):
             "location": scraper_input.location,
         }
         if scraper_input.hours_old:
-            fromage = max(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None
-            params['days'] = fromage
-        job_type_map = {
-            JobType.FULL_TIME: 'full_time',
-            JobType.PART_TIME: 'part_time'
-        }
+            params["days"] = max(scraper_input.hours_old // 24, 1)
+        job_type_map = {JobType.FULL_TIME: "full_time", JobType.PART_TIME: "part_time"}
         if scraper_input.job_type:
-            params['employment_type'] = job_type_map[scraper_input.job_type] if scraper_input.job_type in job_type_map else scraper_input.job_type.value[0]
+            job_type = scraper_input.job_type
+            params["employment_type"] = job_type_map.get(job_type, job_type.value[0])
         if scraper_input.easy_apply:
-            params['zipapply'] = 1
+            params["zipapply"] = 1
         if scraper_input.is_remote:
             params["remote"] = 1
         if scraper_input.distance:
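The employment_type change in the last hunk swaps the explicit membership check for dict.get with a default, which behaves identically: mapped job types resolve through job_type_map, anything else falls back to the enum's own first value. A small standalone check with a stand-in enum (the real JobType comes from ...jobs; member values here are invented):

from enum import Enum

class JobType(Enum):  # stand-in for jobspy's JobType; member values invented
    FULL_TIME = ("fulltime",)
    PART_TIME = ("parttime",)
    CONTRACT = ("contract",)

job_type_map = {JobType.FULL_TIME: "full_time", JobType.PART_TIME: "part_time"}

for job_type in (JobType.PART_TIME, JobType.CONTRACT):
    old_way = job_type_map[job_type] if job_type in job_type_map else job_type.value[0]
    new_way = job_type_map.get(job_type, job_type.value[0])
    assert old_way == new_way
    print(new_way)  # part_time, then contract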