diff --git a/README.md b/README.md
index 0a82353..9049ef5 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,7 @@ Optional
 ├── job_type (enum): fulltime, parttime, internship, contract
 ├── proxy (str): in format 'http://user:pass@host:port' or [https, socks]
 ├── is_remote (bool)
+├── full_description (bool): fetches the full job description for Indeed / LinkedIn (much slower)
 ├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
 ├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
 ├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
diff --git a/examples/JobSpy_Demo.py b/examples/JobSpy_AllSites.py
similarity index 72%
rename from examples/JobSpy_Demo.py
rename to examples/JobSpy_AllSites.py
index c982793..22deea0 100644
--- a/examples/JobSpy_Demo.py
+++ b/examples/JobSpy_AllSites.py
@@ -2,12 +2,11 @@ from jobspy import scrape_jobs
 import pandas as pd
 
 jobs: pd.DataFrame = scrape_jobs(
-    site_name=["indeed", "linkedin", "zip_recruiter"],
+    site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor"],
     search_term="software engineer",
     location="Dallas, TX",
-    results_wanted=50, # be wary the higher it is, the more likey you'll get blocked (rotating proxy should work tho)
+    results_wanted=25, # be wary: the higher this is, the more likely you are to get blocked (a rotating proxy can help)
     country_indeed="USA",
-    offset=25  # start jobs from an offset (use if search failed and want to continue)
     # proxy="http://jobspy:5a4vpWtj8EeJ2hoYzk@ca.smartproxy.com:20001",
 )
@@ -28,4 +27,4 @@ print("outputted to jobs.csv")
 # jobs.to_xlsx('jobs.xlsx', index=False)
 
 # 4: display in Jupyter Notebook (1. pip install jupyter 2. jupyter notebook)
-# display(jobs)
+# display(jobs)
\ No newline at end of file
diff --git a/examples/JobSpy_LongScrape.py b/examples/JobSpy_LongScrape.py
new file mode 100644
index 0000000..189ca81
--- /dev/null
+++ b/examples/JobSpy_LongScrape.py
@@ -0,0 +1,77 @@
+from jobspy import scrape_jobs
+import pandas as pd
+import os
+import time
+
+# create a new filename if jobs.csv already exists
+csv_filename = "jobs.csv"
+counter = 1
+while os.path.exists(csv_filename):
+    csv_filename = f"jobs_{counter}.csv"
+    counter += 1
+
+# results wanted and offset
+results_wanted = 1000
+offset = 0
+
+all_jobs = []
+
+# max retries
+max_retries = 3
+
+# number of results to fetch in each iteration
+results_in_each_iteration = 30
+
+while len(all_jobs) < results_wanted:
+    retry_count = 0
+    while retry_count < max_retries:
+        print("Scraping jobs from", offset, "to", offset + results_in_each_iteration)
+        try:
+            jobs = scrape_jobs(
+                site_name=["indeed"],
+                search_term="software engineer",
+                # New York, NY
+                # Dallas, TX
+
+                # Los Angeles, CA
+                location="Los Angeles, CA",
+                results_wanted=min(results_in_each_iteration, results_wanted - len(all_jobs)),
+                country_indeed="USA",
+                offset=offset,
+                # proxy="http://jobspy:5a4vpWtj8EeJ2hoYzk@ca.smartproxy.com:20001",
+            )
+
+            # Add the scraped jobs to the list
+            all_jobs.extend(jobs.to_dict('records'))
+
+            # Increment the offset for the next page of results
+            offset += results_in_each_iteration
+
+            # Add a delay to avoid rate limiting (adjust the delay time as needed)
+            print(f"Scraped {len(all_jobs)} jobs")
+            print("Sleeping for", 100 * (retry_count + 1), "seconds")
+            time.sleep(100 * (retry_count + 1))  # sleep between requests; the wait grows with the retry count
+
+            break  # Break out of the retry loop if successful
+        except Exception as e:
+            print(f"Error: {e}")
+            retry_count += 1
+            print("Sleeping for", 100 * (retry_count + 1), "seconds before retry")
+            time.sleep(100 * (retry_count + 1))
+            if retry_count >= max_retries:
+                print("Max retries reached. Exiting.")
+                break
+
+# DataFrame from the collected job data
+jobs_df = pd.DataFrame(all_jobs)
+
+# Formatting
+pd.set_option("display.max_columns", None)
+pd.set_option("display.max_rows", None)
+pd.set_option("display.width", None)
+pd.set_option("display.max_colwidth", 50)
+
+print(jobs_df)
+
+jobs_df.to_csv(csv_filename, index=False)
+print(f"Outputted to {csv_filename}")
diff --git a/pyproject.toml b/pyproject.toml
index 9a49bf7..4d8e71e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.31"
+version = "1.1.36"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton ", "Cullen Watson "]
 homepage = "https://github.com/Bunsly/JobSpy"
diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py
index 8214a1f..1a4f66d 100644
--- a/src/jobspy/__init__.py
+++ b/src/jobspy/__init__.py
@@ -40,6 +40,7 @@ def scrape_jobs(
     country_indeed: str = "usa",
     hyperlinks: bool = False,
     proxy: Optional[str] = None,
+    full_description: Optional[bool] = False,
     offset: Optional[int] = 0,
 ) -> pd.DataFrame:
     """
@@ -74,6 +75,7 @@
         is_remote=is_remote,
         job_type=job_type,
         easy_apply=easy_apply,
+        full_description=full_description,
         results_wanted=results_wanted,
         offset=offset,
     )
diff --git a/src/jobspy/jobs/__init__.py b/src/jobspy/jobs/__init__.py
index f018b1e..db600f1 100644
--- a/src/jobspy/jobs/__init__.py
+++ b/src/jobspy/jobs/__init__.py
@@ -1,7 +1,7 @@
-from typing import Union, Optional
+from typing import Optional
 from datetime import date
 from enum import Enum
-from pydantic import BaseModel, validator
+from pydantic import BaseModel
 
 
 class JobType(Enum):
diff --git a/src/jobspy/scrapers/__init__.py b/src/jobspy/scrapers/__init__.py
index 97aaad0..37bd356 100644
--- a/src/jobspy/scrapers/__init__.py
+++ b/src/jobspy/scrapers/__init__.py
@@ -19,6 +19,7 @@ class ScraperInput(BaseModel):
     is_remote: bool = False
     job_type: Optional[JobType] = None
     easy_apply: bool = None  # linkedin
+    full_description: bool = False
     offset: int = 0
 
     results_wanted: int = 15
diff --git a/src/jobspy/scrapers/glassdoor/__init__.py b/src/jobspy/scrapers/glassdoor/__init__.py
index 706b3e7..69dd3e4 100644
--- a/src/jobspy/scrapers/glassdoor/__init__.py
+++ b/src/jobspy/scrapers/glassdoor/__init__.py
@@ -5,12 +5,16 @@ jobspy.scrapers.glassdoor
 This module contains routines to scrape Glassdoor.
 """
 import json
-from typing import Optional, Any
+import requests
+from bs4 import BeautifulSoup
+from typing import Optional
 from datetime import datetime, timedelta
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
+from ..utils import count_urgent_words, extract_emails_from_text
 from .. import Scraper, ScraperInput, Site
 from ..exceptions import GlassdoorException
-from ..utils import create_session
+from ..utils import create_session, modify_and_get_description
 from ...jobs import (
     JobPost,
     Compensation,
@@ -66,50 +70,70 @@
         jobs_data = res_json["data"]["jobListings"]["jobListings"]
 
         jobs = []
-        for i, job in enumerate(jobs_data):
-            job_url = res_json["data"]["jobListings"]["jobListingSeoLinks"][
-                "linkItems"
-            ][i]["url"]
-            if job_url in self.seen_urls:
-                continue
-            self.seen_urls.add(job_url)
-            job = job["jobview"]
-            title = job["job"]["jobTitleText"]
-            company_name = job["header"]["employerNameFromSearch"]
-            location_name = job["header"].get("locationName", "")
-            location_type = job["header"].get("locationType", "")
-            age_in_days = job["header"].get("ageInDays")
-            is_remote, location = False, None
-            date_posted = (datetime.now() - timedelta(days=age_in_days)).date() if age_in_days else None
-
-            if location_type == "S":
-                is_remote = True
-            else:
-                location = self.parse_location(location_name)
-
-            compensation = self.parse_compensation(job["header"])
-
-            job = JobPost(
-                title=title,
-                company_name=company_name,
-                date_posted=date_posted,
-                job_url=job_url,
-                location=location,
-                compensation=compensation,
-                is_remote=is_remote
-            )
-            jobs.append(job)
+        with ThreadPoolExecutor(max_workers=self.jobs_per_page) as executor:
+            future_to_job_data = {executor.submit(self.process_job, job): job for job in jobs_data}
+            for future in as_completed(future_to_job_data):
+                job_data = future_to_job_data[future]
+                try:
+                    job_post = future.result()
+                    if job_post:
+                        jobs.append(job_post)
+                except Exception as exc:
+                    raise GlassdoorException(f'Glassdoor generated an exception: {exc}')
 
         return jobs, self.get_cursor_for_page(
             res_json["data"]["jobListings"]["paginationCursors"], page_num + 1
         )
 
+    def process_job(self, job_data):
+        """Processes a single job and fetches its description."""
+        job_id = job_data["jobview"]["job"]["listingId"]
+        job_url = f'{self.url}/job-listing/?jl={job_id}'
+        if job_url in self.seen_urls:
+            return None
+        self.seen_urls.add(job_url)
+        job = job_data["jobview"]
+        title = job["job"]["jobTitleText"]
+        company_name = job["header"]["employerNameFromSearch"]
+        location_name = job["header"].get("locationName", "")
+        location_type = job["header"].get("locationType", "")
+        age_in_days = job["header"].get("ageInDays")
+        is_remote, location = False, None
+        date_posted = (datetime.now() - timedelta(days=age_in_days)).date() if age_in_days else None
+
+        if location_type == "S":
+            is_remote = True
+        else:
+            location = self.parse_location(location_name)
+
+        compensation = self.parse_compensation(job["header"])
+
+        try:
+            description = self.fetch_job_description(job_id)
+        except Exception as e:
+            description = None
+
+        job_post = JobPost(
+            title=title,
+            company_name=company_name,
+            date_posted=date_posted,
+            job_url=job_url,
+            location=location,
+            compensation=compensation,
+            is_remote=is_remote,
+            description=description,
+            emails=extract_emails_from_text(description) if description else None,
+            num_urgent_words=count_urgent_words(description) if description else None,
+        )
+        return job_post
+
     def scrape(self, scraper_input: ScraperInput) -> JobResponse:
         """
         Scrapes Glassdoor for jobs with scraper_input criteria.
         :param scraper_input: Information about job search criteria.
         :return: JobResponse containing a list of jobs.
         """
+        scraper_input.results_wanted = min(900, scraper_input.results_wanted)
         self.country = scraper_input.country
         self.url = self.country.get_url()
 
@@ -143,6 +167,41 @@
 
         return JobResponse(jobs=all_jobs)
 
+    def fetch_job_description(self, job_id):
+        """Fetches the job description for a single job ID."""
+        url = f"{self.url}/graph"
+        body = [
+            {
+                "operationName": "JobDetailQuery",
+                "variables": {
+                    "jl": job_id,
+                    "queryString": "q",
+                    "pageTypeEnum": "SERP"
+                },
+                "query": """
+                query JobDetailQuery($jl: Long!, $queryString: String, $pageTypeEnum: PageTypeEnum) {
+                    jobview: jobView(
+                        listingId: $jl
+                        contextHolder: {queryString: $queryString, pageTypeEnum: $pageTypeEnum}
+                    ) {
+                        job {
+                            description
+                            __typename
+                        }
+                        __typename
+                    }
+                }
+                """
+            }
+        ]
+        response = requests.post(url, json=body, headers=GlassdoorScraper.headers())
+        if response.status_code != 200:
+            return None
+        data = response.json()[0]
+        desc = data['data']['jobview']['job']['description']
+        soup = BeautifulSoup(desc, 'html.parser')
+        return modify_and_get_description(soup)
+
     @staticmethod
     def parse_compensation(data: dict) -> Optional[Compensation]:
         pay_period = data.get("payPeriod")
@@ -231,12 +290,11 @@ class GlassdoorScraper(Scraper):
         for job_type in JobType:
             if job_type_str in job_type.value:
                 return [job_type]
-        return None
 
     @staticmethod
-    def parse_location(location_name: str) -> Location:
+    def parse_location(location_name: str) -> Location | None:
         if not location_name or location_name == "Remote":
-            return None
+            return
         city, _, state = location_name.partition(", ")
         return Location(city=city, state=state)
 
@@ -245,7 +303,6 @@
         for cursor_data in pagination_cursors:
             if cursor_data["pageNumber"] == page_num:
                 return cursor_data["cursor"]
-        return None
 
     @staticmethod
     def headers() -> dict:
diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py
index a2be108..b44f225 100644
--- a/src/jobspy/scrapers/indeed/__init__.py
+++ b/src/jobspy/scrapers/indeed/__init__.py
@@ -22,6 +22,7 @@ from ..utils import (
     extract_emails_from_text,
     create_session,
     get_enum_from_job_type,
+    modify_and_get_description
 )
 from ...jobs import (
     JobPost,
@@ -79,7 +80,7 @@ class IndeedScraper(Scraper):
         if sc_values:
             params["sc"] = "0kf:" + "".join(sc_values) + ";"
         try:
-            session = create_session(self.proxy, is_tls=True)
+            session = create_session(self.proxy)
             response = session.get(
                 f"{self.url}/jobs",
                 headers=self.get_headers(),
@@ -141,7 +142,8 @@
             date_posted = datetime.fromtimestamp(timestamp_seconds)
             date_posted = date_posted.strftime("%Y-%m-%d")
 
-            description = self.get_description(job_url)
+            description = self.get_description(job_url) if scraper_input.full_description else None
+
             with io.StringIO(job["snippet"]) as f:
                 soup_io = BeautifulSoup(f, "html.parser")
                 li_elements = soup_io.find_all("li")
@@ -248,9 +250,7 @@
             return None
         soup = BeautifulSoup(job_description, "html.parser")
-        text_content = " ".join(soup.get_text(separator=" ").split()).strip()
-
-        return text_content
+        return modify_and_get_description(soup)
 
     @staticmethod
     def get_job_type(job: dict) -> list[JobType] | None:
diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/jobspy/scrapers/linkedin/__init__.py
index 6383837..d259a46 100644
--- a/src/jobspy/scrapers/linkedin/__init__.py
+++ b/src/jobspy/scrapers/linkedin/__init__.py
@@ -4,26 +4,40 @@ jobspy.scrapers.linkedin
 
 This module contains routines to scrape LinkedIn.
 """
+import time
+import random
 from typing import Optional
 from datetime import datetime
 
 import requests
-import time
 from requests.exceptions import ProxyError
-from bs4 import BeautifulSoup
-from bs4.element import Tag
 from threading import Lock
+from bs4.element import Tag
+from bs4 import BeautifulSoup
 from urllib.parse import urlparse, urlunparse
 
 from .. import Scraper, ScraperInput, Site
-from ..utils import count_urgent_words, extract_emails_from_text, get_enum_from_job_type, currency_parser
 from ..exceptions import LinkedInException
-from ...jobs import JobPost, Location, JobResponse, JobType, Country, Compensation
+from ..utils import create_session
+from ...jobs import (
+    JobPost,
+    Location,
+    JobResponse,
+    JobType,
+    Country,
+    Compensation
+)
+from ..utils import (
+    count_urgent_words,
+    extract_emails_from_text,
+    get_enum_from_job_type,
+    currency_parser,
+    modify_and_get_description
+)
 
 
 class LinkedInScraper(Scraper):
-    MAX_RETRIES = 3
-    DELAY = 10
+    DELAY = 3
 
     def __init__(self, proxy: Optional[str] = None):
         """
@@ -57,6 +71,7 @@ class LinkedInScraper(Scraper):
             return mapping.get(job_type_enum, "")
 
         while len(job_list) < scraper_input.results_wanted and page < 1000:
+            session = create_session(is_tls=False, has_retry=True, delay=5)
            params = {
                 "keywords": scraper_input.search_term,
                 "location": scraper_input.location,
@@ -71,44 +86,30 @@
             }
 
             params = {k: v for k, v in params.items() if v is not None}
-            retries = 0
-            while retries < self.MAX_RETRIES:
-                try:
-                    response = requests.get(
-                        f"{self.url}/jobs-guest/jobs/api/seeMoreJobPostings/search?",
-                        params=params,
-                        allow_redirects=True,
-                        proxies=self.proxy,
-                        timeout=10,
-                    )
-                    response.raise_for_status()
-
-                    break
-                except requests.HTTPError as e:
-                    if hasattr(e, "response") and e.response is not None:
-                        if e.response.status_code in (429, 502):
-                            time.sleep(self.DELAY)
-                            retries += 1
-                            continue
-                        else:
-                            raise LinkedInException(
-                                f"bad response status code: {e.response.status_code}"
-                            )
-                    else:
-                        raise
-                except ProxyError as e:
-                    raise LinkedInException("bad proxy")
-                except Exception as e:
-                    raise LinkedInException(str(e))
-            else:
-                # Raise an exception if the maximum number of retries is reached
-                raise LinkedInException(
-                    "Max retries reached, failed to get a valid response"
+            try:
+                response = session.get(
+                    f"{self.url}/jobs-guest/jobs/api/seeMoreJobPostings/search?",
+                    params=params,
+                    allow_redirects=True,
+                    proxies=self.proxy,
+                    headers=self.headers(),
+                    timeout=10,
                 )
+                response.raise_for_status()
+
+            except requests.HTTPError as e:
+                raise LinkedInException(f"bad response status code: {e.response.status_code}")
+            except ProxyError as e:
+                raise LinkedInException("bad proxy")
+            except Exception as e:
+                raise LinkedInException(str(e))
 
             soup = BeautifulSoup(response.text, "html.parser")
+            job_cards = soup.find_all("div", class_="base-search-card")
class_="base-search-card") + if len(job_cards) == 0: + return JobResponse(jobs=job_list) - for job_card in soup.find_all("div", class_="base-search-card"): + for job_card in job_cards: job_url = None href_tag = job_card.find("a", class_="base-card__full-link") if href_tag and "href" in href_tag.attrs: @@ -123,18 +124,19 @@ class LinkedInScraper(Scraper): # Call process_job directly without threading try: - job_post = self.process_job(job_card, job_url) + job_post = self.process_job(job_card, job_url, scraper_input.full_description) if job_post: job_list.append(job_post) except Exception as e: raise LinkedInException("Exception occurred while processing jobs") page += 25 + time.sleep(random.uniform(LinkedInScraper.DELAY, LinkedInScraper.DELAY + 2)) job_list = job_list[: scraper_input.results_wanted] return JobResponse(jobs=job_list) - def process_job(self, job_card: Tag, job_url: str) -> Optional[JobPost]: + def process_job(self, job_card: Tag, job_url: str, full_descr: bool) -> Optional[JobPost]: salary_tag = job_card.find('span', class_='job-search-card__salary-info') compensation = None @@ -171,7 +173,7 @@ class LinkedInScraper(Scraper): if metadata_card else None ) - date_posted = None + date_posted = description = job_type = None if datetime_tag and "datetime" in datetime_tag.attrs: datetime_str = datetime_tag["datetime"] try: @@ -180,21 +182,20 @@ class LinkedInScraper(Scraper): date_posted = None benefits_tag = job_card.find("span", class_="result-benefits__text") benefits = " ".join(benefits_tag.get_text().split()) if benefits_tag else None - - description, job_type = self.get_job_description(job_url) - # description, job_type = None, [] + if full_descr: + description, job_type = self.get_job_description(job_url) return JobPost( title=title, - description=description, company_name=company, company_url=company_url, location=location, date_posted=date_posted, job_url=job_url, - job_type=job_type, compensation=compensation, benefits=benefits, + job_type=job_type, + description=description, emails=extract_emails_from_text(description) if description else None, num_urgent_words=count_urgent_words(description) if description else None, ) @@ -208,12 +209,10 @@ class LinkedInScraper(Scraper): :return: description or None """ try: - response = requests.get(job_page_url, timeout=5, proxies=self.proxy) + session = create_session(is_tls=False, has_retry=True) + response = session.get(job_page_url, timeout=5, proxies=self.proxy) response.raise_for_status() except requests.HTTPError as e: - if hasattr(e, "response") and e.response is not None: - if e.response.status_code in (429, 502): - time.sleep(self.DELAY) return None, None except Exception as e: return None, None @@ -227,7 +226,7 @@ class LinkedInScraper(Scraper): description = None if div_content: - description = " ".join(div_content.get_text().split()).strip() + description = modify_and_get_description(div_content) def get_job_type( soup_job_type: BeautifulSoup, @@ -292,3 +291,20 @@ class LinkedInScraper(Scraper): return location + @staticmethod + def headers() -> dict: + return { + 'authority': 'www.linkedin.com', + 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'accept-language': 'en-US,en;q=0.9', + 'cache-control': 'max-age=0', + 'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', + # 'sec-ch-ua-mobile': '?0', + # 'sec-ch-ua-platform': '"macOS"', + # 'sec-fetch-dest': 'document', + # 'sec-fetch-mode': 
+            # 'sec-fetch-site': 'none',
+            # 'sec-fetch-user': '?1',
+            'upgrade-insecure-requests': '1',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+        }
diff --git a/src/jobspy/scrapers/utils.py b/src/jobspy/scrapers/utils.py
index 9b38c0e..84e4c0b 100644
--- a/src/jobspy/scrapers/utils.py
+++ b/src/jobspy/scrapers/utils.py
@@ -8,6 +8,15 @@ from requests.adapters import HTTPAdapter, Retry
 from ..jobs import JobType
 
 
+def modify_and_get_description(soup):
+    for li in soup.find_all('li'):
+        li.string = "- " + li.get_text()
+
+    description = soup.get_text(separator='\n').strip()
+    description = re.sub(r'\n+', '\n', description)
+    return description
+
+
 def count_urgent_words(description: str) -> int:
     """
     Count the number of urgent words or phrases in a job description.
@@ -29,7 +38,7 @@ def extract_emails_from_text(text: str) -> list[str] | None:
     return email_regex.findall(text)
 
 
-def create_session(proxy: dict | None = None, is_tls: bool = True, has_retry: bool = False):
+def create_session(proxy: dict | None = None, is_tls: bool = True, has_retry: bool = False, delay: int = 1) -> requests.Session:
     """
     Creates a requests session with optional tls, proxy, and retry settings.
 
@@ -51,7 +60,7 @@ def create_session(proxy: dict | None = None, is_tls: bool = True, has_retry: bo
                     connect=3,
                     status=3,
                     status_forcelist=[500, 502, 503, 504, 429],
-                    backoff_factor=1)
+                    backoff_factor=delay)
     adapter = HTTPAdapter(max_retries=retries)
 
     session.mount('http://', adapter)
diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/jobspy/scrapers/ziprecruiter/__init__.py
index b40a7af..16a67f3 100644
--- a/src/jobspy/scrapers/ziprecruiter/__init__.py
+++ b/src/jobspy/scrapers/ziprecruiter/__init__.py
@@ -15,8 +15,8 @@ from concurrent.futures import ThreadPoolExecutor
 
 from .. import Scraper, ScraperInput, Site
 from ..exceptions import ZipRecruiterException
-from ..utils import count_urgent_words, extract_emails_from_text, create_session
 from ...jobs import JobPost, Compensation, Location, JobResponse, JobType, Country
+from ..utils import count_urgent_words, extract_emails_from_text, create_session, modify_and_get_description
 
 
 class ZipRecruiterScraper(Scraper):
@@ -26,6 +26,8 @@ class ZipRecruiterScraper(Scraper):
         """
         site = Site(Site.ZIP_RECRUITER)
         self.url = "https://www.ziprecruiter.com"
+        self.session = create_session(proxy)
+        self.get_cookies()
         super().__init__(site, proxy=proxy)
 
         self.jobs_per_page = 20
@@ -44,12 +46,10 @@
         if continue_token:
             params["continue"] = continue_token
         try:
-            session = create_session(self.proxy, is_tls=True)
-            response = session.get(
+            response = self.session.get(
                 f"https://api.ziprecruiter.com/jobs-app/jobs",
                 headers=self.headers(),
                 params=self.add_params(scraper_input),
-                timeout_seconds=10,
             )
             if response.status_code != 200:
                 raise ZipRecruiterException(
@@ -106,9 +106,9 @@
         title = job.get("name")
         job_url = job.get("job_url")
-        description = BeautifulSoup(
-            job.get("job_description", "").strip(), "html.parser"
-        ).get_text()
+        job_description_html = job.get("job_description", "").strip()
+        description_soup = BeautifulSoup(job_description_html, "html.parser")
+        description = modify_and_get_description(description_soup)
 
         company = job["hiring_company"].get("name") if "hiring_company" in job else None
         country_value = "usa" if job.get("job_country") == "US" else "canada"
@@ -156,6 +156,11 @@
             num_urgent_words=count_urgent_words(description) if description else None,
         )
 
+    def get_cookies(self):
+        url = "https://api.ziprecruiter.com/jobs-app/event"
+        data = "event_type=session&logged_in=false&number_of_retry=1&property=model%3AiPhone&property=os%3AiOS&property=locale%3Aen_us&property=app_build_number%3A4734&property=app_version%3A91.0&property=manufacturer%3AApple&property=timestamp%3A2024-01-12T12%3A04%3A42-06%3A00&property=screen_height%3A852&property=os_version%3A16.6.1&property=source%3Ainstall&property=screen_width%3A393&property=device_model%3AiPhone%2014%20Pro&property=brand%3AApple"
+        self.session.post(url, data=data, headers=ZipRecruiterScraper.headers())
+
     @staticmethod
     def get_job_type_enum(job_type_str: str) -> list[JobType] | None:
         for job_type in JobType:
@@ -195,12 +200,16 @@
     @staticmethod
     def headers() -> dict:
         """
-        Returns headers needed for ZipRecruiter API requests
+        Returns headers needed for requests
         :return: dict - Dictionary containing headers
         """
         return {
-            'Host': 'api.ziprecruiter.com',
-            'accept': '*/*',
-            'authorization': 'Basic YTBlZjMyZDYtN2I0Yy00MWVkLWEyODMtYTI1NDAzMzI0YTcyOg==',
-            'Cookie': '__cf_bm=DZ7eJOw6lka.Bwy5jLeDqWanaZ8BJlVAwaXrmcbYnxM-1701505132-0-AfGaVIfTA2kJlmleK14o722vbVwpZ+4UxFznsWv+guvzXSpD9KVEy/+pNzvEZUx88yaEShJwGt3/EVjhHirX/ASustKxg47V/aXRd2XIO2QN; zglobalid=61f94830-1990-4130-b222-d9d0e09c7825.57da9ea9581c.656ae86b; ziprecruiter_browser=018188e0-045b-4ad7-aa50-627a6c3d43aa; ziprecruiter_session=5259b2219bf95b6d2299a1417424bc2edc9f4b38; zva=100000000%3Bvid%3AZWroa0x_F1KEeGeU'
+            "Host": "api.ziprecruiter.com",
+            "accept": "*/*",
+            "x-zr-zva-override": "100000000;vid:ZT1huzm_EQlDTVEc",
+            "x-pushnotificationid": "0ff4983d38d7fc5b3370297f2bcffcf4b3321c418f5c22dd152a0264707602a0",
+            "x-deviceid": "D77B3A92-E589-46A4-8A39-6EF6F1D86006",
+            "user-agent": "Job Search/87.0 (iPhone; CPU iOS 16_6_1 like Mac OS X)",
+            "authorization": "Basic YTBlZjMyZDYtN2I0Yy00MWVkLWEyODMtYTI1NDAzMzI0YTcyOg==",
+            "accept-language": "en-US,en;q=0.9",
         }
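
Usage sketch (reviewer note, not part of the patch): a minimal way to exercise the new full_description flag wired through scrape_jobs above. The search values are illustrative only; every parameter shown already exists in the public API touched by this change.

from jobspy import scrape_jobs

# full_description=True makes Indeed / LinkedIn fetch each posting's full
# description (an extra request per job), so keep results_wanted modest.
jobs = scrape_jobs(
    site_name=["indeed", "linkedin"],
    search_term="software engineer",
    location="Dallas, TX",
    results_wanted=20,
    country_indeed="USA",
    full_description=True,
)
print(jobs.head())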