add indeed country support

pull/38/head
Cullen Watson 2023-09-05 12:11:26 -05:00
parent 1598d4ff63
commit c2bb3b2a03
12 changed files with 1657 additions and 522 deletions

File diff suppressed because it is too large

README.md

@@ -23,7 +23,11 @@ import pandas as pd
jobs: pd.DataFrame = scrape_jobs(
    site_name=["indeed", "linkedin", "zip_recruiter"],
    search_term="software engineer",
    results_wanted=10
    location="Dallas, TX",
    results_wanted=10,
    # country: only needed for indeed
    country='USA'
)
if jobs.empty:
@@ -65,8 +69,10 @@ Optional
├── is_remote (bool)
├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
├── easy_apply (bool): filters for jobs on LinkedIn that have the 'Easy Apply' option
├── country (enum): uses the corresponding subdomain on Indeed (e.g. Canada on Indeed is ca.indeed.com)
```
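For example, the optional filters above can be combined in a single call (the values shown are illustrative):
```
jobs = scrape_jobs(
    site_name=["linkedin"],
    search_term="software engineer",
    is_remote=True,
    easy_apply=True,      # LinkedIn only
    results_wanted=20,
)
```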
### JobPost Schema
```plaintext
JobPost
@@ -81,12 +87,91 @@ JobPost
├── job_type (enum)
├── compensation (object)
│ ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly
│ ├── min_amount (float)
│ ├── max_amount (float)
│ ├── min_amount (int)
│ ├── max_amount (int)
│ └── currency (str)
└── date_posted (datetime)
```
## Supported Countries for Job Searching
### **LinkedIn**
LinkedIn searches globally. Use the `location` parameter.
### **ZipRecruiter**
ZipRecruiter searches for jobs in the US and Canada. Use the `location` parameter.
### **Indeed**
For Indeed, use the `location` parameter along with the `country` parameter.
You can specify the following countries when searching on Indeed (use the exact name; see the example after this list):
- Argentina
- Australia
- Austria
- Bahrain
- Belgium
- Brazil
- Canada
- Chile
- China
- Colombia
- Costa Rica
- Czech Republic
- Denmark
- Ecuador
- Egypt
- Finland
- France
- Germany
- Greece
- Hong Kong
- Hungary
- India
- Indonesia
- Ireland
- Israel
- Italy
- Japan
- Kuwait
- Luxembourg
- Malaysia
- Mexico
- Morocco
- Netherlands
- New Zealand
- Nigeria
- Norway
- Oman
- Pakistan
- Panama
- Peru
- Philippines
- Poland
- Portugal
- Qatar
- Romania
- Saudi Arabia
- Singapore
- South Africa
- South Korea
- Spain
- Sweden
- Switzerland
- Taiwan
- Thailand
- Turkey
- Ukraine
- United Arab Emirates
- UK
- USA
- Uruguay
- Venezuela
- Vietnam
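For instance, to search Indeed's Canadian site, pass one of the names above (the search term here is illustrative; `Country.from_string` lowercases and strips the input, so matching is case-insensitive):
```
from jobspy import scrape_jobs

jobs = scrape_jobs(
    site_name=["indeed"],
    search_term="data analyst",  # illustrative
    country="Canada",            # must match a name from the list above
)
```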
## Frequently Asked Questions

main.py Normal file (27 additions)

@@ -0,0 +1,27 @@
from jobspy import scrape_jobs
import pandas as pd

jobs: pd.DataFrame = scrape_jobs(
    site_name=["indeed", "linkedin", "zip_recruiter"],
    search_term="software engineer",
    results_wanted=10,
    # country: only needed for indeed
    country='hong kong'
)

if jobs.empty:
    print("No jobs found.")
else:
    #1 print
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', 50)  # set to 0 to see full job url / desc
    print(jobs)

    #2 display in Jupyter Notebook
    #display(jobs)

    #3 output to .csv
    #jobs.to_csv('jobs.csv', index=False)

pyproject.toml

@@ -1,6 +1,6 @@
[tool.poetry]
name = "python-jobspy"
version = "1.0.3"
version = "1.1.0"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
readme = "README.md"

src/jobspy/__init__.py

@@ -1,7 +1,7 @@
import pandas as pd
from typing import List, Tuple
from .jobs import JobType
from .jobs import JobType, Location
from .scrapers.indeed import IndeedScraper
from .scrapers.ziprecruiter import ZipRecruiterScraper
from .scrapers.linkedin import LinkedInScraper
@@ -9,6 +9,7 @@ from .scrapers import (
    ScraperInput,
    Site,
    JobResponse,
    Country
)
@@ -32,6 +33,7 @@ def scrape_jobs(
    job_type: JobType = None,
    easy_apply: bool = False,  # linkedin
    results_wanted: int = 15,
    country: str = 'usa'
) -> pd.DataFrame:
"""
Asynchronously scrapes job data from multiple job sites.
@ -41,9 +43,12 @@ def scrape_jobs(
if type(site_name) == str:
site_name = _map_str_to_site(site_name)
country_enum = Country.from_string(country)
site_type = [site_name] if type(site_name) == Site else site_name
scraper_input = ScraperInput(
site_type=site_type,
country=country_enum,
search_term=search_term,
location=location,
distance=distance,
@@ -71,22 +76,15 @@ def scrape_jobs(
        for job in job_response.jobs:
            data = job.dict()
            data["site"] = site
            # Formatting JobType
            data["job_type"] = data["job_type"].value if data["job_type"] else None
            # Formatting Location
            location_obj = data.get("location")
            if location_obj and isinstance(location_obj, dict):
                data["city"] = location_obj.get("city", "")
                data["state"] = location_obj.get("state", "")
                data["country"] = location_obj.get("country", "USA")
            data['company'] = data['company_name']
            if data["job_type"]:
                # Take the first value from the job type tuple
                data["job_type"] = data["job_type"].value[0]
            else:
                data["city"] = None
                data["state"] = None
                data["country"] = None
                data["job_type"] = None
            data['location'] = Location(**data['location']).display_location()
            # Formatting Compensation
            compensation_obj = data.get("compensation")
            if compensation_obj and isinstance(compensation_obj, dict):
                data["interval"] = (
@@ -111,13 +109,13 @@ def scrape_jobs(
    desired_order = [
        "site",
        "title",
        "company_name",
        "city",
        "state",
        "company",
        'location',
        "job_type",
        "interval",
        "min_amount",
        "max_amount",
        "currency",
        "job_url",
        "description",
    ]
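With location and company now flattened into single columns, the returned DataFrame can be consumed directly; a brief sketch under that assumption (search term and output path are illustrative):
```
import pandas as pd
from jobspy import scrape_jobs

jobs: pd.DataFrame = scrape_jobs(site_name=["indeed"], search_term="software engineer", country="usa")

# columns follow desired_order above
print(jobs[["site", "title", "company", "location", "interval", "min_amount", "max_amount"]].head())
jobs.to_csv("jobs.csv", index=False)  # illustrative output path
```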

src/jobspy/jobs.py

@@ -6,24 +6,130 @@ from pydantic import BaseModel, validator
class JobType(Enum):
    FULL_TIME = "fulltime"
    PART_TIME = "parttime"
    CONTRACT = "contract"
    TEMPORARY = "temporary"
    INTERNSHIP = "internship"
    FULL_TIME = ("fulltime", "períodointegral", "estágio/trainee", "cunormăîntreagă", "tiempocompleto", "vollzeit", "voltijds", "tempointegral", "全职", 'plnýúvazek', 'fuldtid', 'دوامكامل',
                 'kokopäivätyö', 'tempsplein', 'vollzeit', 'πλήρηςαπασχόληση', 'teljesmunkaidő', 'tempopieno', 'tempsplein', 'heltid', 'jornadacompleta', 'pełnyetat', '정규직', '100%', '全職',
                 'งานประจำ', 'tamzamanlı', 'повназайнятість', 'toànthờigian')
    PART_TIME = ("parttime", "teilzeit")
    CONTRACT = ("contract", "contractor")
    TEMPORARY = ("temporary",)
    INTERNSHIP = ("internship", "prácticas", 'ojt(onthejobtraining)', 'praktikum')
    PER_DIEM = "perdiem"
    NIGHTS = "nights"
    OTHER = "other"
    SUMMER = "summer"
    VOLUNTEER = "volunteer"
    PER_DIEM = ("perdiem",)
    NIGHTS = ("nights",)
    OTHER = ("other",)
    SUMMER = ("summer",)
    VOLUNTEER = ("volunteer",)
class Country(Enum):
    ARGENTINA = ('argentina', 'ar')
    AUSTRALIA = ('australia', 'au')
    AUSTRIA = ('austria', 'at')
    BAHRAIN = ('bahrain', 'bh')
    BELGIUM = ('belgium', 'be')
    BRAZIL = ('brazil', 'br')
    CANADA = ('canada', 'ca')
    CHILE = ('chile', 'cl')
    CHINA = ('china', 'cn')
    COLOMBIA = ('colombia', 'co')
    COSTARICA = ('costa rica', 'cr')
    CZECHREPUBLIC = ('czech republic', 'cz')
    DENMARK = ('denmark', 'dk')
    ECUADOR = ('ecuador', 'ec')
    EGYPT = ('egypt', 'eg')
    FINLAND = ('finland', 'fi')
    FRANCE = ('france', 'fr')
    GERMANY = ('germany', 'de')
    GREECE = ('greece', 'gr')
    HONGKONG = ('hong kong', 'hk')
    HUNGARY = ('hungary', 'hu')
    INDIA = ('india', 'in')
    INDONESIA = ('indonesia', 'id')
    IRELAND = ('ireland', 'ie')
    ISRAEL = ('israel', 'il')
    ITALY = ('italy', 'it')
    JAPAN = ('japan', 'jp')
    KUWAIT = ('kuwait', 'kw')
    LUXEMBOURG = ('luxembourg', 'lu')
    MALAYSIA = ('malaysia', 'malaysia')
    MEXICO = ('mexico', 'mx')
    MOROCCO = ('morocco', 'ma')
    NETHERLANDS = ('netherlands', 'nl')
    NEWZEALAND = ('new zealand', 'nz')
    NIGERIA = ('nigeria', 'ng')
    NORWAY = ('norway', 'no')
    OMAN = ('oman', 'om')
    PAKISTAN = ('pakistan', 'pk')
    PANAMA = ('panama', 'pa')
    PERU = ('peru', 'pe')
    PHILIPPINES = ('philippines', 'ph')
    POLAND = ('poland', 'pl')
    PORTUGAL = ('portugal', 'pt')
    QATAR = ('qatar', 'qa')
    ROMANIA = ('romania', 'ro')
    SAUDIARABIA = ('saudi arabia', 'sa')
    SINGAPORE = ('singapore', 'sg')
    SOUTHAFRICA = ('south africa', 'za')
    SOUTHKOREA = ('south korea', 'kr')
    SPAIN = ('spain', 'es')
    SWEDEN = ('sweden', 'se')
    SWITZERLAND = ('switzerland', 'ch')
    TAIWAN = ('taiwan', 'tw')
    THAILAND = ('thailand', 'th')
    TURKEY = ('turkey', 'tr')
    UKRAINE = ('ukraine', 'ua')
    UNITEDARABEMIRATES = ('united arab emirates', 'ae')
    UK = ('uk', 'uk')
    USA = ('usa', 'www')
    URUGUAY = ('uruguay', 'uy')
    VENEZUELA = ('venezuela', 've')
    VIETNAM = ('vietnam', 'vn')
    # internal for ziprecruiter
    US_CANADA = ('usa/ca', 'www')
    # internal for linkedin
    WORLDWIDE = ('worldwide', 'www')

    def __new__(cls, country, domain):
        obj = object.__new__(cls)
        obj._value_ = country
        obj.domain = domain
        return obj
    @property
    def domain_value(self):
        return self.domain

    @classmethod
    def from_string(cls, country_str: str):
        """Convert a string to the corresponding Country enum."""
        country_str = country_str.strip().lower()
        for country in cls:
            if country.value == country_str:
                return country
        valid_countries = [country.value for country in cls]
        raise ValueError(f"Invalid country string: '{country_str}'. Valid countries (only include this param for Indeed) are: {', '.join(valid_countries)}")
class Location(BaseModel):
    country: str = "USA"
    city: str = None
    country: Country = None
    city: Optional[str] = None
    state: Optional[str] = None

    def display_location(self) -> str:
        location_parts = []
        if self.city:
            location_parts.append(self.city)
        if self.state:
            location_parts.append(self.state)
        if self.country and self.country not in (Country.US_CANADA, Country.WORLDWIDE):
            if self.country.value in ('usa', 'uk'):
                location_parts.append(self.country.value.upper())
            else:
                location_parts.append(self.country.value.title())
        return ", ".join(location_parts)
class CompensationInterval(Enum):
    YEARLY = "yearly"
@@ -37,7 +143,7 @@ class Compensation(BaseModel):
    interval: CompensationInterval
    min_amount: int = None
    max_amount: int = None
    currency: str = "USD"
    currency: Optional[str] = "USD"

class JobPost(BaseModel):
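A quick sanity check of the new enum and location helpers (a sketch; it assumes the package exposes these under `jobspy.jobs`, as the scrapers' imports above suggest):
```
from jobspy.jobs import Country, JobType, Location

country = Country.from_string("Canada")       # case-insensitive lookup
print(country.domain_value)                   # 'ca' -> ca.indeed.com

# JobType values are tuples of localized labels, so membership tests work
print("vollzeit" in JobType.FULL_TIME.value)  # True (German "full time")
print("contractor" in JobType.CONTRACT.value) # True

print(Location(city="Toronto", state="ON", country=country).display_location())
# Toronto, ON, Canada
```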

src/jobspy/scrapers/__init__.py

@@ -1,4 +1,4 @@
from ..jobs import Enum, BaseModel, JobType, JobResponse
from ..jobs import Enum, BaseModel, JobType, JobResponse, Country
from typing import List, Optional, Any
@@ -18,6 +18,7 @@ class ScraperInput(BaseModel):
    search_term: str
    location: str = None
    country: Optional[Country] = Country.USA
    distance: Optional[int] = None
    is_remote: bool = False
    job_type: Optional[JobType] = None
@@ -35,9 +36,8 @@ class CommonResponse(BaseModel):
class Scraper:
    def __init__(self, site: Site, url: str):
    def __init__(self, site: Site):
        self.site = site
        self.url = url

    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
        ...
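Since `url` was dropped from the base constructor, each scraper is now responsible for its own URL; a minimal sketch of the new pattern (`ExampleScraper` is hypothetical, and the `jobs`/`success` fields are the ones used by the callers above):
```
class ExampleScraper(Scraper):  # hypothetical subclass for illustration
    def __init__(self):
        site = Site(Site.INDEED)
        super().__init__(site)                # no url passed to the base anymore
        self.url = "https://www.example.com"  # each scraper sets its own url

    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
        return JobResponse(success=True, jobs=[])
```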

src/jobspy/scrapers/indeed/__init__.py

@@ -1,6 +1,8 @@
import re
import math
import io
import json
import traceback
from datetime import datetime
from typing import Optional
@@ -18,7 +20,7 @@ from ...jobs import (
    JobResponse,
    JobType,
)
from .. import Scraper, ScraperInput, Site, StatusException
from .. import Scraper, ScraperInput, Site, Country, StatusException

class ParsingException(Exception):
@@ -31,12 +33,13 @@ class IndeedScraper(Scraper):
        Initializes IndeedScraper with the Indeed job search url
        """
        site = Site(Site.INDEED)
        url = "https://www.indeed.com"
        super().__init__(site, url)
        super().__init__(site)

        self.jobs_per_page = 15
        self.seen_urls = set()

    def scrape_page(
        self, scraper_input: ScraperInput, page: int, session: tls_client.Session
    ) -> tuple[list[JobPost], int]:
@@ -47,16 +50,21 @@ class IndeedScraper(Scraper):
        :param session:
        :return: jobs found on page, total number of jobs found for search
        """
        self.country = scraper_input.country
        domain = self.country.domain_value
        self.url = f"https://{domain}.indeed.com"

        job_list = []

        params = {
            "q": scraper_input.search_term,
            "l": scraper_input.location,
            "radius": scraper_input.distance,
            "filter": 0,
            "start": 0 + page * 10,
        }
        if scraper_input.distance:
            params["radius"] = scraper_input.distance

        sc_values = []
        if scraper_input.is_remote:
            sc_values.append("attr(DSQF7)")
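The subdomain comes straight from the `Country` enum; for reference, a few mappings derived from the enum values above:
```
from jobspy.jobs import Country

for country in (Country.USA, Country.CANADA, Country.UK):
    print(f"https://{country.domain_value}.indeed.com")
# https://www.indeed.com
# https://ca.indeed.com
# https://uk.indeed.com
```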
@@ -65,12 +73,15 @@ class IndeedScraper(Scraper):
        if sc_values:
            params["sc"] = "0kf:" + "".join(sc_values) + ";"

        response = session.get(self.url + "/jobs", params=params)
        response = session.get(self.url + "/jobs", params=params, allow_redirects=True)
        # print(response.status_code)
        if response.status_code != 200 and response.status_code != 307:
        if response.status_code not in range(200, 400):
            raise StatusException(response.status_code)

        soup = BeautifulSoup(response.content, "html.parser")
        with open('text2.html', 'w', encoding='utf-8') as f:
            f.write(str(soup))

        if "did not match any jobs" in str(soup):
            raise ParsingException("Search did not match any jobs")
@@ -92,7 +103,6 @@ class IndeedScraper(Scraper):
            if job_url in self.seen_urls:
                return None

            snippet_html = BeautifulSoup(job["snippet"], "html.parser")
            extracted_salary = job.get("extractedSalary")
            compensation = None
@@ -118,11 +128,12 @@ class IndeedScraper(Scraper):
                date_posted = date_posted.strftime("%Y-%m-%d")

            description = self.get_description(job_url, session)
            li_elements = snippet_html.find_all("li")
            if description is None and li_elements:
                description = " ".join(li.text for li in li_elements)
            with io.StringIO(job["snippet"]) as f:
                soup = BeautifulSoup(f, "html.parser")
                li_elements = soup.find_all("li")
                if description is None and li_elements:
                    description = " ".join(li.text for li in li_elements)

            first_li = snippet_html.find("li")
            job_post = JobPost(
                title=job["normTitle"],
                description=description,
@@ -130,6 +141,7 @@ class IndeedScraper(Scraper):
                location=Location(
                    city=job.get("jobLocationCity"),
                    state=job.get("jobLocationState"),
                    country=self.country
                ),
                job_type=job_type,
                compensation=compensation,
@@ -138,7 +150,7 @@ class IndeedScraper(Scraper):
            )
            return job_post

        with ThreadPoolExecutor(max_workers=10) as executor:
        with ThreadPoolExecutor(max_workers=1) as executor:
            job_results: list[Future] = [
                executor.submit(process_job, job)
                for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
@@ -166,7 +178,7 @@ class IndeedScraper(Scraper):
        #: get first page to initialize session
        job_list, total_results = self.scrape_page(scraper_input, 0, session)

        with ThreadPoolExecutor(max_workers=10) as executor:
        with ThreadPoolExecutor(max_workers=1) as executor:
            futures: list[Future] = [
                executor.submit(self.scrape_page, scraper_input, page, session)
                for page in range(1, pages_to_process + 1)
@@ -188,6 +200,7 @@ class IndeedScraper(Scraper):
                error=f"Indeed failed to parse response: {e}",
            )
        except Exception as e:
            print(f"Indeed failed to scrape: {e}\n{traceback.format_exc()}")
            return JobResponse(
                success=False,
                error=f"Indeed failed to scrape: {e}",
@@ -215,17 +228,23 @@ class IndeedScraper(Scraper):
        jk_value = params.get("jk", [None])[0]
        formatted_url = f"{self.url}/viewjob?jk={jk_value}&spa=1"

        response = session.get(formatted_url, allow_redirects=True)
        try:
            response = session.get(formatted_url, allow_redirects=True, timeout_seconds=5)
        except requests.exceptions.Timeout:
            print("The request timed out.")
            return None

        if response.status_code not in range(200, 400):
            print('status code not in range')
            return None

        raw_description = response.json()["body"]["jobInfoWrapperModel"][
            "jobInfoModel"
        ]["sanitizedJobDescription"]
        soup = BeautifulSoup(raw_description, "html.parser")
        text_content = " ".join(soup.get_text().split()).strip()
        return text_content
        with io.StringIO(raw_description) as f:
            soup = BeautifulSoup(f, "html.parser")
            text_content = " ".join(soup.get_text().split()).strip()
            return text_content
    @staticmethod
    def get_job_type(job: dict) -> Optional[JobType]:
@@ -237,13 +256,18 @@ class IndeedScraper(Scraper):
        for taxonomy in job["taxonomyAttributes"]:
            if taxonomy["label"] == "job-types":
                if len(taxonomy["attributes"]) > 0:
                    job_type_str = (
                        taxonomy["attributes"][0]["label"]
                        .replace("-", "_")
                        .replace(" ", "_")
                        .upper()
                    )
                    return JobType[job_type_str]
                    label = taxonomy["attributes"][0].get("label")
                    if label:
                        job_type_str = label.replace("-", "").replace(" ", "").lower()
                        # print(f"Debug: job_type_str = {job_type_str}")
                        return IndeedScraper.get_enum_from_value(job_type_str)
        return None

    @staticmethod
    def get_enum_from_value(value_str):
        for job_type in JobType:
            if value_str in job_type.value:
                return job_type
        return None

    @staticmethod
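`get_enum_from_value` replaces the old name-based `JobType[...]` lookup with a scan over each enum member's tuple of localized labels; for instance (labels taken from the JobType tuples in jobs.py):
```
print(IndeedScraper.get_enum_from_value("fulltime"))   # JobType.FULL_TIME
print(IndeedScraper.get_enum_from_value("vollzeit"))   # JobType.FULL_TIME (German label)
print(IndeedScraper.get_enum_from_value("contractor")) # JobType.CONTRACT
print(IndeedScraper.get_enum_from_value("unknown"))    # None
```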
@@ -294,7 +318,7 @@ class IndeedScraper(Scraper):
        :param soup:
        :return: total_num_jobs
        """
        script = soup.find("script", string=lambda t: "window._initialData" in t)
        script = soup.find("script", string=lambda t: t and "window._initialData" in t)

        pattern = re.compile(r"window._initialData\s*=\s*({.*})\s*;", re.DOTALL)
        match = pattern.search(script.string)
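The added `t and ...` guard matters because BeautifulSoup passes `None` for script tags that have no string. A standalone sketch of the same extraction (the HTML and JSON keys here are fabricated samples, not Indeed's real payload):
```
import json
import re
from bs4 import BeautifulSoup

html = '<script>window._initialData = {"totalResults": 42};</script>'  # fabricated sample
soup = BeautifulSoup(html, "html.parser")

script = soup.find("script", string=lambda t: t and "window._initialData" in t)
match = re.search(r"window._initialData\s*=\s*({.*})\s*;", script.string, re.DOTALL)
data = json.loads(match.group(1))
print(data["totalResults"])  # 42
```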

src/jobspy/scrapers/linkedin/__init__.py

@@ -22,8 +22,8 @@ class LinkedInScraper(Scraper):
        Initializes LinkedInScraper with the LinkedIn job search url
        """
        site = Site(Site.LINKEDIN)
        url = "https://www.linkedin.com"
        super().__init__(site, url)
        self.url = "https://www.linkedin.com"
        super().__init__(site)

    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
        """
@@ -31,6 +31,7 @@ class LinkedInScraper(Scraper):
        :param scraper_input:
        :return: job_response
        """
        self.country = 'worldwide'
        job_list: list[JobPost] = []
        seen_urls = set()
        page, processed_jobs, job_count = 0, 0, 0
@@ -104,7 +105,7 @@ class LinkedInScraper(Scraper):
                    metadata_card = job_info.find(
                        "div", class_="base-search-card__metadata"
                    )
                    location: Location = LinkedInScraper.get_location(metadata_card)
                    location: Location = self.get_location(metadata_card)

                    datetime_tag = metadata_card.find(
                        "time", class_="job-search-card__listdate"
@@ -125,7 +126,7 @@ class LinkedInScraper(Scraper):
                        job_url=job_url,
                        job_type=job_type,
                        compensation=Compensation(
                            interval=CompensationInterval.YEARLY, currency="USD"
                            interval=CompensationInterval.YEARLY, currency=None
                        ),
                    )
                    job_list.append(job_post)
@@ -195,17 +196,24 @@ class LinkedInScraper(Scraper):
            employment_type = employment_type.lower()
            employment_type = employment_type.replace("-", "")

            return JobType(employment_type)
            return LinkedInScraper.get_enum_from_value(employment_type)

        return text_content, get_job_type(soup)

    @staticmethod
    def get_location(metadata_card: Optional[Tag]) -> Location:
    def get_enum_from_value(value_str):
        for job_type in JobType:
            if value_str in job_type.value:
                return job_type
        return None

    def get_location(self, metadata_card: Optional[Tag]) -> Location:
        """
        Extracts the location data from the job metadata card.
        :param metadata_card
        :return: location
        """
        location = Location(country=self.country)
        if metadata_card is not None:
            location_tag = metadata_card.find(
                "span", class_="job-search-card__location"
@@ -217,6 +225,7 @@ class LinkedInScraper(Scraper):
                location = Location(
                    city=city,
                    state=state,
                    country=self.country,
                )

        return location
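Because `scrape` sets `self.country = 'worldwide'`, every LinkedIn Location is tagged with the internal WORLDWIDE country, which `display_location` deliberately omits. A small sketch of the default path (it relies on pydantic coercing the string 'worldwide' to `Country.WORLDWIDE` by value):
```
scraper = LinkedInScraper()
scraper.country = 'worldwide'

loc = scraper.get_location(None)  # no metadata card -> default Location
print(loc.country)                # Country.WORLDWIDE
print(loc.display_location())     # '' (internal countries are not displayed)
```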

src/jobspy/scrapers/ziprecruiter/__init__.py

@@ -1,6 +1,7 @@
import math
import json
import re
import traceback
from datetime import datetime
from typing import Optional, Tuple
from urllib.parse import urlparse, parse_qs
@@ -18,6 +19,7 @@ from ...jobs import (
    Location,
    JobResponse,
    JobType,
    Country
)
@@ -27,8 +29,8 @@ class ZipRecruiterScraper(Scraper):
        Initializes ZipRecruiterScraper with the ZipRecruiter job search url
        """
        site = Site(Site.ZIP_RECRUITER)
        url = "https://www.ziprecruiter.com"
        super().__init__(site, url)
        self.url = "https://www.ziprecruiter.com"
        super().__init__(site)

        self.jobs_per_page = 20
        self.seen_urls = set()
@@ -80,8 +82,10 @@ class ZipRecruiterScraper(Scraper):
            self.url + "/jobs-search",
            headers=ZipRecruiterScraper.headers(),
            params=params,
            allow_redirects=True
        )
        # print(response.status_code)

        if response.status_code != 200:
            raise StatusException(response.status_code)
@@ -144,6 +148,7 @@ class ZipRecruiterScraper(Scraper):
                error=f"ZipRecruiter returned status code {e.status_code}",
            )
        except Exception as e:
            print(f"ZipRecruiter failed to scrape: {e}\n{traceback.format_exc()}")
            return JobResponse(
                success=False,
                error=f"ZipRecruiter failed to scrape: {e}",
@@ -181,15 +186,12 @@ class ZipRecruiterScraper(Scraper):
        description = job.find("p", {"class": "job_snippet"}).text.strip()
        job_type_element = job.find("li", {"class": "perk_item perk_type"})
        job_type = None
        if job_type_element:
            job_type_text = (
                job_type_element.text.strip().lower().replace("-", "").replace(" ", "")
            )
            if job_type_text == "contractor":
                job_type_text = "contract"
            job_type = JobType(job_type_text)
        else:
            job_type = None
            job_type = ZipRecruiterScraper.get_job_type_enum(job_type_text)

        date_posted = ZipRecruiterScraper.get_date_posted(job)
@@ -206,16 +208,15 @@ class ZipRecruiterScraper(Scraper):
        return job_post

    def process_job_js(self, job: dict) -> JobPost:
        # Map the job data to the expected fields by the Pydantic model
        title = job.get("Title")
        description = BeautifulSoup(
            job.get("Snippet", "").strip(), "html.parser"
        ).get_text()

        company = job.get("OrgName")
        location = Location(city=job.get("City"), state=job.get("State"))
        location = Location(city=job.get("City"), state=job.get("State"), country=Country.US_CANADA)
        try:
            job_type = ZipRecruiterScraper.job_type_from_string(
            job_type = ZipRecruiterScraper.get_job_type_enum(
                job.get("EmploymentType", "").replace("-", "_").lower()
            )
        except ValueError:
@@ -244,6 +245,7 @@ class ZipRecruiterScraper(Scraper):
            interval=CompensationInterval.YEARLY,
            min_amount=min_amount,
            max_amount=max_amount,
            currency="USD/CAD"
        )

        save_job_url = job.get("SaveJobURL", "")
        posted_time_match = re.search(
@@ -270,17 +272,18 @@ class ZipRecruiterScraper(Scraper):
        return job_post

    @staticmethod
    def job_type_from_string(value: str) -> Optional[JobType]:
        if not value:
            return None

        def get_enum_from_value(value_str):
            for job_type in JobType:
                if value_str in job_type.value:
                    return job_type
            return None

        if value.lower() == "contractor":
            value = "contract"
        normalized_value = value.replace("_", "")
        for item in JobType:
            if item.value == normalized_value:
                return item
        raise ValueError(f"Invalid value for JobType: {value}")

    @staticmethod
    def get_job_type_enum(job_type_str: str) -> Optional[JobType]:
        for job_type in JobType:
            if job_type_str in job_type.value:
                return job_type
        return None
    def get_description(self, job_page_url: str) -> Tuple[Optional[str], Optional[str]]:
        """
@@ -289,11 +292,13 @@ class ZipRecruiterScraper(Scraper):
        :param session:
        :return: description or None, response url
        """
        response = self.session.get(
            job_page_url, headers=ZipRecruiterScraper.headers(), allow_redirects=True
        )
        if response.status_code not in range(200, 400):
            return None, None
        try:
            response = self.session.get(
                job_page_url, headers=ZipRecruiterScraper.headers(), allow_redirects=True, timeout_seconds=5
            )
        except requests.exceptions.Timeout:
            print("The request timed out.")
            return None

        html_string = response.content
        soup_job = BeautifulSoup(html_string, "html.parser")
@@ -375,7 +380,7 @@ class ZipRecruiterScraper(Scraper):
            amounts.append(amount)

        compensation = Compensation(
            interval=interval, min_amount=min(amounts), max_amount=max(amounts)
            interval=interval, min_amount=min(amounts), max_amount=max(amounts), currency="USD/CAD"
        )

        return compensation
@@ -402,6 +407,7 @@ class ZipRecruiterScraper(Scraper):
        return Location(
            city=city,
            state=state,
            country=Country.US_CANADA
        )

    @staticmethod
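ZipRecruiter results span the US and Canada, so compensation is tagged with the combined "USD/CAD" label rather than a single currency; for example (the amounts are illustrative):
```
comp = Compensation(
    interval=CompensationInterval.YEARLY,
    min_amount=90000,   # illustrative
    max_amount=120000,  # illustrative
    currency="USD/CAD",
)
print(comp.currency)  # USD/CAD
```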

text.html Normal file (0 additions)

text2.html Normal file (265 additions)

File diff suppressed because one or more lines are too long