mirror of https://github.com/Bunsly/JobSpy
add glassdoor (#66)
parent 93223b6a38
commit 3f2b582445
README.md

@@ -6,13 +6,13 @@
 *Looking to build a data-focused software product?* **[Book a call](https://calendly.com/bunsly/15min)** *to
 work with us.*
 \

 Check out another project we wrote: ***[HomeHarvest](https://github.com/Bunsly/HomeHarvest)** – a Python package
 for real estate scraping*

 ## Features

-- Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
+- Scrapes job postings from **LinkedIn**, **Indeed**, **Glassdoor**, & **ZipRecruiter** simultaneously
 - Aggregates the job postings in a Pandas DataFrame
 - Proxy support (HTTP/S, SOCKS)

@@ -35,15 +35,15 @@ _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/)
 from jobspy import scrape_jobs

 jobs = scrape_jobs(
-    site_name=["indeed", "linkedin", "zip_recruiter"],
+    site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor"],
     search_term="software engineer",
     location="Dallas, TX",
     results_wanted=10,
-    country_indeed='USA' # only needed for indeed
+    country_indeed='USA' # only needed for indeed / glassdoor
 )
 print(f"Found {len(jobs)} jobs")
 print(jobs.head())
-jobs.to_csv("jobs.csv", index=False) # / to_xlsx
+jobs.to_csv("jobs.csv", index=False) # to_xlsx
 ```

 ### Output

@@ -120,30 +120,31 @@ ZipRecruiter searches for jobs in **US/Canada** & uses only the `location` param
 ### **Indeed**

-Indeed supports most countries, but the `country_indeed` parameter is required. Additionally, use the `location`
+Indeed & Glassdoor support most countries, but the `country_indeed` parameter is required. Additionally, use the `location`
 parameter to narrow down the location, e.g. city & state if necessary.

-You can specify the following countries when searching on Indeed (use the exact name):
+You can specify the following countries when searching on Indeed (use the exact name, * indicates support for Glassdoor):

 |                      |              |            |                |
 |----------------------|--------------|------------|----------------|
-| Argentina            | Australia    | Austria    | Bahrain        |
-| Belgium              | Brazil       | Canada     | Chile          |
+| Argentina            | Australia*   | Austria*   | Bahrain        |
+| Belgium*             | Brazil*      | Canada*    | Chile          |
 | China                | Colombia     | Costa Rica | Czech Republic |
 | Denmark              | Ecuador      | Egypt      | Finland        |
-| France               | Germany      | Greece     | Hong Kong      |
-| Hungary              | India        | Indonesia  | Ireland        |
-| Israel               | Italy        | Japan      | Kuwait         |
-| Luxembourg           | Malaysia     | Mexico     | Morocco        |
-| Netherlands          | New Zealand  | Nigeria    | Norway         |
+| France*              | Germany*     | Greece     | Hong Kong*     |
+| Hungary              | India*       | Indonesia  | Ireland*       |
+| Israel               | Italy*       | Japan      | Kuwait         |
+| Luxembourg           | Malaysia     | Mexico*    | Morocco        |
+| Netherlands*         | New Zealand* | Nigeria    | Norway         |
 | Oman                 | Pakistan     | Panama     | Peru           |
 | Philippines          | Poland       | Portugal   | Qatar          |
-| Romania              | Saudi Arabia | Singapore  | South Africa   |
-| South Korea          | Spain        | Sweden     | Switzerland    |
+| Romania              | Saudi Arabia | Singapore* | South Africa   |
+| South Korea          | Spain*       | Sweden     | Switzerland*   |
 | Taiwan               | Thailand     | Turkey     | Ukraine        |
-| United Arab Emirates | UK           | USA        | Uruguay        |
+| United Arab Emirates | UK*          | USA*       | Uruguay        |
 | Venezuela            | Vietnam      |            |                |


 ## Frequently Asked Questions

 ---

@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.23"
-description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
+version = "1.1.24"
+description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/JobSpy"
 readme = "README.md"

@@ -6,18 +6,21 @@ from typing import Tuple, Optional
 from .jobs import JobType, Location
 from .scrapers.indeed import IndeedScraper
 from .scrapers.ziprecruiter import ZipRecruiterScraper
+from .scrapers.glassdoor import GlassdoorScraper
 from .scrapers.linkedin import LinkedInScraper
 from .scrapers import ScraperInput, Site, JobResponse, Country
 from .scrapers.exceptions import (
     LinkedInException,
     IndeedException,
     ZipRecruiterException,
+    GlassdoorException,
 )

 SCRAPER_MAPPING = {
     Site.LINKEDIN: LinkedInScraper,
     Site.INDEED: IndeedScraper,
     Site.ZIP_RECRUITER: ZipRecruiterScraper,
+    Site.GLASSDOOR: GlassdoorScraper,
 }

@@ -90,6 +93,8 @@ def scrape_jobs(
                raise IndeedException(str(e))
            if site == Site.ZIP_RECRUITER:
                raise ZipRecruiterException(str(e))
+           if site == Site.GLASSDOOR:
+               raise GlassdoorException(str(e))
            else:
                raise e
        return site.value, scraped_data

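With `Site.GLASSDOOR` wired into the dispatch above, a failure on the new board surfaces as a `GlassdoorException` rather than a bare `Exception`. A minimal caller-side sketch (not part of the commit; the import path follows the package's relative imports, and the parameters mirror the README example):

```python
from jobspy import scrape_jobs
from jobspy.scrapers.exceptions import GlassdoorException

try:
    jobs = scrape_jobs(
        site_name="glassdoor",
        search_term="software engineer",
        country_indeed="USA",
    )
except GlassdoorException as e:
    print(f"Glassdoor scrape failed: {e}")
```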

@@ -127,7 +132,10 @@ def scrape_jobs(
        job_data["emails"] = (
            ", ".join(job_data["emails"]) if job_data["emails"] else None
        )
-       job_data["location"] = Location(**job_data["location"]).display_location()
+       if job_data["location"]:
+           job_data["location"] = Location(
+               **job_data["location"]
+           ).display_location()

        compensation_obj = job_data.get("compensation")
        if compensation_obj and isinstance(compensation_obj, dict):

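The new guard exists because Glassdoor listings can come back with `location=None` (remote postings set `is_remote` instead), and unpacking `None` with `**` raises a `TypeError`. A self-contained toy with a stand-in for the pydantic model:

```python
class Location:  # stand-in for jobspy's pydantic Location, for illustration only
    def __init__(self, city=None, state=None, country=None):
        self.city, self.state, self.country = city, state, country

    def display_location(self):
        return ", ".join(p for p in (self.city, self.state) if p)

job_data = {"location": None}  # e.g. a remote Glassdoor posting
if job_data["location"]:       # the guard: skips Location(**None), which would raise TypeError
    job_data["location"] = Location(**job_data["location"]).display_location()
print(job_data["location"])    # None instead of a crash
```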

@@ -1,7 +1,6 @@
 from typing import Union, Optional
 from datetime import date
 from enum import Enum

 from pydantic import BaseModel, validator

-
@@ -56,13 +55,13 @@ class JobType(Enum):


 class Country(Enum):
-    ARGENTINA = ("argentina", "ar")
-    AUSTRALIA = ("australia", "au")
-    AUSTRIA = ("austria", "at")
+    ARGENTINA = ("argentina", "com.ar")
+    AUSTRALIA = ("australia", "au", "com.au")
+    AUSTRIA = ("austria", "at", "at")
     BAHRAIN = ("bahrain", "bh")
-    BELGIUM = ("belgium", "be")
-    BRAZIL = ("brazil", "br")
-    CANADA = ("canada", "ca")
+    BELGIUM = ("belgium", "be", "nl:be")
+    BRAZIL = ("brazil", "br", "com.br")
+    CANADA = ("canada", "ca", "ca")
     CHILE = ("chile", "cl")
     CHINA = ("china", "cn")
     COLOMBIA = ("colombia", "co")

@@ -72,24 +71,24 @@ class Country(Enum):
     ECUADOR = ("ecuador", "ec")
     EGYPT = ("egypt", "eg")
     FINLAND = ("finland", "fi")
-    FRANCE = ("france", "fr")
-    GERMANY = ("germany", "de")
+    FRANCE = ("france", "fr", "fr")
+    GERMANY = ("germany", "de", "de")
     GREECE = ("greece", "gr")
-    HONGKONG = ("hong kong", "hk")
+    HONGKONG = ("hong kong", "hk", "com.hk")
     HUNGARY = ("hungary", "hu")
-    INDIA = ("india", "in")
+    INDIA = ("india", "in", "co.in")
     INDONESIA = ("indonesia", "id")
-    IRELAND = ("ireland", "ie")
+    IRELAND = ("ireland", "ie", "ie")
     ISRAEL = ("israel", "il")
-    ITALY = ("italy", "it")
+    ITALY = ("italy", "it", "it")
     JAPAN = ("japan", "jp")
     KUWAIT = ("kuwait", "kw")
     LUXEMBOURG = ("luxembourg", "lu")
     MALAYSIA = ("malaysia", "malaysia")
-    MEXICO = ("mexico", "mx")
+    MEXICO = ("mexico", "mx", "com.mx")
     MOROCCO = ("morocco", "ma")
-    NETHERLANDS = ("netherlands", "nl")
-    NEWZEALAND = ("new zealand", "nz")
+    NETHERLANDS = ("netherlands", "nl", "nl")
+    NEWZEALAND = ("new zealand", "nz", "co.nz")
     NIGERIA = ("nigeria", "ng")
     NORWAY = ("norway", "no")
     OMAN = ("oman", "om")

@@ -102,19 +101,19 @@ class Country(Enum):
     QATAR = ("qatar", "qa")
     ROMANIA = ("romania", "ro")
     SAUDIARABIA = ("saudi arabia", "sa")
-    SINGAPORE = ("singapore", "sg")
+    SINGAPORE = ("singapore", "sg", "sg")
     SOUTHAFRICA = ("south africa", "za")
     SOUTHKOREA = ("south korea", "kr")
-    SPAIN = ("spain", "es")
+    SPAIN = ("spain", "es", "es")
     SWEDEN = ("sweden", "se")
-    SWITZERLAND = ("switzerland", "ch")
+    SWITZERLAND = ("switzerland", "ch", "de:ch")
     TAIWAN = ("taiwan", "tw")
     THAILAND = ("thailand", "th")
     TURKEY = ("turkey", "tr")
     UKRAINE = ("ukraine", "ua")
     UNITEDARABEMIRATES = ("united arab emirates", "ae")
-    UK = ("uk", "uk")
-    USA = ("usa", "www")
+    UK = ("uk", "uk", "co.uk")
+    USA = ("usa", "www", "com")
     URUGUAY = ("uruguay", "uy")
     VENEZUELA = ("venezuela", "ve")
     VIETNAM = ("vietnam", "vn")

@@ -125,31 +124,39 @@
     # internal for linkedin
     WORLDWIDE = ("worldwide", "www")

-    def __new__(cls, country, domain):
-        obj = object.__new__(cls)
-        obj._value_ = country
-        obj.domain = domain
-        return obj
+    @property
+    def indeed_domain_value(self):
+        return self.value[1]

     @property
-    def domain_value(self):
-        return self.domain
+    def glassdoor_domain_value(self):
+        if len(self.value) == 3:
+            subdomain, _, domain = self.value[2].partition(":")
+            if subdomain and domain:
+                return f"{subdomain}.glassdoor.{domain}"
+            else:
+                return f"www.glassdoor.{self.value[2]}"
+        else:
+            raise Exception(f"Glassdoor is not available for {self.name}")
+
+    def get_url(self):
+        return f"https://{self.glassdoor_domain_value}/"

     @classmethod
     def from_string(cls, country_str: str):
         """Convert a string to the corresponding Country enum."""
         country_str = country_str.strip().lower()
         for country in cls:
-            if country.value == country_str:
+            if country.value[0] == country_str:
                 return country
         valid_countries = [country.value for country in cls]
         raise ValueError(
-            f"Invalid country string: '{country_str}'. Valid countries (only include this param for Indeed) are: {', '.join(valid_countries)}"
+            f"Invalid country string: '{country_str}'. Valid countries are: {', '.join([country[0] for country in valid_countries])}"
         )


 class Location(BaseModel):
-    country: Country = None
+    country: Country | None = None
     city: Optional[str] = None
     state: Optional[str] = None

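For a sense of what the tuple layout encodes: element 0 is the canonical country name, element 1 the Indeed subdomain, and the optional element 2 the Glassdoor domain, where a `subdomain:tld` value (e.g. `"de:ch"`) selects a non-`www` host. A quick illustration, assuming the enum above is importable as `jobspy.jobs.Country`:

```python
from jobspy.jobs import Country

print(Country.USA.indeed_domain_value)             # "www"
print(Country.UK.glassdoor_domain_value)           # "www.glassdoor.co.uk"
print(Country.SWITZERLAND.glassdoor_domain_value)  # "de.glassdoor.ch" ("de:ch" splits on ":")
print(Country.USA.get_url())                       # "https://www.glassdoor.com/"

try:
    Country.BAHRAIN.glassdoor_domain_value         # 2-tuple: no Glassdoor support
except Exception as e:
    print(e)                                       # Glassdoor is not available for BAHRAIN
```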

@@ -160,10 +167,10 @@ class Location(BaseModel):
         if self.state:
             location_parts.append(self.state)
         if self.country and self.country not in (Country.US_CANADA, Country.WORLDWIDE):
-            if self.country.value in ("usa", "uk"):
-                location_parts.append(self.country.value.upper())
+            if self.country.value[0] in ("usa", "uk"):
+                location_parts.append(self.country.value[0].upper())
             else:
-                location_parts.append(self.country.value.title())
+                location_parts.append(self.country.value[0].title())
         return ", ".join(location_parts)

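`display_location()` follows suit, indexing into the tuple rather than treating `value` as a plain string. A quick check, under the same import assumption as above:

```python
from jobspy.jobs import Location, Country

print(Location(city="Dallas", state="TX", country=Country.USA).display_location())
# "Dallas, TX, USA"  (usa/uk are upper-cased)
print(Location(city="Berlin", country=Country.GERMANY).display_location())
# "Berlin, Germany"  (other countries are title-cased)
```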

@@ -6,6 +6,7 @@ class Site(Enum):
     LINKEDIN = "linkedin"
     INDEED = "indeed"
     ZIP_RECRUITER = "zip_recruiter"
+    GLASSDOOR = "glassdoor"


 class ScraperInput(BaseModel):

@@ -19,3 +19,8 @@ class IndeedException(Exception):
 class ZipRecruiterException(Exception):
     def __init__(self, message=None):
         super().__init__(message or "An error occurred with ZipRecruiter")
+
+
+class GlassdoorException(Exception):
+    def __init__(self, message=None):
+        super().__init__(message or "An error occurred with Glassdoor")

@@ -0,0 +1,279 @@
+"""
+jobspy.scrapers.glassdoor
+~~~~~~~~~~~~~~~~~~~
+
+This module contains routines to scrape Glassdoor.
+"""
+import math
+import time
+import re
+import json
+from datetime import datetime, date
+from typing import Optional, Tuple, Any
+from bs4 import BeautifulSoup
+
+from .. import Scraper, ScraperInput, Site
+from ..exceptions import GlassdoorException
+from ..utils import count_urgent_words, extract_emails_from_text, create_session
+from ...jobs import (
+    JobPost,
+    Compensation,
+    CompensationInterval,
+    Location,
+    JobResponse,
+    JobType,
+    Country,
+)
+
+
+class GlassdoorScraper(Scraper):
+    def __init__(self, proxy: Optional[str] = None):
+        """
+        Initializes GlassdoorScraper with the Glassdoor job search url
+        """
+        site = Site(Site.GLASSDOOR)
+        super().__init__(site, proxy=proxy)
+
+        self.url = None
+        self.country = None
+        self.jobs_per_page = 30
+        self.seen_urls = set()
+
+    def fetch_jobs_page(
+        self,
+        scraper_input: ScraperInput,
+        location_id: int,
+        location_type: str,
+        page_num: int,
+        cursor: str | None,
+    ) -> (list[JobPost], str | None):
+        """
+        Scrapes a page of Glassdoor for jobs with scraper_input criteria
+        :param scraper_input:
+        :return: jobs found on page
+        :return: cursor for next page
+        """
+        try:
+            payload = self.add_payload(
+                scraper_input, location_id, location_type, page_num, cursor
+            )
+            session = create_session(self.proxy, is_tls=False)
+            response = session.post(
+                f"{self.url}/graph", headers=self.headers(), timeout=10, data=payload
+            )
+            if response.status_code != 200:
+                raise GlassdoorException(
+                    f"bad response status code: {response.status_code}"
+                )
+            res_json = response.json()[0]
+            if "errors" in res_json:
+                raise ValueError("Error encountered in API response")
+        except Exception as e:
+            raise GlassdoorException(str(e))
+
+        jobs_data = res_json["data"]["jobListings"]["jobListings"]
+
+        jobs = []
+        for i, job in enumerate(jobs_data):
+            job_url = res_json["data"]["jobListings"]["jobListingSeoLinks"][
+                "linkItems"
+            ][i]["url"]
+            job = job["jobview"]
+            title = job["job"]["jobTitleText"]
+            company_name = job["header"]["employerNameFromSearch"]
+            location_name = job["header"].get("locationName", "")
+            location_type = job["header"].get("locationType", "")
+            is_remote = False
+            location = None
+
+            if location_type == "S":
+                is_remote = True
+            else:
+                location = self.parse_location(location_name)
+
+            compensation = self.parse_compensation(job["header"])
+
+            job = JobPost(
+                title=title,
+                company_name=company_name,
+                job_url=job_url,
+                location=location,
+                compensation=compensation,
+                is_remote=is_remote,
+            )
+            jobs.append(job)
+
+        return jobs, self.get_cursor_for_page(
+            res_json["data"]["jobListings"]["paginationCursors"], page_num + 1
+        )
+
+    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
+        """
+        Scrapes Glassdoor for jobs with scraper_input criteria.
+        :param scraper_input: Information about job search criteria.
+        :return: JobResponse containing a list of jobs.
+        """
+        self.country = scraper_input.country
+        self.url = self.country.get_url()
+
+        location_id, location_type = self.get_location(
+            scraper_input.location, scraper_input.is_remote
+        )
+        all_jobs: list[JobPost] = []
+        cursor = None
+        max_pages = 30
+
+        try:
+            for page in range(
+                1 + (scraper_input.offset // self.jobs_per_page),
+                min(
+                    (scraper_input.results_wanted // self.jobs_per_page) + 2,
+                    max_pages + 1,
+                ),
+            ):
+                try:
+                    jobs, cursor = self.fetch_jobs_page(
+                        scraper_input, location_id, location_type, page, cursor
+                    )
+                    all_jobs.extend(jobs)
+                    if len(all_jobs) >= scraper_input.results_wanted:
+                        all_jobs = all_jobs[: scraper_input.results_wanted]
+                        break
+                except Exception as e:
+                    print(f"Page {page} generated an exception: {e}")
+        except Exception as e:
+            print(f"An exception occurred: {e}")
+
+        return JobResponse(jobs=all_jobs)
+
+    @staticmethod
+    def parse_compensation(data: dict) -> Optional[Compensation]:
+        pay_period = data.get("payPeriod")
+        adjusted_pay = data.get("payPeriodAdjustedPay")
+        currency = data.get("payCurrency", "USD")
+
+        if not pay_period or not adjusted_pay:
+            return None
+
+        interval = None
+        if pay_period == "ANNUAL":
+            interval = CompensationInterval.YEARLY
+        elif pay_period == "MONTHLY":
+            interval = CompensationInterval.MONTHLY
+        elif pay_period == "WEEKLY":
+            interval = CompensationInterval.WEEKLY
+        elif pay_period == "DAILY":
+            interval = CompensationInterval.DAILY
+        elif pay_period == "HOURLY":
+            interval = CompensationInterval.HOURLY
+
+        min_amount = int(adjusted_pay.get("p10") // 1)
+        max_amount = int(adjusted_pay.get("p90") // 1)
+
+        return Compensation(
+            interval=interval,
+            min_amount=min_amount,
+            max_amount=max_amount,
+            currency=currency,
+        )
+
+    def get_job_type_enum(self, job_type_str: str) -> list[JobType] | None:
+        for job_type in JobType:
+            if job_type_str in job_type.value:
+                return [job_type]
+        return None
+
+    def get_location(self, location: str, is_remote: bool) -> (int, str):
+        if not location or is_remote:
+            return "11047", "S"  # remote options
+        url = f"{self.url}/findPopularLocationAjax.htm?maxLocationsToReturn=10&term={location}"
+        session = create_session(self.proxy)
+        response = session.get(url)
+        if response.status_code != 200:
+            raise GlassdoorException(
+                f"bad response status code: {response.status_code}"
+            )
+        items = response.json()
+        if not items:
+            raise ValueError(f"Location '{location}' not found on Glassdoor")
+        return int(items[0]["locationId"]), items[0]["locationType"]
+
+    @staticmethod
+    def add_payload(
+        scraper_input,
+        location_id: int,
+        location_type: str,
+        page_num: int,
+        cursor: str | None = None,
+    ) -> dict[str, str | Any]:
+        payload = {
+            "operationName": "JobSearchResultsQuery",
+            "variables": {
+                "excludeJobListingIds": [],
+                "filterParams": [],
+                "keyword": scraper_input.search_term,
+                "numJobsToShow": 30,
+                "originalPageUrl": "https://www.glassdoor.com/Job/software-intern-jobs-SRCH_KO0,15.htm",
+                "parameterUrlInput": f"IL.0,12_I{location_type}{location_id}",
+                "seoFriendlyUrlInput": "software-intern-jobs",
+                "seoUrl": True,
+                "pageNumber": page_num,
+                "pageCursor": cursor,
+            },
+            "query": "query JobSearchResultsQuery($excludeJobListingIds: [Long!], $keyword: String, $locationId: Int, $locationType: LocationTypeEnum, $numJobsToShow: Int!, $pageCursor: String, $pageNumber: Int, $filterParams: [FilterParams], $originalPageUrl: String, $seoFriendlyUrlInput: String, $parameterUrlInput: String, $seoUrl: Boolean) {\n jobListings(\n contextHolder: {searchParams: {excludeJobListingIds: $excludeJobListingIds, keyword: $keyword, locationId: $locationId, locationType: $locationType, numPerPage: $numJobsToShow, pageCursor: $pageCursor, pageNumber: $pageNumber, filterParams: $filterParams, originalPageUrl: $originalPageUrl, seoFriendlyUrlInput: $seoFriendlyUrlInput, parameterUrlInput: $parameterUrlInput, seoUrl: $seoUrl, searchType: SR}}\n ) {\n companyFilterOptions {\n id\n shortName\n __typename\n }\n filterOptions\n indeedCtk\n jobListings {\n ...JobView\n __typename\n }\n jobListingSeoLinks {\n linkItems {\n position\n url\n __typename\n }\n __typename\n }\n jobSearchTrackingKey\n jobsPageSeoData {\n pageMetaDescription\n pageTitle\n __typename\n }\n paginationCursors {\n cursor\n pageNumber\n __typename\n }\n indexablePageForSeo\n searchResultsMetadata {\n searchCriteria {\n implicitLocation {\n id\n localizedDisplayName\n type\n __typename\n }\n keyword\n location {\n id\n shortName\n localizedShortName\n localizedDisplayName\n type\n __typename\n }\n __typename\n }\n footerVO {\n countryMenu {\n childNavigationLinks {\n id\n link\n textKey\n __typename\n }\n __typename\n }\n __typename\n }\n helpCenterDomain\n helpCenterLocale\n jobAlert {\n jobAlertExists\n __typename\n }\n jobSerpFaq {\n questions {\n answer\n question\n __typename\n }\n __typename\n }\n jobSerpJobOutlook {\n occupation\n paragraph\n __typename\n }\n showMachineReadableJobs\n __typename\n }\n serpSeoLinksVO {\n relatedJobTitlesResults\n searchedJobTitle\n searchedKeyword\n searchedLocationIdAsString\n searchedLocationSeoName\n searchedLocationType\n topCityIdsToNameResults {\n key\n value\n __typename\n }\n topEmployerIdsToNameResults {\n key\n value\n __typename\n }\n topEmployerNameResults\n topOccupationResults\n __typename\n }\n totalJobsCount\n __typename\n }\n}\n\nfragment JobView on JobListingSearchResult {\n jobview {\n header {\n adOrderId\n advertiserType\n adOrderSponsorshipLevel\n ageInDays\n divisionEmployerName\n easyApply\n employer {\n id\n name\n shortName\n __typename\n }\n employerNameFromSearch\n goc\n gocConfidence\n gocId\n jobCountryId\n jobLink\n jobResultTrackingKey\n jobTitleText\n locationName\n locationType\n locId\n needsCommission\n payCurrency\n payPeriod\n payPeriodAdjustedPay {\n p10\n p50\n p90\n __typename\n }\n rating\n salarySource\n savedJobId\n sponsored\n __typename\n }\n job {\n descriptionFragments\n importConfigId\n jobTitleId\n jobTitleText\n listingId\n __typename\n }\n jobListingAdminDetails {\n cpcVal\n importConfigId\n jobListingId\n jobSourceId\n userEligibleForAdminJobDetails\n __typename\n }\n overview {\n shortName\n squareLogoUrl\n __typename\n }\n __typename\n }\n __typename\n}\n",
+        }
+
+        job_type_filters = {
+            JobType.FULL_TIME: "fulltime",
+            JobType.PART_TIME: "parttime",
+            JobType.CONTRACT: "contract",
+            JobType.INTERNSHIP: "internship",
+            JobType.TEMPORARY: "temporary",
+        }
+
+        if scraper_input.job_type in job_type_filters:
+            filter_value = job_type_filters[scraper_input.job_type]
+            payload["variables"]["filterParams"].append(
+                {"filterKey": "jobType", "values": filter_value}
+            )
+
+        return json.dumps([payload])
+
+    def parse_location(self, location_name: str) -> Location:
+        if not location_name or location_name == "Remote":
+            return None
+        city, _, state = location_name.partition(", ")
+        return Location(city=city, state=state)
+
+    @staticmethod
+    def get_cursor_for_page(pagination_cursors, page_num):
+        for cursor_data in pagination_cursors:
+            if cursor_data["pageNumber"] == page_num:
+                return cursor_data["cursor"]
+        return None
+
+    @staticmethod
+    def headers() -> dict:
+        """
+        Returns headers needed for requests
+        :return: dict - Dictionary containing headers
+        """
+        return {
+            "authority": "www.glassdoor.com",
+            "accept": "*/*",
+            "accept-language": "en-US,en;q=0.9",
+            "apollographql-client-name": "job-search-next",
+            "apollographql-client-version": "4.65.5",
+            "content-type": "application/json",
+            "cookie": 'gdId=91e2dfc4-c8b5-4fa7-83d0-11512b80262c; G_ENABLED_IDPS=google; trs=https%3A%2F%2Fwww.redhat.com%2F:referral:referral:2023-07-05+09%3A50%3A14.862:undefined:undefined; g_state={"i_p":1688587331651,"i_l":1}; _cfuvid=.7llazxhYFZWi6EISSPdVjtqF0NMVwzxr_E.cB1jgLs-1697828392979-0-604800000; GSESSIONID=undefined; JSESSIONID=F03DD1B5EE02DB6D842FE42B142F88F3; cass=1; jobsClicked=true; indeedCtk=1hd77b301k79i801; asst=1697829114.2; G_AUTHUSER_H=0; uc=8013A8318C98C517FE6DD0024636DFDEF978FC33266D93A2FAFEF364EACA608949D8B8FA2DC243D62DE271D733EB189D809ABE5B08D7B1AE865D217BD4EEBB97C282F5DA5FEFE79C937E3F6110B2A3A0ADBBA3B4B6DF5A996FEE00516100A65FCB11DA26817BE8D1C1BF6CFE36B5B68A3FDC2CFEC83AB797F7841FBB157C202332FC7E077B56BD39B167BDF3D9866E3B; AWSALB=zxc/Yk1nbWXXT6HjNyn3H4h4950ckVsFV/zOrq5LSoChYLE1qV+hDI8Axi3fUa9rlskndcO0M+Fw+ZnJ+AQ2afBFpyOd1acouLMYgkbEpqpQaWhY6/Gv4QH1zBcJ; AWSALBCORS=zxc/Yk1nbWXXT6HjNyn3H4h4950ckVsFV/zOrq5LSoChYLE1qV+hDI8Axi3fUa9rlskndcO0M+Fw+ZnJ+AQ2afBFpyOd1acouLMYgkbEpqpQaWhY6/Gv4QH1zBcJ; gdsid=1697828393025:1697830776351:668396EDB9E6A832022D34414128093D; at=HkH8Hnqi9uaMC7eu0okqyIwqp07ht9hBvE1_St7E_hRqPvkO9pUeJ1Jcpds4F3g6LL5ADaCNlxrPn0o6DumGMfog8qI1-zxaV_jpiFs3pugntw6WpVyYWdfioIZ1IDKupyteeLQEM1AO4zhGjY_rPZynpsiZBPO_B1au94sKv64rv23yvP56OiWKKfI-8_9hhLACEwWvM-Az7X-4aE2QdFt93VJbXbbGVf07bdDZfimsIkTtgJCLSRhU1V0kEM1Efyu66vo3m77gFFaMW7lxyYnb36I5PdDtEXBm3aL-zR7-qa5ywd94ISEivgqQOA4FPItNhqIlX4XrfD1lxVz6rfPaoTIDi4DI6UMCUjwyPsuv8mn0rYqDfRnmJpZ97fJ5AnhrknAd_6ZWN5v1OrxJczHzcXd8LO820QPoqxzzG13bmSTXLwGSxMUCtSrVsq05hicimQ3jpRt0c1dA4OkTNqF7_770B9JfcHcM8cr8-C4IL56dnOjr9KBGfN1Q2IvZM2cOBRbV7okiNOzKVZ3qJ24AE34WA2F3U6Whiu6H8nIuGG5hSNkVygY6CtglNZfFF9p8pJAZm79PngrrBv-CXFBZmhYLFo46lmFetDkiJ6mirtez4tKpzTIYjIp4_JAkiZFwbLJ2QGH4mK8kyyW0lZiX1DTuQec50N_5wvRo0Gt7nlKxzLsApMnaNhuQeH5ygh_pa381ORo9mQGi0EYF9zk00pa2--z4PtjfQ8KFq36GgpxKy5-o4qgqygZj8F01L8r-FiX2G4C7PREMIpAyHX2A4-_JxA1IS2j12EyqKTLqE9VcP06qm2Z-YuIW3ctmpMxy5G9_KiEiGv17weizhSFnl6SbpAEY-2VSmQ5V6jm3hoMp2jemkuGCRkZeFstLDEPxlzFN7WM; __cf_bm=zGaVjIJw4irf40_7UVw54B6Ohm271RUX4Tc8KVScrbs-1697830777-0-AYv2GnKTnnCU+cY9xHbJunO0DwlLDO6SIBnC/s/qldpKsGK0rRAjD6y8lbyATT/KlS7g29OZaN4fbd0lrJg0KmWbIybZIzfWVLHSYePVuOhu; asst=1697829114.2; at=dFhXf64wsf2TlnWy41xLs7skJkuxgKToEGcjGtDfUvW4oEAJ4tTIR5dKQ8wbwT75aIaGgdCfvcb-da7vwrCGWscCncmfLFQpJ9l-LLwoRfk-pMsxHhd77wvf-W7I0HSm7-Q5lQJqI9WyNGRxOa-RpzBTf4L8_Et4-3FzjPaAoYY5pY1FhuwXbN5asGOAMW-p8cjpbfn3PumlIYuckguWnjrcY2F31YJ_1noeoHM9tCGpymANbqGXRkG6aXY7yCfVXtdgZU1K5SMeaSPZIuF_iLUxjc_corzpNiH6qq7BIAmh-e5Aa-g7cwpZcln1fmwTVw4uTMZf1eLIMTa9WzgqZNkvG-sGaq_XxKA_Wai6xTTkOHfRgm4632Ba2963wdJvkGmUUa3tb_L4_wTgk3eFnHp5JhghLfT2Pe3KidP-yX__vx8JOsqe3fndCkKXgVz7xQKe1Dur-sMNlGwi4LXfguTT2YUI8C5Miq3pj2IHc7dC97eyyAiAM4HvyGWfaXWZcei6oIGrOwMvYgy0AcwFry6SIP2SxLT5TrxinRRuem1r1IcOTJsMJyUPp1QsZ7bOyq9G_0060B4CPyovw5523hEuqLTM-R5e5yavY6C_1DHUyE15C3mrh7kdvmlGZeflnHqkFTEKwwOftm-Mv-CKD5Db9ABFGNxKB2FH7nDH67hfOvm4tGNMzceBPKYJ3wciTt9jK3wy39_7cOYVywfrZ-oLhw_XtsbGSSeGn3HytrfgSADAh2sT0Gg6eCC9Xy1vh-Za337SVLUDXZ73W2xJxxUHBkFzZs8L_Xndo5DsbpWhVs9IYUGyraJdqB3SLgDbAppIBCJl4fx6_DG8-xOQPBvuFMlTROe1JVdHOzXI1GElwFDTuH1pjkg4I2G0NhAbE06Y-1illQE; gdsid=1697828393025:1697831731408:99C30D94108AC3030D61C736DDCDF11C',
+            "gd-csrf-token": "Ft6oHEWlRZrxDww95Cpazw:0pGUrkb2y3TyOpAIqF2vbPmUXoXVkD3oEGDVkvfeCerceQ5-n8mBg3BovySUIjmCPHCaW0H2nQVdqzbtsYqf4Q:wcqRqeegRUa9MVLJGyujVXB7vWFPjdaS1CtrrzJq-ok",
+            "origin": "https://www.glassdoor.com",
+            "referer": "https://www.glassdoor.com/",
+            "sec-ch-ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
+            "sec-ch-ua-mobile": "?0",
+            "sec-ch-ua-platform": '"macOS"',
+            "sec-fetch-dest": "empty",
+            "sec-fetch-mode": "cors",
+            "sec-fetch-site": "same-origin",
+            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
+        }
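One detail worth calling out in the scraper above: Glassdoor's GraphQL response carries a `paginationCursors` array, and each follow-up request must echo the cursor whose `pageNumber` matches the page being requested, which is all `get_cursor_for_page` does. A self-contained toy of that lookup (cursor strings invented):

```python
pagination_cursors = [
    {"pageNumber": 2, "cursor": "AB8qKQkV"},
    {"pageNumber": 3, "cursor": "AD4xNzQz"},
]

def get_cursor_for_page(pagination_cursors, page_num):
    # same logic as the @staticmethod in the scraper above
    for cursor_data in pagination_cursors:
        if cursor_data["pageNumber"] == page_num:
            return cursor_data["cursor"]
    return None

assert get_cursor_for_page(pagination_cursors, 3) == "AD4xNzQz"
assert get_cursor_for_page(pagination_cursors, 9) is None  # no match: request goes out without a cursor
```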

@@ -56,7 +56,7 @@ class IndeedScraper(Scraper):
         :return: jobs found on page, total number of jobs found for search
         """
         self.country = scraper_input.country
-        domain = self.country.domain_value
+        domain = self.country.indeed_domain_value
         self.url = f"https://{domain}.indeed.com"

         params = {

@@ -258,12 +258,8 @@ class IndeedScraper(Scraper):
         except (KeyError, TypeError, IndexError):
             return None

-        soup = BeautifulSoup(
-            job_description, "html.parser"
-        )
-        text_content = " ".join(
-            soup.get_text(separator=" ").split()
-        ).strip()
+        soup = BeautifulSoup(job_description, "html.parser")
+        text_content = " ".join(soup.get_text(separator=" ").split()).strip()

         return text_content

@@ -18,12 +18,7 @@ from threading import Lock
 from .. import Scraper, ScraperInput, Site
 from ..utils import count_urgent_words, extract_emails_from_text, get_enum_from_job_type
 from ..exceptions import LinkedInException
-from ...jobs import (
-    JobPost,
-    Location,
-    JobResponse,
-    JobType,
-)
+from ...jobs import JobPost, Location, JobResponse, JobType, Country


 class LinkedInScraper(Scraper):

@@ -181,7 +176,6 @@ class LinkedInScraper(Scraper):
                 location=location,
                 date_posted=date_posted,
                 job_url=job_url,
-                # job_type=[JobType.FULL_TIME],
                 job_type=job_type,
                 benefits=benefits,
                 emails=extract_emails_from_text(description) if description else None,

@@ -246,7 +240,7 @@ class LinkedInScraper(Scraper):
         :param metadata_card
         :return: location
         """
-        location = Location(country=self.country)
+        location = Location(country=Country.from_string(self.country))
         if metadata_card is not None:
             location_tag = metadata_card.find(
                 "span", class_="job-search-card__location"

@@ -258,7 +252,7 @@ class LinkedInScraper(Scraper):
             location = Location(
                 city=city,
                 state=state,
-                country=self.country,
+                country=Country.from_string(self.country),
             )

         return location

@@ -16,13 +16,13 @@ from concurrent.futures import ThreadPoolExecutor
 from .. import Scraper, ScraperInput, Site
 from ..exceptions import ZipRecruiterException
 from ..utils import count_urgent_words, extract_emails_from_text, create_session
-from ...jobs import JobPost, Compensation, Location, JobResponse, JobType
+from ...jobs import JobPost, Compensation, Location, JobResponse, JobType, Country


 class ZipRecruiterScraper(Scraper):
     def __init__(self, proxy: Optional[str] = None):
         """
-        Initializes LinkedInScraper with the ZipRecruiter job search url
+        Initializes ZipRecruiterScraper with the ZipRecruiter job search url
         """
         site = Site(Site.ZIP_RECRUITER)
         self.url = "https://www.ziprecruiter.com"

@@ -31,7 +31,9 @@ class ZipRecruiterScraper(Scraper):
         self.jobs_per_page = 20
         self.seen_urls = set()

-    def find_jobs_in_page(self, scraper_input: ScraperInput, continue_token: str | None = None) -> Tuple[list[JobPost], Optional[str]]:
+    def find_jobs_in_page(
+        self, scraper_input: ScraperInput, continue_token: str | None = None
+    ) -> Tuple[list[JobPost], Optional[str]]:
         """
         Scrapes a page of ZipRecruiter for jobs with scraper_input criteria
         :param scraper_input:

@@ -40,7 +42,7 @@ class ZipRecruiterScraper(Scraper):
         """
         params = self.add_params(scraper_input)
         if continue_token:
-            params['continue'] = continue_token
+            params["continue"] = continue_token
         try:
             session = create_session(self.proxy, is_tls=False)
             response = session.get(

@@ -61,13 +63,10 @@ class ZipRecruiterScraper(Scraper):
             time.sleep(5)
         response_data = response.json()
         jobs_list = response_data.get("jobs", [])
-        next_continue_token = response_data.get('continue', None)
+        next_continue_token = response_data.get("continue", None)

         with ThreadPoolExecutor(max_workers=self.jobs_per_page) as executor:
-            job_results = [
-                executor.submit(self.process_job, job)
-                for job in jobs_list
-            ]
+            job_results = [executor.submit(self.process_job, job) for job in jobs_list]

         job_list = [result.result() for result in job_results if result.result()]
         return job_list, next_continue_token

@@ -87,7 +86,9 @@ class ZipRecruiterScraper(Scraper):
             if len(job_list) >= scraper_input.results_wanted:
                 break

-            jobs_on_page, continue_token = self.find_jobs_in_page(scraper_input, continue_token)
+            jobs_on_page, continue_token = self.find_jobs_in_page(
+                scraper_input, continue_token
+            )
             if jobs_on_page:
                 job_list.extend(jobs_on_page)

@@ -95,13 +96,13 @@ class ZipRecruiterScraper(Scraper):
                 break

         if len(job_list) > scraper_input.results_wanted:
-            job_list = job_list[:scraper_input.results_wanted]
+            job_list = job_list[: scraper_input.results_wanted]

         return JobResponse(jobs=job_list)

     @staticmethod
     def process_job(job: dict) -> JobPost:
-        """ Processes an individual job dict from the response """
+        """Processes an individual job dict from the response"""
         title = job.get("name")
         job_url = job.get("job_url")

@@ -109,9 +110,12 @@ class ZipRecruiterScraper(Scraper):
             job.get("job_description", "").strip(), "html.parser"
         ).get_text()

-        company = job['hiring_company'].get("name") if "hiring_company" in job else None
+        company = job["hiring_company"].get("name") if "hiring_company" in job else None
+        country_value = "usa" if job.get("job_country") == "US" else "canada"
+        country_enum = Country.from_string(country_value)
+
         location = Location(
-            city=job.get("job_city"), state=job.get("job_state"), country='usa' if job.get("job_country") == 'US' else 'canada'
+            city=job.get("job_city"), state=job.get("job_state"), country=country_enum
         )
         job_type = ZipRecruiterScraper.get_job_type_enum(
             job.get("employment_type", "").replace("_", "").lower()

@@ -134,9 +138,15 @@ class ZipRecruiterScraper(Scraper):
             location=location,
             job_type=job_type,
             compensation=Compensation(
-                interval="yearly" if job.get("compensation_interval") == "annual" else job.get("compensation_interval"),
-                min_amount=int(job["compensation_min"]) if "compensation_min" in job else None,
-                max_amount=int(job["compensation_max"]) if "compensation_max" in job else None,
+                interval="yearly"
+                if job.get("compensation_interval") == "annual"
+                else job.get("compensation_interval"),
+                min_amount=int(job["compensation_min"])
+                if "compensation_min" in job
+                else None,
+                max_amount=int(job["compensation_max"])
+                if "compensation_max" in job
+                else None,
                 currency=job.get("compensation_currency"),
             ),
             date_posted=date_posted,

@@ -189,13 +199,13 @@ class ZipRecruiterScraper(Scraper):
         :return: dict - Dictionary containing headers
         """
         return {
-            'Host': 'api.ziprecruiter.com',
-            'Cookie': 'ziprecruiter_browser=018188e0-045b-4ad7-aa50-627a6c3d43aa; ziprecruiter_session=5259b2219bf95b6d2299a1417424bc2edc9f4b38; SplitSV=2016-10-19%3AU2FsdGVkX19f9%2Bx70knxc%2FeR3xXR8lWoTcYfq5QjmLU%3D%0A; __cf_bm=qXim3DtLPbOL83GIp.ddQEOFVFTc1OBGPckiHYxcz3o-1698521532-0-AfUOCkgCZyVbiW1ziUwyefCfzNrJJTTKPYnif1FZGQkT60dMowmSU/Y/lP+WiygkFPW/KbYJmyc+MQSkkad5YygYaARflaRj51abnD+SyF9V; zglobalid=68d49bd5-0326-428e-aba8-8a04b64bc67c.af2d99ff7c03.653d61bb; ziprecruiter_browser=018188e0-045b-4ad7-aa50-627a6c3d43aa; ziprecruiter_session=5259b2219bf95b6d2299a1417424bc2edc9f4b38',
-            'accept': '*/*',
-            'x-zr-zva-override': '100000000;vid:ZT1huzm_EQlDTVEc',
-            'x-pushnotificationid': '0ff4983d38d7fc5b3370297f2bcffcf4b3321c418f5c22dd152a0264707602a0',
-            'x-deviceid': 'D77B3A92-E589-46A4-8A39-6EF6F1D86006',
-            'user-agent': 'Job Search/87.0 (iPhone; CPU iOS 16_6_1 like Mac OS X)',
-            'authorization': 'Basic YTBlZjMyZDYtN2I0Yy00MWVkLWEyODMtYTI1NDAzMzI0YTcyOg==',
-            'accept-language': 'en-US,en;q=0.9'
+            "Host": "api.ziprecruiter.com",
+            "Cookie": "ziprecruiter_browser=018188e0-045b-4ad7-aa50-627a6c3d43aa; ziprecruiter_session=5259b2219bf95b6d2299a1417424bc2edc9f4b38; SplitSV=2016-10-19%3AU2FsdGVkX19f9%2Bx70knxc%2FeR3xXR8lWoTcYfq5QjmLU%3D%0A; __cf_bm=qXim3DtLPbOL83GIp.ddQEOFVFTc1OBGPckiHYxcz3o-1698521532-0-AfUOCkgCZyVbiW1ziUwyefCfzNrJJTTKPYnif1FZGQkT60dMowmSU/Y/lP+WiygkFPW/KbYJmyc+MQSkkad5YygYaARflaRj51abnD+SyF9V; zglobalid=68d49bd5-0326-428e-aba8-8a04b64bc67c.af2d99ff7c03.653d61bb; ziprecruiter_browser=018188e0-045b-4ad7-aa50-627a6c3d43aa; ziprecruiter_session=5259b2219bf95b6d2299a1417424bc2edc9f4b38",
+            "accept": "*/*",
+            "x-zr-zva-override": "100000000;vid:ZT1huzm_EQlDTVEc",
+            "x-pushnotificationid": "0ff4983d38d7fc5b3370297f2bcffcf4b3321c418f5c22dd152a0264707602a0",
+            "x-deviceid": "D77B3A92-E589-46A4-8A39-6EF6F1D86006",
+            "user-agent": "Job Search/87.0 (iPhone; CPU iOS 16_6_1 like Mac OS X)",
+            "authorization": "Basic YTBlZjMyZDYtN2I0Yy00MWVkLWEyODMtYTI1NDAzMzI0YTcyOg==",
+            "accept-language": "en-US,en;q=0.9",
         }

@@ -4,7 +4,7 @@ import pandas as pd

 def test_all():
     result = scrape_jobs(
-        site_name=["linkedin", "indeed", "zip_recruiter"],
+        site_name=["linkedin", "indeed", "zip_recruiter", "glassdoor"],
         search_term="software engineer",
         results_wanted=5,
     )

@@ -0,0 +1,11 @@
+from ..jobspy import scrape_jobs
+import pandas as pd
+
+
+def test_glassdoor():
+    result = scrape_jobs(
+        site_name="glassdoor", search_term="software engineer", country_indeed="USA"
+    )
+    assert (
+        isinstance(result, pd.DataFrame) and not result.empty
+    ), "Result should be a non-empty DataFrame"

@@ -4,8 +4,7 @@ import pandas as pd

 def test_indeed():
     result = scrape_jobs(
-        site_name="indeed",
-        search_term="software engineer",
+        site_name="indeed", search_term="software engineer", country_indeed="usa"
     )
     assert (
         isinstance(result, pd.DataFrame) and not result.empty