mirror of https://github.com/Bunsly/JobSpy
FEAT: Optional conversion to annual salary and report the salary source (#170)
parent
de70189fa2
commit
2a0cba8c7e
|
@ -10,7 +10,7 @@ from .scrapers.indeed import IndeedScraper
|
||||||
from .scrapers.ziprecruiter import ZipRecruiterScraper
|
from .scrapers.ziprecruiter import ZipRecruiterScraper
|
||||||
from .scrapers.glassdoor import GlassdoorScraper
|
from .scrapers.glassdoor import GlassdoorScraper
|
||||||
from .scrapers.linkedin import LinkedInScraper
|
from .scrapers.linkedin import LinkedInScraper
|
||||||
from .scrapers import ScraperInput, Site, JobResponse, Country
|
from .scrapers import SalarySource, ScraperInput, Site, JobResponse, Country
|
||||||
from .scrapers.exceptions import (
|
from .scrapers.exceptions import (
|
||||||
LinkedInException,
|
LinkedInException,
|
||||||
IndeedException,
|
IndeedException,
|
||||||
|
@ -36,6 +36,7 @@ def scrape_jobs(
|
||||||
linkedin_company_ids: list[int] | None = None,
|
linkedin_company_ids: list[int] | None = None,
|
||||||
offset: int | None = 0,
|
offset: int | None = 0,
|
||||||
hours_old: int = None,
|
hours_old: int = None,
|
||||||
|
enforce_annual_salary: bool = True,
|
||||||
verbose: int = 2,
|
verbose: int = 2,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
|
@ -165,7 +166,8 @@ def scrape_jobs(
|
||||||
job_data["min_amount"] = compensation_obj.get("min_amount")
|
job_data["min_amount"] = compensation_obj.get("min_amount")
|
||||||
job_data["max_amount"] = compensation_obj.get("max_amount")
|
job_data["max_amount"] = compensation_obj.get("max_amount")
|
||||||
job_data["currency"] = compensation_obj.get("currency", "USD")
|
job_data["currency"] = compensation_obj.get("currency", "USD")
|
||||||
if (
|
job_data["salary_source"] = SalarySource.DIRECT_DATA.value
|
||||||
|
if enforce_annual_salary and (
|
||||||
job_data["interval"]
|
job_data["interval"]
|
||||||
and job_data["interval"] != "yearly"
|
and job_data["interval"] != "yearly"
|
||||||
and job_data["min_amount"]
|
and job_data["min_amount"]
|
||||||
|
@ -180,7 +182,9 @@ def scrape_jobs(
|
||||||
job_data["min_amount"],
|
job_data["min_amount"],
|
||||||
job_data["max_amount"],
|
job_data["max_amount"],
|
||||||
job_data["currency"],
|
job_data["currency"],
|
||||||
) = extract_salary(job_data["description"])
|
) = extract_salary(job_data["description"], enforce_annual_salary=enforce_annual_salary)
|
||||||
|
job_data["salary_source"] = SalarySource.DESCRIPTION.value
|
||||||
|
|
||||||
|
|
||||||
job_df = pd.DataFrame([job_data])
|
job_df = pd.DataFrame([job_data])
|
||||||
jobs_dfs.append(job_df)
|
jobs_dfs.append(job_df)
|
||||||
|
|
|
@ -18,6 +18,9 @@ class Site(Enum):
|
||||||
ZIP_RECRUITER = "zip_recruiter"
|
ZIP_RECRUITER = "zip_recruiter"
|
||||||
GLASSDOOR = "glassdoor"
|
GLASSDOOR = "glassdoor"
|
||||||
|
|
||||||
|
class SalarySource(Enum):
|
||||||
|
DIRECT_DATA = "direct_data"
|
||||||
|
DESCRIPTION = "description"
|
||||||
|
|
||||||
class ScraperInput(BaseModel):
|
class ScraperInput(BaseModel):
|
||||||
site_type: list[Site]
|
site_type: list[Site]
|
||||||
|
|
|
@ -10,7 +10,7 @@ import numpy as np
|
||||||
from markdownify import markdownify as md
|
from markdownify import markdownify as md
|
||||||
from requests.adapters import HTTPAdapter, Retry
|
from requests.adapters import HTTPAdapter, Retry
|
||||||
|
|
||||||
from ..jobs import JobType
|
from ..jobs import CompensationInterval, JobType
|
||||||
|
|
||||||
logger = logging.getLogger("JobSpy")
|
logger = logging.getLogger("JobSpy")
|
||||||
logger.propagate = False
|
logger.propagate = False
|
||||||
|
@ -193,6 +193,7 @@ def extract_salary(
|
||||||
upper_limit=700000,
|
upper_limit=700000,
|
||||||
hourly_threshold=350,
|
hourly_threshold=350,
|
||||||
monthly_threshold=30000,
|
monthly_threshold=30000,
|
||||||
|
enforce_annual_salary=False,
|
||||||
):
|
):
|
||||||
if not salary_str:
|
if not salary_str:
|
||||||
return None, None, None, None
|
return None, None, None, None
|
||||||
|
@ -220,20 +221,30 @@ def extract_salary(
|
||||||
|
|
||||||
# Convert to annual if less than the hourly threshold
|
# Convert to annual if less than the hourly threshold
|
||||||
if min_salary < hourly_threshold:
|
if min_salary < hourly_threshold:
|
||||||
min_salary = convert_hourly_to_annual(min_salary)
|
interval = CompensationInterval.HOURLY.value
|
||||||
|
annual_min_salary = convert_hourly_to_annual(min_salary)
|
||||||
if max_salary < hourly_threshold:
|
if max_salary < hourly_threshold:
|
||||||
max_salary = convert_hourly_to_annual(max_salary)
|
annual_max_salary = convert_hourly_to_annual(max_salary)
|
||||||
|
|
||||||
elif min_salary < monthly_threshold:
|
elif min_salary < monthly_threshold:
|
||||||
min_salary = convert_monthly_to_annual(min_salary)
|
interval = CompensationInterval.MONTHLY.value
|
||||||
|
annual_min_salary = convert_monthly_to_annual(min_salary)
|
||||||
if max_salary < monthly_threshold:
|
if max_salary < monthly_threshold:
|
||||||
max_salary = convert_monthly_to_annual(max_salary)
|
annual_max_salary = convert_monthly_to_annual(max_salary)
|
||||||
|
|
||||||
|
else:
|
||||||
|
interval = CompensationInterval.YEARLY.value
|
||||||
|
annual_min_salary = min_salary
|
||||||
|
annual_max_salary = max_salary
|
||||||
|
|
||||||
# Ensure salary range is within specified limits
|
# Ensure salary range is within specified limits
|
||||||
if (
|
if (
|
||||||
lower_limit <= min_salary <= upper_limit
|
lower_limit <= annual_min_salary <= upper_limit
|
||||||
and lower_limit <= max_salary <= upper_limit
|
and lower_limit <= annual_max_salary <= upper_limit
|
||||||
and min_salary < max_salary
|
and annual_min_salary < annual_max_salary
|
||||||
):
|
):
|
||||||
return "yearly", min_salary, max_salary, "USD"
|
if enforce_annual_salary:
|
||||||
|
return interval, annual_min_salary, annual_max_salary, "USD"
|
||||||
|
else:
|
||||||
|
return interval, min_salary, max_salary, "USD"
|
||||||
return None, None, None, None
|
return None, None, None, None
|
||||||
|
|
Loading…
Reference in New Issue