Compare commits

...

2 Commits

Author SHA1 Message Date
Cullen
216d3fd39f ziprecruiter: 5s delay 2023-10-28 16:41:32 -05:00
Cullen Watson
d3bfdc0a6e ziprecruiter api (#63) 2023-10-28 16:17:28 -05:00
3 changed files with 14 additions and 7 deletions

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "python-jobspy" name = "python-jobspy"
version = "1.1.15" version = "1.1.16"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter" description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"] authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/JobSpy" homepage = "https://github.com/Bunsly/JobSpy"

View File

@@ -177,8 +177,8 @@ class CompensationInterval(Enum):
class Compensation(BaseModel): class Compensation(BaseModel):
interval: Optional[CompensationInterval] = None interval: Optional[CompensationInterval] = None
min_amount: int = None min_amount: int | None = None
max_amount: int = None max_amount: int | None = None
currency: Optional[str] = "USD" currency: Optional[str] = "USD"

View File

@@ -5,7 +5,7 @@ jobspy.scrapers.ziprecruiter
This module contains routines to scrape ZipRecruiter. This module contains routines to scrape ZipRecruiter.
""" """
import math import math
import json import time
import re import re
from datetime import datetime, date from datetime import datetime, date
from typing import Optional, Tuple, Any from typing import Optional, Tuple, Any
@@ -68,9 +68,11 @@ class ZipRecruiterScraper(Scraper):
raise ZipRecruiterException("bad proxy") raise ZipRecruiterException("bad proxy")
raise ZipRecruiterException(str(e)) raise ZipRecruiterException(str(e))
time.sleep(5)
response_data = response.json() response_data = response.json()
jobs_list = response_data.get("jobs", []) jobs_list = response_data.get("jobs", [])
next_continue_token = response_data.get('continue', None) next_continue_token = response_data.get('continue', None)
print(len(jobs_list))
with ThreadPoolExecutor(max_workers=10) as executor: with ThreadPoolExecutor(max_workers=10) as executor:
job_results = [ job_results = [
@@ -113,12 +115,12 @@ class ZipRecruiterScraper(Scraper):
title = job.get("name") title = job.get("name")
job_url = job.get("job_url") job_url = job.get("job_url")
# job_url = updated_job_url if updated_job_url else job_url
description = BeautifulSoup( description = BeautifulSoup(
job.get("job_description", "").strip(), "html.parser" job.get("job_description", "").strip(), "html.parser"
).get_text() ).get_text()
company = job.get("source") company = job['hiring_company'].get("name") if "hiring_company" in job else None
location = Location( location = Location(
city=job.get("job_city"), state=job.get("job_state"), country='usa' if job.get("job_country") == 'US' else 'canada' city=job.get("job_city"), state=job.get("job_state"), country='usa' if job.get("job_country") == 'US' else 'canada'
) )
@@ -142,7 +144,12 @@ class ZipRecruiterScraper(Scraper):
company_name=company, company_name=company,
location=location, location=location,
job_type=job_type, job_type=job_type,
# compensation=compensation, compensation=Compensation(
interval="yearly" if job.get("compensation_interval") == "annual" else job.get("compensation_interval") ,
min_amount=int(job["compensation_min"]) if "compensation_min" in job else None,
max_amount=int(job["compensation_max"]) if "compensation_max" in job else None,
currency=job.get("compensation_currency"),
),
date_posted=date_posted, date_posted=date_posted,
job_url=job_url, job_url=job_url,
description=description, description=description,