mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-04 19:44:30 -08:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | b303b3f841 | | |
| | 1a0c75f323 | | |
| | e2f6885d61 | | |
| | 8d65d1b652 | | |
| | 216d3fd39f | | |
| | d3bfdc0a6e | | |
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "python-jobspy"
|
name = "python-jobspy"
|
||||||
version = "1.1.15"
|
version = "1.1.21"
|
||||||
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
|
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
|
||||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||||
homepage = "https://github.com/Bunsly/JobSpy"
|
homepage = "https://github.com/Bunsly/JobSpy"
|
||||||
|
|||||||
@@ -177,8 +177,8 @@ class CompensationInterval(Enum):
|
|||||||
|
|
||||||
class Compensation(BaseModel):
|
class Compensation(BaseModel):
|
||||||
interval: Optional[CompensationInterval] = None
|
interval: Optional[CompensationInterval] = None
|
||||||
min_amount: int = None
|
min_amount: int | None = None
|
||||||
max_amount: int = None
|
max_amount: int | None = None
|
||||||
currency: Optional[str] = "USD"
|
currency: Optional[str] = "USD"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ jobspy.scrapers.ziprecruiter
|
|||||||
This module contains routines to scrape ZipRecruiter.
|
This module contains routines to scrape ZipRecruiter.
|
||||||
"""
|
"""
|
||||||
import math
|
import math
|
||||||
import json
|
import time
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, date
|
from datetime import datetime, date
|
||||||
from typing import Optional, Tuple, Any
|
from typing import Optional, Tuple, Any
|
||||||
@@ -68,6 +68,7 @@ class ZipRecruiterScraper(Scraper):
|
|||||||
raise ZipRecruiterException("bad proxy")
|
raise ZipRecruiterException("bad proxy")
|
||||||
raise ZipRecruiterException(str(e))
|
raise ZipRecruiterException(str(e))
|
||||||
|
|
||||||
|
time.sleep(5)
|
||||||
response_data = response.json()
|
response_data = response.json()
|
||||||
jobs_list = response_data.get("jobs", [])
|
jobs_list = response_data.get("jobs", [])
|
||||||
next_continue_token = response_data.get('continue', None)
|
next_continue_token = response_data.get('continue', None)
|
||||||
@@ -113,12 +114,12 @@ class ZipRecruiterScraper(Scraper):
|
|||||||
title = job.get("name")
|
title = job.get("name")
|
||||||
job_url = job.get("job_url")
|
job_url = job.get("job_url")
|
||||||
|
|
||||||
# job_url = updated_job_url if updated_job_url else job_url
|
|
||||||
description = BeautifulSoup(
|
description = BeautifulSoup(
|
||||||
job.get("job_description", "").strip(), "html.parser"
|
job.get("job_description", "").strip(), "html.parser"
|
||||||
).get_text()
|
).get_text()
|
||||||
|
|
||||||
company = job.get("source")
|
company = job['hiring_company'].get("name") if "hiring_company" in job else None
|
||||||
location = Location(
|
location = Location(
|
||||||
city=job.get("job_city"), state=job.get("job_state"), country='usa' if job.get("job_country") == 'US' else 'canada'
|
city=job.get("job_city"), state=job.get("job_state"), country='usa' if job.get("job_country") == 'US' else 'canada'
|
||||||
)
|
)
|
||||||
@@ -142,7 +143,12 @@ class ZipRecruiterScraper(Scraper):
|
|||||||
company_name=company,
|
company_name=company,
|
||||||
location=location,
|
location=location,
|
||||||
job_type=job_type,
|
job_type=job_type,
|
||||||
# compensation=compensation,
|
compensation=Compensation(
|
||||||
|
interval="yearly" if job.get("compensation_interval") == "annual" else job.get("compensation_interval") ,
|
||||||
|
min_amount=int(job["compensation_min"]) if "compensation_min" in job else None,
|
||||||
|
max_amount=int(job["compensation_max"]) if "compensation_max" in job else None,
|
||||||
|
currency=job.get("compensation_currency"),
|
||||||
|
),
|
||||||
date_posted=date_posted,
|
date_posted=date_posted,
|
||||||
job_url=job_url,
|
job_url=job_url,
|
||||||
description=description,
|
description=description,
|
||||||
|
|||||||
Reference in New Issue
Block a user