mirror of https://github.com/Bunsly/JobSpy

feat(jobs): add site_type param

parent d69c41392d
commit a5920ddc52
@@ -1,15 +1,18 @@
-from pydantic import BaseModel
+from typing import Union
 from datetime import datetime
 from enum import Enum
 
+from pydantic import BaseModel
+
 
 class JobType(Enum):
-    FULL_TIME = "full_time"
-    PART_TIME = "part_time"
-    CONTRACT = "contractor"
-    INTERNSHIP = "internship"
+    FULL_TIME = "fulltime"
+    PART_TIME = "parttime"
+    CONTRACT = "contract"
     TEMPORARY = "temporary"
-    PER_DIEM = "per_diem"
+    INTERNSHIP = "internship"
+
+    PER_DIEM = "perdiem"
     NIGHTS = "nights"
     OTHER = "other"
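Note: the renamed values match the compact strings the job boards themselves emit, so a scraped string can be converted by plain Enum value lookup. A minimal sketch (not part of the commit):

    from enum import Enum

    class JobType(Enum):
        FULL_TIME = "fulltime"
        PART_TIME = "parttime"
        CONTRACT = "contract"

    # Enum lookup by value: a scraped string maps straight to a member
    # (and raises ValueError for anything unrecognized).
    assert JobType("fulltime") is JobType.FULL_TIME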
@@ -1,6 +1,4 @@
-from pydantic import BaseModel
-from enum import Enum
-from ..jobs import JobResponse
+from ..jobs import *
 
 
 class Site(Enum):
@@ -16,6 +14,7 @@ class ScraperInput(BaseModel):
     location: str = None
     distance: int = None
     is_remote: bool = False
+    job_type: JobType = None
 
     results_wanted: int = 15
 
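Note: a hypothetical call showing the new field in use; the other field names come from the hunks in this commit, while the literal values are made up:

    scraper_input = ScraperInput(
        search_term="software engineer",  # illustrative value
        location="Dallas, TX",
        distance=25,
        is_remote=False,
        job_type=JobType.FULL_TIME,  # the new parameter
    )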
@@ -44,17 +44,23 @@ class IndeedScraper(Scraper):
             "q": scraper_input.search_term,
             "location": scraper_input.location,
             "radius": scraper_input.distance,
-            "sc": "0kf:attr(DSQF7);" if scraper_input.is_remote else None,
             "filter": 0,
             "start": 0 + page * 10,
         }
+        sc_values = []
+        if scraper_input.is_remote:
+            sc_values.append("attr(DSQF7)")
+        if scraper_input.job_type:
+            sc_values.append("jt({})".format(scraper_input.job_type.value))
 
+        if sc_values:
+            params["sc"] = "0kf:" + "".join(sc_values) + ";"
         response = session.get(self.url, params=params)
 
-        if response.status_code == 307:
-            new_url = response.headers["Location"]
-            response = session.get(new_url)
-        if response.status_code != status.HTTP_200_OK:
+        if (
+            response.status_code != status.HTTP_200_OK
+            and response.status_code != status.HTTP_307_TEMPORARY_REDIRECT
+        ):
             return JobResponse(
                 success=False,
                 error=f"Response returned {response.status_code}",
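Note: a worked example of the string building above; for a remote, full-time search the single `sc` parameter comes out as:

    sc_values = ["attr(DSQF7)", "jt(fulltime)"]
    sc = "0kf:" + "".join(sc_values) + ";"
    # -> "0kf:attr(DSQF7)jt(fulltime);"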
@@ -150,7 +156,7 @@ class IndeedScraper(Scraper):
     @staticmethod
     def get_job_type(job: dict) -> Optional[JobType]:
         """
-        Parses the job to get JobType
+        Parses the job to get JobTypeIndeed
         :param job:
         :return:
         """
@@ -30,6 +30,17 @@ class LinkedInScraper(Scraper):
         seen_urls = set()
         page, processed_jobs, job_count = 0, 0, 0
 
+        def job_type_code(job_type):
+            mapping = {
+                JobType.FULL_TIME: "F",
+                JobType.PART_TIME: "P",
+                JobType.INTERNSHIP: "I",
+                JobType.CONTRACT: "C",
+                JobType.TEMPORARY: "T",
+            }
+
+            return mapping.get(job_type, "")
+
         with requests.Session() as session:
             while len(job_list) < scraper_input.results_wanted:
                 params = {
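Note: LinkedIn's `f_JT` filter takes a single letter per employment type; the helper maps the enum to that letter and falls back to an empty string. A worked example (not in the commit):

    job_type_code(JobType.FULL_TIME)  # -> "F"
    job_type_code(JobType.PER_DIEM)   # -> "" (no code in the mapping)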
@@ -37,6 +48,9 @@ class LinkedInScraper(Scraper):
                     "location": scraper_input.location,
                     "distance": scraper_input.distance,
                     "f_WT": 2 if scraper_input.is_remote else None,
+                    "f_JT": job_type_code(scraper_input.job_type)
+                    if scraper_input.job_type
+                    else None,
                     "pageNum": page,
                 }
 
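Note: the `else None` branches rely on requests dropping None-valued params when it builds the query string, so unset filters are omitted from the URL entirely. A self-contained check (the example.com URL is illustrative):

    import requests

    req = requests.Request(
        "GET", "https://example.com/jobs", params={"f_WT": 2, "f_JT": None}
    ).prepare()
    print(req.url)  # https://example.com/jobs?f_WT=2 -- f_JT is dropped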
@@ -35,6 +35,15 @@ class ZipRecruiterScraper(Scraper):
         processed_jobs, job_count = 0, 0
         seen_urls = set()
         while len(job_list) < scraper_input.results_wanted:
+            job_type_value = None
+            if scraper_input.job_type:
+                if scraper_input.job_type.value == "fulltime":
+                    job_type_value = "full_time"
+                elif scraper_input.job_type.value == "parttime":
+                    job_type_value = "part_time"
+                else:
+                    job_type_value = scraper_input.job_type.value
+
             params = {
                 "search": scraper_input.search_term,
                 "location": scraper_input.location,
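Note: ZipRecruiter's filter keeps the underscored spellings that the enum just dropped, hence the translation back; all other values pass through unchanged:

    # "fulltime" -> "full_time"
    # "parttime" -> "part_time"
    # "contract" -> "contract" (and likewise for the remaining values)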
@@ -42,12 +51,16 @@ class ZipRecruiterScraper(Scraper):
                 "refine_by_location_type": "only_remote"
                 if scraper_input.is_remote
                 else None,
+                "refine_by_employment": f"employment_type:employment_type:{job_type_value}"
+                if job_type_value
+                else None,
                 "page": page,
             }
 
             response = session.get(
                 self.url, headers=ZipRecruiterScraper.headers(), params=params
             )
+            print(response.url)
             if response.status_code != status.HTTP_200_OK:
                 return JobResponse(
                     success=False,
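Note: with job_type_value = "full_time" the new filter renders as below; the doubled "employment_type:" prefix is copied verbatim from the f-string in the hunk:

    job_type_value = "full_time"
    refine = f"employment_type:employment_type:{job_type_value}"
    # -> "employment_type:employment_type:full_time"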
@@ -74,15 +87,22 @@ class ZipRecruiterScraper(Scraper):
             company = job.find("a", {"class": "company_name"}).text.strip()
             description = job.find("p", {"class": "job_snippet"}).text.strip()
             job_type_element = job.find("li", {"class": "perk_item perk_type"})
-            job_type = (
-                job_type_element.text.strip().lower().replace("-", "_")
-                if job_type_element
-                else None
-            )
+            if job_type_element:
+                job_type_text = (
+                    job_type_element.text.strip()
+                    .lower()
+                    .replace("-", "")
+                    .replace(" ", "")
+                )
+                if job_type_text == "contractor":
+                    job_type_text = "contract"
+                job_type = JobType(job_type_text)
+            else:
+                job_type = None
 
             date_posted = ZipRecruiterScraper.get_date_posted(job)
 
-            job_type = job_type.replace(" ", "_") if job_type else job_type
             job_post = JobPost(
                 title=title,
                 description=description,
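Note: the normalization collapses the page's perk text into the enum's compact values before the `JobType(...)` lookup; a worked example assuming the JobType enum from the first hunk (the raw strings are illustrative):

    for raw in ("Full-Time", "Part Time", "Contractor"):
        text = raw.strip().lower().replace("-", "").replace(" ", "")
        if text == "contractor":
            text = "contract"
        print(JobType(text))
    # JobType.FULL_TIME, JobType.PART_TIME, JobType.CONTRACT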
@@ -6,4 +6,4 @@ SUPABASE_URL = os.environ.get("SUPABASE_URL")
 SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
 JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
 ALGORITHM = "HS256"
-ACCESS_TOKEN_EXPIRE_MINUTES = 120
+ACCESS_TOKEN_EXPIRE_MINUTES = 3600