2024-03-10 21:36:27 -07:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2024-04-30 10:03:10 -07:00
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
2024-02-14 14:04:23 -08:00
|
|
|
from ..jobs import (
|
|
|
|
Enum,
|
|
|
|
BaseModel,
|
|
|
|
JobType,
|
|
|
|
JobResponse,
|
|
|
|
Country,
|
2024-03-10 21:36:27 -07:00
|
|
|
DescriptionFormat,
|
2024-02-14 14:04:23 -08:00
|
|
|
)
|
2023-07-06 17:12:01 -07:00
|
|
|
|
|
|
|
|
|
|
|
class Site(Enum):
    """Sites that a scraper or a search request can refer to.

    Each member's value is the lowercase string key for that site, so a
    member can be looked up from its key with ``Site("indeed")``.
    """

    LINKEDIN = "linkedin"
    INDEED = "indeed"
    ZIP_RECRUITER = "zip_recruiter"
    GLASSDOOR = "glassdoor"
|
2023-07-06 17:12:01 -07:00
|
|
|
|
2024-07-17 19:05:33 -07:00
|
|
|
class SalarySource(Enum):
    """Labels identifying the source of a salary value.

    NOTE(review): presumably ``DIRECT_DATA`` marks a salary supplied as
    structured data and ``DESCRIPTION`` one extracted from description
    text -- confirm against the code that assigns these.
    """

    DIRECT_DATA = "direct_data"
    DESCRIPTION = "description"
|
2023-07-06 17:12:01 -07:00
|
|
|
|
|
|
|
class ScraperInput(BaseModel):
    """Search parameters passed to ``Scraper.scrape``.

    ``site_type`` is the only field without a default; every other field
    may be omitted.

    NOTE(review): the per-field notes below are inferred from the field
    names and types only -- confirm the exact semantics (units, which
    sites honour which option) against the concrete scrapers.
    """

    site_type: list[Site]
    search_term: str | None = None

    location: str | None = None
    country: Country | None = Country.USA
    distance: int | None = None  # unit is not visible here -- TODO confirm
    is_remote: bool = False
    job_type: JobType | None = None
    easy_apply: bool | None = None
    offset: int = 0
    # The ``linkedin_`` prefix suggests these two apply to LinkedIn only -- verify.
    linkedin_fetch_description: bool = False
    linkedin_company_ids: list[int] | None = None
    description_format: DescriptionFormat | None = DescriptionFormat.MARKDOWN

    results_wanted: int = 15
    hours_old: int | None = None  # presumably a maximum posting age in hours -- confirm
|
2023-07-06 17:12:01 -07:00
|
|
|
|
|
|
|
|
2024-04-30 10:03:10 -07:00
|
|
|
class Scraper(ABC):
    """Abstract base class for site scrapers.

    Stores the configuration given at construction time and requires
    subclasses to implement :meth:`scrape`.
    """

    def __init__(self, site: Site, proxies: list[str] | None = None, ca_cert: str | None = None):
        """Store the scraper configuration on the instance.

        Args:
            site: The site this scraper is associated with.
            proxies: Optional list of proxy strings; stored as given.
            ca_cert: Optional CA certificate string; stored as given.
                NOTE(review): presumably a path to a certificate bundle --
                confirm against the subclasses that use it.
        """
        self.site = site
        self.proxies = proxies
        self.ca_cert = ca_cert

    @abstractmethod
    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
        """Run a search described by ``scraper_input`` and return the result.

        Subclasses must override this method; ``Scraper`` itself cannot be
        instantiated while it remains abstract.
        """
|