mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-05 03:54:31 -08:00
Feat/multiple sites (#12)
* adding multiple search sites
* updating docs and postman
* threading per scraper type
This commit is contained in:
@@ -8,7 +8,8 @@ from api.core.users import UserInDB
|
||||
from settings import SUPABASE_URL, SUPABASE_KEY
|
||||
|
||||
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
||||
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
||||
if SUPABASE_URL:
|
||||
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
||||
|
||||
|
||||
def create_user(user_create: UserInDB):
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from ..jobs import *
|
||||
from typing import List
|
||||
|
||||
|
||||
class StatusException(Exception):
|
||||
@@ -13,7 +14,7 @@ class Site(Enum):
|
||||
|
||||
|
||||
class ScraperInput(BaseModel):
|
||||
site_type: Site
|
||||
site_type: List[Site]
|
||||
search_term: str
|
||||
|
||||
location: str = None
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from fastapi import APIRouter
|
||||
|
||||
from api.core.scrapers.indeed import IndeedScraper
|
||||
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
|
||||
from api.core.scrapers.linkedin import LinkedInScraper
|
||||
from api.core.scrapers import ScraperInput, Site, JobResponse
|
||||
from typing import List
|
||||
|
||||
router = APIRouter(prefix="/jobs", tags=["jobs"])
|
||||
|
||||
@@ -14,11 +16,14 @@ SCRAPER_MAPPING = {
|
||||
}
|
||||
|
||||
|
||||
@router.post("/", response_model=JobResponse)
|
||||
async def scrape_jobs(scraper_input: ScraperInput):
|
||||
scraper_class = SCRAPER_MAPPING[scraper_input.site_type]
|
||||
scraper = scraper_class()
|
||||
@router.post("/", response_model=List[JobResponse])
|
||||
async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]:
|
||||
def scrape_site(site: str) -> JobResponse:
|
||||
scraper_class = SCRAPER_MAPPING[site]
|
||||
scraper = scraper_class()
|
||||
return scraper.scrape(scraper_input)
|
||||
|
||||
job_response = scraper.scrape(scraper_input)
|
||||
with ThreadPoolExecutor() as executor:
|
||||
resp = list(executor.map(scrape_site, scraper_input.site_type))
|
||||
|
||||
return job_response
|
||||
return resp
|
||||
|
||||
Reference in New Issue
Block a user