# JobSpy/api/v1/jobs/__init__.py — job-scraping API endpoints.
import asyncio
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Tuple

from fastapi import APIRouter

from api.core.scrapers import JobResponse, ScraperInput, Site
from api.core.scrapers.indeed import IndeedScraper
from api.core.scrapers.linkedin import LinkedInScraper
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
# FastAPI router for all job-scraping endpoints, mounted under /jobs.
router = APIRouter(prefix="/jobs", tags=["jobs"])
# Dispatch table: each supported job board maps to the scraper class that
# handles it. scrape_jobs looks sites up here by their Site enum member.
SCRAPER_MAPPING = {
    Site.LINKEDIN: LinkedInScraper,
    Site.INDEED: IndeedScraper,
    Site.ZIP_RECRUITER: ZipRecruiterScraper,
}
@router.post("/")
async def scrape_jobs(scraper_input: ScraperInput) -> Dict[str, JobResponse]:
    """
    Scrape job data concurrently from every site requested in the input.

    Each site's scraper is blocking, so it is dispatched to a worker thread
    with ``asyncio.to_thread`` and all sites are awaited together with
    ``asyncio.gather`` — the event loop is never blocked while scraping.

    :param scraper_input: search parameters, including ``site_type``, the
        list of :class:`Site` members to scrape.
    :return: mapping of site name (``Site.value``) to that site's
        :class:`JobResponse`.
    """

    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
        # Look up and instantiate the scraper registered for this site,
        # then run the (blocking) scrape with the shared input.
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class()
        scraped_data = scraper.scrape(scraper_input)
        return site.value, scraped_data

    # Bug fix: the original called ThreadPoolExecutor.map() synchronously
    # inside this async endpoint, blocking the event loop (and every other
    # request) for the full duration of the scrape. to_thread + gather runs
    # the same work concurrently without blocking the loop.
    results = await asyncio.gather(
        *(asyncio.to_thread(scrape_site, site) for site in scraper_input.site_type)
    )
    return dict(results)