update resp schema (#15)

pull/16/head
Cullen Watson 2023-08-26 20:30:00 -05:00 committed by GitHub
parent 70a7e7fe8d
commit 3210052177
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 260 additions and 41 deletions

280
README.md

File diff suppressed because one or more lines are too long

View File

@@ -5,7 +5,7 @@ from api.core.scrapers.indeed import IndeedScraper
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
from api.core.scrapers.linkedin import LinkedInScraper from api.core.scrapers.linkedin import LinkedInScraper
from api.core.scrapers import ScraperInput, Site, JobResponse from api.core.scrapers import ScraperInput, Site, JobResponse
from typing import List from typing import List, Dict, Tuple
router = APIRouter(prefix="/jobs", tags=["jobs"]) router = APIRouter(prefix="/jobs", tags=["jobs"])
@@ -16,14 +16,21 @@ SCRAPER_MAPPING = {
} }
@router.post("/", response_model=List[JobResponse]) @router.post("/")
async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]: async def scrape_jobs(scraper_input: ScraperInput) -> Dict[str, JobResponse]:
def scrape_site(site: str) -> JobResponse: """
Asynchronously scrapes job data from multiple job sites.
:param scraper_input:
:return: Dict[str, JobResponse]: where each key is a site
"""
def scrape_site(site: Site) -> Tuple[str, JobResponse]:
scraper_class = SCRAPER_MAPPING[site] scraper_class = SCRAPER_MAPPING[site]
scraper = scraper_class() scraper = scraper_class()
return scraper.scrape(scraper_input) scraped_data = scraper.scrape(scraper_input)
return (site.value, scraped_data)
with ThreadPoolExecutor() as executor: with ThreadPoolExecutor() as executor:
resp = list(executor.map(scrape_site, scraper_input.site_type)) resp_dict = {site: resp for site, resp in executor.map(scrape_site, scraper_input.site_type)}
return resp_dict
return resp