feat(scraper): threading per scraper type

pull/12/head
Cullen Watson 2023-08-26 14:25:52 -05:00
parent 790dc12fdf
commit 7285ca7108
3 changed files with 10 additions and 7 deletions

View File

@@ -23,7 +23,7 @@
 - **distance**: int
 - **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract`
 - **is_remote**: bool
-- **results_wanted**: int
+- **results_wanted**: int (per `site_type`)
 - **easy_apply**: bool (Only for LinkedIn)
 ### Example

View File

@@ -8,6 +8,7 @@ from api.core.users import UserInDB
 from settings import SUPABASE_URL, SUPABASE_KEY
 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
-supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+if SUPABASE_URL:
+    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

View File

@@ -1,3 +1,4 @@
+from concurrent.futures import ThreadPoolExecutor
 from fastapi import APIRouter
 from api.core.scrapers.indeed import IndeedScraper
@@ -16,12 +17,13 @@ SCRAPER_MAPPING = {
 @router.post("/", response_model=List[JobResponse])
-async def scrape_jobs(scraper_input: ScraperInput) -> JobResponse:
-    resp = []
-    for site in scraper_input.site_type:
-        scraper_class = SCRAPER_MAPPING[site]
-        scraper = scraper_class()
-        job_response = scraper.scrape(scraper_input)
-        resp.append(job_response)
+async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]:
+    def scrape_site(site: str) -> JobResponse:
+        scraper_class = SCRAPER_MAPPING[site]
+        scraper = scraper_class()
+        return scraper.scrape(scraper_input)
+
+    with ThreadPoolExecutor() as executor:
+        resp = list(executor.map(scrape_site, scraper_input.site_type))
     return resp