feat(scraper): threading per scraper type

pull/12/head
Cullen Watson 2023-08-26 14:25:52 -05:00
parent 790dc12fdf
commit 7285ca7108
3 changed files with 10 additions and 7 deletions

View File

@@ -23,7 +23,7 @@
 - **distance**: int
 - **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract`
 - **is_remote**: bool
-- **results_wanted**: int
+- **results_wanted**: int (per `site_type`)
 - **easy_apply**: bool (Only for LinkedIn)
 ### Example
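
For reference, a minimal sketch of a request that exercises the parameters documented above. The host, route, and the `search_term` field are assumptions for illustration; only the bulleted fields come from this hunk, and `results_wanted` now applies to each entry in `site_type` rather than to the whole request.

```python
# Hypothetical request against the scrape endpoint; the URL, port, and
# search_term are assumptions, the other fields mirror the README bullets.
import requests

payload = {
    "site_type": ["indeed", "linkedin"],  # one scraper runs per listed site
    "search_term": "software engineer",   # assumed field, not shown in this hunk
    "job_type": "fulltime",
    "is_remote": True,
    "results_wanted": 15,                 # 15 results from *each* site_type
}

resp = requests.post("http://localhost:8000/api/v1/jobs/", json=payload)
print(resp.json())
```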

View File

@@ -8,7 +8,8 @@ from api.core.users import UserInDB
 from settings import SUPABASE_URL, SUPABASE_KEY
 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
-supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+if SUPABASE_URL:
+    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
 def create_user(user_create: UserInDB):
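
The hunk above only instantiates the Supabase client when `SUPABASE_URL` is set, so the module can be imported (for example in local runs or tests) without credentials. A minimal standalone sketch of that guard, assuming environment-based settings and a `None` fallback so the name is always defined; the fallback is an illustration, not part of the diff:

```python
import os
from typing import Optional

from supabase import Client, create_client

SUPABASE_URL = os.getenv("SUPABASE_URL")  # normally imported from settings
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

supabase: Optional[Client] = None  # assumed default so callers can check for None
if SUPABASE_URL:
    # Only reach out to Supabase when a URL is actually configured.
    supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
```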

View File

@@ -1,3 +1,4 @@
+from concurrent.futures import ThreadPoolExecutor
 from fastapi import APIRouter
 from api.core.scrapers.indeed import IndeedScraper
@@ -16,12 +17,13 @@ SCRAPER_MAPPING = {
 @router.post("/", response_model=List[JobResponse])
-async def scrape_jobs(scraper_input: ScraperInput) -> JobResponse:
-    resp = []
-    for site in scraper_input.site_type:
+async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]:
+    def scrape_site(site: str) -> JobResponse:
         scraper_class = SCRAPER_MAPPING[site]
         scraper = scraper_class()
-        job_response = scraper.scrape(scraper_input)
-        resp.append(job_response)
+        return scraper.scrape(scraper_input)
+
+    with ThreadPoolExecutor() as executor:
+        resp = list(executor.map(scrape_site, scraper_input.site_type))
     return resp
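
A standalone sketch of the per-site threading this commit introduces: each entry in `site_type` is scraped in its own worker thread, and `executor.map` returns the responses in the same order as the input list. The scraper class below is a stand-in for illustration, not the project's real implementation.

```python
from concurrent.futures import ThreadPoolExecutor
import time


class FakeScraper:
    """Stand-in for a network-bound scraper such as IndeedScraper."""

    def __init__(self, site: str):
        self.site = site

    def scrape(self, search_term: str) -> dict:
        time.sleep(1)  # simulates HTTP round trips
        return {"site": self.site, "term": search_term, "jobs": []}


SCRAPER_MAPPING = {"indeed": FakeScraper, "linkedin": FakeScraper}


def scrape_jobs(site_types: list[str], search_term: str) -> list[dict]:
    def scrape_site(site: str) -> dict:
        scraper = SCRAPER_MAPPING[site](site)
        return scraper.scrape(search_term)

    # The threads overlap their waits, so two sites finish in ~1s, not ~2s.
    with ThreadPoolExecutor() as executor:
        return list(executor.map(scrape_site, site_types))


if __name__ == "__main__":
    start = time.time()
    print(scrape_jobs(["indeed", "linkedin"], "python developer"))
    print(f"elapsed: {time.time() - start:.1f}s")
```

Threads suit this workload because the scrapers spend most of their time waiting on network I/O, during which the GIL is released.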