feat(scraper): threading per scraper type

pull/12/head
Cullen Watson 2023-08-26 14:25:52 -05:00
parent 790dc12fdf
commit 7285ca7108
3 changed files with 10 additions and 7 deletions

View File

@@ -23,7 +23,7 @@
 - **distance**: int
 - **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract`
 - **is_remote**: bool
-- **results_wanted**: int
+- **results_wanted**: int (per `site_type`)
 - **easy_apply**: bool (Only for LinkedIn)
 ### Example
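
For reference, a minimal sketch of a request that exercises the parameters documented above. The host, route, and the `search_term` field are assumptions for illustration; only the bulleted fields come from this hunk, and `results_wanted` now applies to each entry in `site_type` rather than to the whole request.

```python
# Hypothetical request against the scrape endpoint; the URL, port, and
# search_term are assumptions, the other fields mirror the README bullets.
import requests

payload = {
    "site_type": ["indeed", "linkedin"],  # one scraper runs per listed site
    "search_term": "software engineer",   # assumed field, not shown in this hunk
    "job_type": "fulltime",
    "is_remote": True,
    "results_wanted": 15,                 # 15 results from *each* site_type
}

resp = requests.post("http://localhost:8000/api/v1/jobs/", json=payload)
print(resp.json())
```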

View File

@@ -8,7 +8,8 @@ from api.core.users import UserInDB
 from settings import SUPABASE_URL, SUPABASE_KEY
 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
-supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+if SUPABASE_URL:
+    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
 def create_user(user_create: UserInDB):
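
The hunk above only instantiates the Supabase client when `SUPABASE_URL` is set, so the module can be imported (for example in local runs or tests) without credentials. A minimal standalone sketch of that guard, assuming environment-based settings and a `None` fallback so the name is always defined; the fallback is an illustration, not part of the diff:

```python
import os
from typing import Optional

from supabase import Client, create_client

SUPABASE_URL = os.getenv("SUPABASE_URL")  # normally imported from settings
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

supabase: Optional[Client] = None  # assumed default so callers can check for None
if SUPABASE_URL:
    # Only reach out to Supabase when a URL is actually configured.
    supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
```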

View File

@@ -1,3 +1,4 @@
+from concurrent.futures import ThreadPoolExecutor
 from fastapi import APIRouter
 from api.core.scrapers.indeed import IndeedScraper
@@ -16,12 +17,13 @@ SCRAPER_MAPPING = {
 @router.post("/", response_model=List[JobResponse])
-async def scrape_jobs(scraper_input: ScraperInput) -> JobResponse:
-    resp = []
-    for site in scraper_input.site_type:
+async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]:
+    def scrape_site(site: str) -> JobResponse:
         scraper_class = SCRAPER_MAPPING[site]
         scraper = scraper_class()
-        job_response = scraper.scrape(scraper_input)
-        resp.append(job_response)
+        return scraper.scrape(scraper_input)
+
+    with ThreadPoolExecutor() as executor:
+        resp = list(executor.map(scrape_site, scraper_input.site_type))
     return resp
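
A standalone sketch of the per-site threading this commit introduces: each entry in `site_type` is scraped in its own worker thread, and `executor.map` returns the responses in the same order as the input list. The scraper class below is a stand-in for illustration, not the project's real implementation.

```python
from concurrent.futures import ThreadPoolExecutor
import time


class FakeScraper:
    """Stand-in for a network-bound scraper such as IndeedScraper."""

    def __init__(self, site: str):
        self.site = site

    def scrape(self, search_term: str) -> dict:
        time.sleep(1)  # simulates HTTP round trips
        return {"site": self.site, "term": search_term, "jobs": []}


SCRAPER_MAPPING = {"indeed": FakeScraper, "linkedin": FakeScraper}


def scrape_jobs(site_types: list[str], search_term: str) -> list[dict]:
    def scrape_site(site: str) -> dict:
        scraper = SCRAPER_MAPPING[site](site)
        return scraper.scrape(search_term)

    # The threads overlap their waits, so two sites finish in ~1s, not ~2s.
    with ThreadPoolExecutor() as executor:
        return list(executor.map(scrape_site, site_types))


if __name__ == "__main__":
    start = time.time()
    print(scrape_jobs(["indeed", "linkedin"], "python developer"))
    print(f"elapsed: {time.time() - start:.1f}s")
```

Threads suit this workload because the scrapers spend most of their time waiting on network I/O, during which the GIL is released.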