From 7285ca7108211b3368185f642d977f59a8cc5567 Mon Sep 17 00:00:00 2001
From: Cullen Watson <cullen@cullenwatson.com>
Date: Sat, 26 Aug 2023 14:25:52 -0500
Subject: [PATCH] feat(scraper): threading per scraper type

---
 README.md               |  2 +-
 api/auth/db_utils.py    |  5 ++++-
 api/v1/jobs/__init__.py | 14 +++++++++-----
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 14dc31b..ad62882 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@
 - **distance**: int
 - **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract`
 - **is_remote**: bool
-- **results_wanted**: int
+- **results_wanted**: int (per `site_type`)
 - **easy_apply**: bool (Only for LinkedIn)
 
 ### Example
diff --git a/api/auth/db_utils.py b/api/auth/db_utils.py
index bca7f4c..696513a 100644
--- a/api/auth/db_utils.py
+++ b/api/auth/db_utils.py
@@ -8,7 +8,10 @@ from api.core.users import UserInDB
 from settings import SUPABASE_URL, SUPABASE_KEY
 
 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
-supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+if SUPABASE_URL:
+    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+else:
+    supabase = None  # allow startup without Supabase config; auth calls will error if used
 
 
 def create_user(user_create: UserInDB):
diff --git a/api/v1/jobs/__init__.py b/api/v1/jobs/__init__.py
index a78862d..f45cfe8 100644
--- a/api/v1/jobs/__init__.py
+++ b/api/v1/jobs/__init__.py
@@ -1,3 +1,4 @@
+from concurrent.futures import ThreadPoolExecutor
 from fastapi import APIRouter
 
 from api.core.scrapers.indeed import IndeedScraper
@@ -16,12 +17,15 @@ SCRAPER_MAPPING = {
 
 
 @router.post("/", response_model=List[JobResponse])
-async def scrape_jobs(scraper_input: ScraperInput) -> JobResponse:
-    resp = []
-    for site in scraper_input.site_type:
+async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]:
+    def scrape_site(site: str) -> JobResponse:
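+        # look up the scraper class for this site and run it against the shared input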
         scraper_class = SCRAPER_MAPPING[site]
         scraper = scraper_class()
-        job_response = scraper.scrape(scraper_input)
-        resp.append(job_response)
+        return scraper.scrape(scraper_input)
+
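+    # one worker thread per requested site; map() blocks until all finish and preserves input order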
+    with ThreadPoolExecutor() as executor:
+        resp = list(executor.map(scrape_site, scraper_input.site_type))
 
     return resp