Feat/multiple sites (#12)

* adding multiple search sites * updating docs and postman * threading per scraper type
2023-08-26 12:28:02 -07:00 · 2023-08-26 12:28:02 -07:00 · 4d04bb63e2
parent d67383f053
commit 4d04bb63e2
6 changed files with 34 additions and 11 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
 {
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Module",
            "type": "python",
            "request": "launch",
            "module": "uvicorn",
            "args": ["main:app","--reload"]
        }
    ]
 }
--- a/README.md
+++ b/README.md
@@ -17,13 +17,13 @@
 **Endpoint**: `/api/v1/jobs/`
 #### Parameters:
- **site_type**: str (Required) - Options: `linkedin`, `zip_recruiter`, `indeed`
+- **site_type**: List[str] (Required) - Options: `linkedin`, `zip_recruiter`, `indeed`
 - **search_term**: str (Required)
 - **location**: int
 - **distance**: int
 - **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract`
 - **is_remote**: bool
- **results_wanted**: int
+  - **results_wanted**: int (per `site_type`)
 - **easy_apply**: bool (Only for LinkedIn)
 ### Example
--- a/api/auth/db_utils.py
+++ b/api/auth/db_utils.py
@@ -8,6 +8,7 @@ from api.core.users import UserInDB
 from settings import SUPABASE_URL, SUPABASE_KEY
 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
 if SUPABASE_URL:
    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
--- a/api/core/scrapers/__init__.py
+++ b/api/core/scrapers/__init__.py
@@ -1,4 +1,5 @@
 from ..jobs import *
 from typing import List
 class StatusException(Exception):
@@ -13,7 +14,7 @@ class Site(Enum):
 class ScraperInput(BaseModel):
-    site_type: Site
+    site_type: List[Site]
    search_term: str
    location: str = None
--- a/api/v1/jobs/__init__.py
+++ b/api/v1/jobs/__init__.py
@@ -1,9 +1,11 @@
 from concurrent.futures import ThreadPoolExecutor
 from fastapi import APIRouter
 from api.core.scrapers.indeed import IndeedScraper
 from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
 from api.core.scrapers.linkedin import LinkedInScraper
 from api.core.scrapers import ScraperInput, Site, JobResponse
 from typing import List
 router = APIRouter(prefix="/jobs", tags=["jobs"])
@@ -14,11 +16,14 @@ SCRAPER_MAPPING = {
 }
-@router.post("/", response_model=JobResponse)
+@router.post("/", response_model=List[JobResponse])
-async def scrape_jobs(scraper_input: ScraperInput):
+async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]:
-    scraper_class = SCRAPER_MAPPING[scraper_input.site_type]
+    def scrape_site(site: str) -> JobResponse:
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class()
        return scraper.scrape(scraper_input)
-    job_response = scraper.scrape(scraper_input)
+    with ThreadPoolExecutor() as executor:
        resp = list(executor.map(scrape_site, scraper_input.site_type))
-    return job_response
+    return resp
--- a/postman/JobSpy.postman_collection.json
+++ b/postman/JobSpy.postman_collection.json
@@ -23,7 +23,7 @@
 				"header": [],
 				"body": {
 					"mode": "raw",
-					"raw": "{\r\n    \"site_type\": \"linkedin\", // linkedin / indeed / zip_recruiter\r\n    \"search_term\": \"engineer\",\r\n\r\n    // optional\r\n    \"location\": \"tx\",\r\n    \"distance\": 10,\r\n    \"job_type\": \"fulltime\", // fulltime, parttime, internship, contract\r\n    // \"is_remote\": true,\r\n    \"easy_apply\": true, // linkedin only\r\n    \"results_wanted\": 10   \r\n}",
+					"raw": "{\r\n    \"site_type\": [\"linkedin\"], // linkedin / indeed / zip_recruiter\r\n    \"search_term\": \"engineer\",\r\n\r\n    // optional\r\n    \"location\": \"tx\",\r\n    \"distance\": 10,\r\n    \"job_type\": \"fulltime\", // fulltime, parttime, internship, contract\r\n    // \"is_remote\": true,\r\n    \"easy_apply\": true, // linkedin only\r\n    \"results_wanted\": 10   \r\n}",
 					"options": {
 						"raw": {
 							"language": "json"