From 4d04bb63e26fab9dd11120f134b4edd536db9951 Mon Sep 17 00:00:00 2001 From: mlnl Date: Sat, 26 Aug 2023 12:28:02 -0700 Subject: [PATCH] Feat/multiple sites (#12) * adding multiple search sites * updating docs and postman * threading per scraper type --- .vscode/launch.json | 16 ++++++++++++++++ README.md | 4 ++-- api/auth/db_utils.py | 3 ++- api/core/scrapers/__init__.py | 3 ++- api/v1/jobs/__init__.py | 17 +++++++++++------ postman/JobSpy.postman_collection.json | 2 +- 6 files changed, 34 insertions(+), 11 deletions(-) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..e5f6771 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Module", + "type": "python", + "request": "launch", + "module": "uvicorn", + "args": ["main:app","--reload"] + } + + ] +} \ No newline at end of file diff --git a/README.md b/README.md index c333f07..ad62882 100644 --- a/README.md +++ b/README.md @@ -17,13 +17,13 @@ **Endpoint**: `/api/v1/jobs/` #### Parameters: -- **site_type**: str (Required) - Options: `linkedin`, `zip_recruiter`, `indeed` +- **site_type**: List[str] (Required) - Options: `linkedin`, `zip_recruiter`, `indeed` - **search_term**: str (Required) - **location**: int - **distance**: int - **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract` - **is_remote**: bool -- **results_wanted**: int + - **results_wanted**: int (per `site_type`) - **easy_apply**: bool (Only for LinkedIn) ### Example diff --git a/api/auth/db_utils.py b/api/auth/db_utils.py index bca7f4c..696513a 100644 --- a/api/auth/db_utils.py +++ b/api/auth/db_utils.py @@ -8,7 +8,8 @@ from api.core.users import UserInDB from settings import SUPABASE_URL, SUPABASE_KEY pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") -supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY) +if SUPABASE_URL: + supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY) def create_user(user_create: UserInDB): diff --git a/api/core/scrapers/__init__.py b/api/core/scrapers/__init__.py index 401f9bc..057036b 100644 --- a/api/core/scrapers/__init__.py +++ b/api/core/scrapers/__init__.py @@ -1,4 +1,5 @@ from ..jobs import * +from typing import List class StatusException(Exception): @@ -13,7 +14,7 @@ class Site(Enum): class ScraperInput(BaseModel): - site_type: Site + site_type: List[Site] search_term: str location: str = None diff --git a/api/v1/jobs/__init__.py b/api/v1/jobs/__init__.py index 66fed9e..f45cfe8 100644 --- a/api/v1/jobs/__init__.py +++ b/api/v1/jobs/__init__.py @@ -1,9 +1,11 @@ +from concurrent.futures import ThreadPoolExecutor from fastapi import APIRouter from api.core.scrapers.indeed import IndeedScraper from api.core.scrapers.ziprecruiter import ZipRecruiterScraper from api.core.scrapers.linkedin import LinkedInScraper from api.core.scrapers import ScraperInput, Site, JobResponse +from typing import List router = APIRouter(prefix="/jobs", tags=["jobs"]) @@ -14,11 +16,14 @@ SCRAPER_MAPPING = { } -@router.post("/", response_model=JobResponse) -async def scrape_jobs(scraper_input: ScraperInput): - scraper_class = SCRAPER_MAPPING[scraper_input.site_type] - scraper = scraper_class() +@router.post("/", response_model=List[JobResponse]) +async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]: + def scrape_site(site: str) -> JobResponse: + scraper_class = SCRAPER_MAPPING[site] + scraper = scraper_class() + return scraper.scrape(scraper_input) - job_response = scraper.scrape(scraper_input) + with ThreadPoolExecutor() as executor: + resp = list(executor.map(scrape_site, scraper_input.site_type)) - return job_response + return resp diff --git a/postman/JobSpy.postman_collection.json b/postman/JobSpy.postman_collection.json index 639eaf8..bbf78ff 100644 --- a/postman/JobSpy.postman_collection.json +++ b/postman/JobSpy.postman_collection.json @@ -23,7 +23,7 @@ "header": [], "body": { "mode": "raw", - "raw": "{\r\n \"site_type\": \"linkedin\", // linkedin / indeed / zip_recruiter\r\n \"search_term\": \"engineer\",\r\n\r\n // optional\r\n \"location\": \"tx\",\r\n \"distance\": 10,\r\n \"job_type\": \"fulltime\", // fulltime, parttime, internship, contract\r\n // \"is_remote\": true,\r\n \"easy_apply\": true, // linkedin only\r\n \"results_wanted\": 10 \r\n}", + "raw": "{\r\n \"site_type\": [\"linkedin\"], // linkedin / indeed / zip_recruiter\r\n \"search_term\": \"engineer\",\r\n\r\n // optional\r\n \"location\": \"tx\",\r\n \"distance\": 10,\r\n \"job_type\": \"fulltime\", // fulltime, parttime, internship, contract\r\n // \"is_remote\": true,\r\n \"easy_apply\": true, // linkedin only\r\n \"results_wanted\": 10 \r\n}", "options": { "raw": { "language": "json"