Feat/multiple sites (#12)

* adding multiple search sites

* updating docs and postman

* threading per scraper type
pull/15/head
mlnl 2023-08-26 12:28:02 -07:00 committed by GitHub
parent d67383f053
commit 4d04bb63e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 34 additions and 11 deletions

16
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Module",
"type": "python",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload"]
}
]
}

View File

@ -17,13 +17,13 @@
**Endpoint**: `/api/v1/jobs/` **Endpoint**: `/api/v1/jobs/`
#### Parameters: #### Parameters:
- **site_type**: str (Required) - Options: `linkedin`, `zip_recruiter`, `indeed` - **site_type**: List[str] (Required) - Options: `linkedin`, `zip_recruiter`, `indeed`
- **search_term**: str (Required) - **search_term**: str (Required)
- **location**: str
- **distance**: int - **distance**: int
- **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract` - **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract`
- **is_remote**: bool - **is_remote**: bool
- **results_wanted**: int - **results_wanted**: int (per `site_type`)
- **easy_apply**: bool (Only for LinkedIn) - **easy_apply**: bool (Only for LinkedIn)
### Example ### Example

View File

@ -8,6 +8,7 @@ from api.core.users import UserInDB
from settings import SUPABASE_URL, SUPABASE_KEY from settings import SUPABASE_URL, SUPABASE_KEY
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
if SUPABASE_URL:
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY) supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

View File

@ -1,4 +1,5 @@
from ..jobs import * from ..jobs import *
from typing import List
class StatusException(Exception): class StatusException(Exception):
@ -13,7 +14,7 @@ class Site(Enum):
class ScraperInput(BaseModel): class ScraperInput(BaseModel):
site_type: Site site_type: List[Site]
search_term: str search_term: str
location: str = None location: str = None

View File

@ -1,9 +1,11 @@
from concurrent.futures import ThreadPoolExecutor
from fastapi import APIRouter from fastapi import APIRouter
from api.core.scrapers.indeed import IndeedScraper from api.core.scrapers.indeed import IndeedScraper
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
from api.core.scrapers.linkedin import LinkedInScraper from api.core.scrapers.linkedin import LinkedInScraper
from api.core.scrapers import ScraperInput, Site, JobResponse from api.core.scrapers import ScraperInput, Site, JobResponse
from typing import List
router = APIRouter(prefix="/jobs", tags=["jobs"]) router = APIRouter(prefix="/jobs", tags=["jobs"])
@ -14,11 +16,14 @@ SCRAPER_MAPPING = {
} }
@router.post("/", response_model=JobResponse) @router.post("/", response_model=List[JobResponse])
async def scrape_jobs(scraper_input: ScraperInput): async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]:
scraper_class = SCRAPER_MAPPING[scraper_input.site_type] def scrape_site(site: str) -> JobResponse:
scraper_class = SCRAPER_MAPPING[site]
scraper = scraper_class() scraper = scraper_class()
return scraper.scrape(scraper_input)
job_response = scraper.scrape(scraper_input) with ThreadPoolExecutor() as executor:
resp = list(executor.map(scrape_site, scraper_input.site_type))
return job_response return resp

View File

@ -23,7 +23,7 @@
"header": [], "header": [],
"body": { "body": {
"mode": "raw", "mode": "raw",
"raw": "{\r\n \"site_type\": \"linkedin\", // linkedin / indeed / zip_recruiter\r\n \"search_term\": \"engineer\",\r\n\r\n // optional\r\n \"location\": \"tx\",\r\n \"distance\": 10,\r\n \"job_type\": \"fulltime\", // fulltime, parttime, internship, contract\r\n // \"is_remote\": true,\r\n \"easy_apply\": true, // linkedin only\r\n \"results_wanted\": 10 \r\n}", "raw": "{\r\n \"site_type\": [\"linkedin\"], // linkedin / indeed / zip_recruiter\r\n \"search_term\": \"engineer\",\r\n\r\n // optional\r\n \"location\": \"tx\",\r\n \"distance\": 10,\r\n \"job_type\": \"fulltime\", // fulltime, parttime, internship, contract\r\n // \"is_remote\": true,\r\n \"easy_apply\": true, // linkedin only\r\n \"results_wanted\": 10 \r\n}",
"options": { "options": {
"raw": { "raw": {
"language": "json" "language": "json"