mirror of https://github.com/Bunsly/JobSpy
Feat/multiple sites (#12)
* adding multiple search sites * updating docs and postman * threading per scraper type pull/15/head
parent
d67383f053
commit
4d04bb63e2
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Module",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"args": ["main:app","--reload"]
|
||||
}
|
||||
|
||||
]
|
||||
}
|
|
@ -17,13 +17,13 @@
|
|||
**Endpoint**: `/api/v1/jobs/`
|
||||
|
||||
#### Parameters:
|
||||
- **site_type**: str (Required) - Options: `linkedin`, `zip_recruiter`, `indeed`
|
||||
- **site_type**: List[str] (Required) - Options: `linkedin`, `zip_recruiter`, `indeed`
|
||||
- **search_term**: str (Required)
|
||||
- **location**: str
|
||||
- **distance**: int
|
||||
- **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract`
|
||||
- **is_remote**: bool
|
||||
- **results_wanted**: int
|
||||
- **results_wanted**: int (per `site_type`)
|
||||
- **easy_apply**: bool (Only for LinkedIn)
|
||||
|
||||
### Example
|
||||
|
|
|
@ -8,6 +8,7 @@ from api.core.users import UserInDB
|
|||
from settings import SUPABASE_URL, SUPABASE_KEY
|
||||
|
||||
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
||||
if SUPABASE_URL:
|
||||
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from ..jobs import *
|
||||
from typing import List
|
||||
|
||||
|
||||
class StatusException(Exception):
|
||||
|
@ -13,7 +14,7 @@ class Site(Enum):
|
|||
|
||||
|
||||
class ScraperInput(BaseModel):
|
||||
site_type: Site
|
||||
site_type: List[Site]
|
||||
search_term: str
|
||||
|
||||
location: str = None
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
from concurrent.futures import ThreadPoolExecutor
|
||||
from fastapi import APIRouter
|
||||
|
||||
from api.core.scrapers.indeed import IndeedScraper
|
||||
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
|
||||
from api.core.scrapers.linkedin import LinkedInScraper
|
||||
from api.core.scrapers import ScraperInput, Site, JobResponse
|
||||
from typing import List
|
||||
|
||||
router = APIRouter(prefix="/jobs", tags=["jobs"])
|
||||
|
||||
|
@ -14,11 +16,14 @@ SCRAPER_MAPPING = {
|
|||
}
|
||||
|
||||
|
||||
@router.post("/", response_model=JobResponse)
|
||||
async def scrape_jobs(scraper_input: ScraperInput):
|
||||
scraper_class = SCRAPER_MAPPING[scraper_input.site_type]
|
||||
@router.post("/", response_model=List[JobResponse])
|
||||
async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]:
|
||||
def scrape_site(site: str) -> JobResponse:
|
||||
scraper_class = SCRAPER_MAPPING[site]
|
||||
scraper = scraper_class()
|
||||
return scraper.scrape(scraper_input)
|
||||
|
||||
job_response = scraper.scrape(scraper_input)
|
||||
with ThreadPoolExecutor() as executor:
|
||||
resp = list(executor.map(scrape_site, scraper_input.site_type))
|
||||
|
||||
return job_response
|
||||
return resp
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
"header": [],
|
||||
"body": {
|
||||
"mode": "raw",
|
||||
"raw": "{\r\n \"site_type\": \"linkedin\", // linkedin / indeed / zip_recruiter\r\n \"search_term\": \"engineer\",\r\n\r\n // optional\r\n \"location\": \"tx\",\r\n \"distance\": 10,\r\n \"job_type\": \"fulltime\", // fulltime, parttime, internship, contract\r\n // \"is_remote\": true,\r\n \"easy_apply\": true, // linkedin only\r\n \"results_wanted\": 10 \r\n}",
|
||||
"raw": "{\r\n \"site_type\": [\"linkedin\"], // linkedin / indeed / zip_recruiter\r\n \"search_term\": \"engineer\",\r\n\r\n // optional\r\n \"location\": \"tx\",\r\n \"distance\": 10,\r\n \"job_type\": \"fulltime\", // fulltime, parttime, internship, contract\r\n // \"is_remote\": true,\r\n \"easy_apply\": true, // linkedin only\r\n \"results_wanted\": 10 \r\n}",
|
||||
"options": {
|
||||
"raw": {
|
||||
"language": "json"
|
||||
|
|
Loading…
Reference in New Issue