mirror of https://github.com/Bunsly/JobSpy

Feat/multiple sites (#12)

* adding multiple search sites
* updating docs and postman
* threading per scraper type

pull/15/head
parent d67383f053
commit 4d04bb63e2
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Module",
+            "type": "python",
+            "request": "launch",
+            "module": "uvicorn",
+            "args": ["main:app","--reload"]
+        }
+    ]
+}
+
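For reference, the same server can be started outside the VS Code debugger. A minimal sketch, assuming the FastAPI app object is exposed as `app` in `main.py`, which is what the "main:app" launch argument implies:

# run_dev.py (hypothetical helper, not part of the repo)
# Mirrors the launch configuration above: run uvicorn against main:app
# with auto-reload enabled for local development.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("main:app", reload=True)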
@@ -17,13 +17,13 @@
 **Endpoint**: `/api/v1/jobs/`

 #### Parameters:
-- **site_type**: str (Required) - Options: `linkedin`, `zip_recruiter`, `indeed`
+- **site_type**: List[str] (Required) - Options: `linkedin`, `zip_recruiter`, `indeed`
 - **search_term**: str (Required)
 - **location**: int
 - **distance**: int
 - **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract`
 - **is_remote**: bool
-- **results_wanted**: int
+- **results_wanted**: int (per `site_type`)
 - **easy_apply**: bool (Only for LinkedIn)

 ### Example
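With `site_type` now a list, one request can query several boards at once, and `results_wanted` applies per site. A hypothetical call against a locally running instance; the host, port, and example values are assumptions, not taken from the docs:

import requests

payload = {
    "site_type": ["indeed", "linkedin"],  # one scraper run per listed site
    "search_term": "software engineer",
    "location": "Dallas, TX",
    "results_wanted": 10,                 # returned per site_type entry
}

# Assumed local dev address; adjust to wherever the API is served.
resp = requests.post("http://localhost:8000/api/v1/jobs/", json=payload)
print(resp.json())                        # list with one JobResponse per site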
@@ -8,7 +8,8 @@ from api.core.users import UserInDB
 from settings import SUPABASE_URL, SUPABASE_KEY

 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
-supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+if SUPABASE_URL:
+    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)


 def create_user(user_create: UserInDB):
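Guarding `create_client` behind `if SUPABASE_URL:` lets the module import cleanly without credentials, but any code that touches `supabase` must now tolerate the name being unset. One way to make that explicit, sketched under the assumption of an optional module-level client; the `get_db` helper is hypothetical, not the repo's code:

from typing import Optional

from supabase import Client, create_client
from settings import SUPABASE_URL, SUPABASE_KEY

supabase: Optional[Client] = None
if SUPABASE_URL:
    supabase = create_client(SUPABASE_URL, SUPABASE_KEY)


def get_db() -> Client:
    # Fail at call time with a clear message instead of a NameError at import.
    if supabase is None:
        raise RuntimeError("Supabase is not configured (SUPABASE_URL is empty)")
    return supabase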
@@ -1,4 +1,5 @@
 from ..jobs import *
+from typing import List


 class StatusException(Exception):

@@ -13,7 +14,7 @@ class Site(Enum):


 class ScraperInput(BaseModel):
-    site_type: Site
+    site_type: List[Site]
     search_term: str

     location: str = None
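Callers now pass a list of `Site` values in a single `ScraperInput`. A small usage sketch; the enum member names below are assumed from the option strings in the docs and may not match the repo exactly:

from api.core.scrapers import ScraperInput, Site

# One input object fans out to every listed site.
scraper_input = ScraperInput(
    site_type=[Site.INDEED, Site.LINKEDIN],  # assumed member names
    search_term="data engineer",
    location="Austin, TX",
)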
@@ -1,9 +1,11 @@
+from concurrent.futures import ThreadPoolExecutor
 from fastapi import APIRouter

 from api.core.scrapers.indeed import IndeedScraper
 from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
 from api.core.scrapers.linkedin import LinkedInScraper
 from api.core.scrapers import ScraperInput, Site, JobResponse
+from typing import List

 router = APIRouter(prefix="/jobs", tags=["jobs"])

@@ -14,11 +16,14 @@ SCRAPER_MAPPING = {
 }


-@router.post("/", response_model=JobResponse)
-async def scrape_jobs(scraper_input: ScraperInput):
-    scraper_class = SCRAPER_MAPPING[scraper_input.site_type]
-    scraper = scraper_class()
-
-    job_response = scraper.scrape(scraper_input)
-
-    return job_response
+@router.post("/", response_model=List[JobResponse])
+async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]:
+    def scrape_site(site: str) -> JobResponse:
+        scraper_class = SCRAPER_MAPPING[site]
+        scraper = scraper_class()
+        return scraper.scrape(scraper_input)
+
+    with ThreadPoolExecutor() as executor:
+        resp = list(executor.map(scrape_site, scraper_input.site_type))
+
+    return resp
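The rewritten endpoint runs one scrape per requested site on a thread pool and collects one JobResponse per site, in request order. The same pattern as a self-contained sketch with stand-in scrapers; the real route maps `Site` values to scraper classes via `SCRAPER_MAPPING` instead:

from concurrent.futures import ThreadPoolExecutor
from typing import Callable, Dict, List

# Stand-in "scrapers": each takes a search term and returns a fake result.
SCRAPERS: Dict[str, Callable[[str], str]] = {
    "indeed": lambda term: f"indeed results for {term!r}",
    "linkedin": lambda term: f"linkedin results for {term!r}",
}


def scrape_all(sites: List[str], term: str) -> List[str]:
    def scrape_site(site: str) -> str:
        return SCRAPERS[site](term)

    # executor.map preserves input order, so results[i] corresponds to sites[i].
    with ThreadPoolExecutor() as executor:
        return list(executor.map(scrape_site, sites))


print(scrape_all(["indeed", "linkedin"], "python developer"))

Threads are a reasonable fit here because each scrape is dominated by network I/O, during which the interpreter releases the GIL, so the per-site scrapers genuinely overlap.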
@@ -23,7 +23,7 @@
 "header": [],
 "body": {
     "mode": "raw",
-    "raw": "{\r\n \"site_type\": \"linkedin\", // linkedin / indeed / zip_recruiter\r\n \"search_term\": \"engineer\",\r\n\r\n // optional\r\n \"location\": \"tx\",\r\n \"distance\": 10,\r\n \"job_type\": \"fulltime\", // fulltime, parttime, internship, contract\r\n // \"is_remote\": true,\r\n \"easy_apply\": true, // linkedin only\r\n \"results_wanted\": 10 \r\n}",
+    "raw": "{\r\n \"site_type\": [\"linkedin\"], // linkedin / indeed / zip_recruiter\r\n \"search_term\": \"engineer\",\r\n\r\n // optional\r\n \"location\": \"tx\",\r\n \"distance\": 10,\r\n \"job_type\": \"fulltime\", // fulltime, parttime, internship, contract\r\n // \"is_remote\": true,\r\n \"easy_apply\": true, // linkedin only\r\n \"results_wanted\": 10 \r\n}",
     "options": {
         "raw": {
             "language": "json"
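For readability, the escaped `raw` Postman body above decodes to the following JSON-with-comments (Postman tolerates the `//` comments; a strict JSON parser would not):

{
    "site_type": ["linkedin"], // linkedin / indeed / zip_recruiter
    "search_term": "engineer",

    // optional
    "location": "tx",
    "distance": 10,
    "job_type": "fulltime", // fulltime, parttime, internship, contract
    // "is_remote": true,
    "easy_apply": true, // linkedin only
    "results_wanted": 10
}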