mirror of https://github.com/Bunsly/JobSpy
feat(scraper): threading per scraper type
parent
790dc12fdf
commit
7285ca7108
|
@ -23,7 +23,7 @@
|
||||||
- **distance**: int
|
- **distance**: int
|
||||||
- **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract`
|
- **job_type**: str - Options: `fulltime`, `parttime`, `internship`, `contract`
|
||||||
- **is_remote**: bool
|
- **is_remote**: bool
|
||||||
- **results_wanted**: int
|
- **results_wanted**: int (applies to each `site_type` separately, not as a combined total)
|
||||||
- **easy_apply**: bool (Only for LinkedIn)
|
- **easy_apply**: bool (Only for LinkedIn)
|
||||||
|
|
||||||
### Example
|
### Example
|
||||||
|
|
|
@ -8,6 +8,7 @@ from api.core.users import UserInDB
|
||||||
from settings import SUPABASE_URL, SUPABASE_KEY
|
from settings import SUPABASE_URL, SUPABASE_KEY
|
||||||
|
|
||||||
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
||||||
|
if SUPABASE_URL:
|
||||||
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
|
|
||||||
from api.core.scrapers.indeed import IndeedScraper
|
from api.core.scrapers.indeed import IndeedScraper
|
||||||
|
@ -16,12 +17,13 @@ SCRAPER_MAPPING = {
|
||||||
|
|
||||||
|
|
||||||
@router.post("/", response_model=List[JobResponse])
|
@router.post("/", response_model=List[JobResponse])
|
||||||
async def scrape_jobs(scraper_input: ScraperInput) -> JobResponse:
|
async def scrape_jobs(scraper_input: ScraperInput) -> List[JobResponse]:
|
||||||
resp = []
|
def scrape_site(site: str) -> JobResponse:
|
||||||
for site in scraper_input.site_type:
|
|
||||||
scraper_class = SCRAPER_MAPPING[site]
|
scraper_class = SCRAPER_MAPPING[site]
|
||||||
scraper = scraper_class()
|
scraper = scraper_class()
|
||||||
job_response = scraper.scrape(scraper_input)
|
return scraper.scrape(scraper_input)
|
||||||
resp.append(job_response)
|
|
||||||
|
with ThreadPoolExecutor() as executor:
|
||||||
|
resp = list(executor.map(scrape_site, scraper_input.site_type))
|
||||||
|
|
||||||
return resp
|
return resp
|
||||||
|
|
Loading…
Reference in New Issue