- new schema (includes bugs)

pull/31/head
Zachary Hampton 2023-09-02 15:17:01 -07:00
parent ad8b27ac00
commit 8c7d924a85
9 changed files with 23 additions and 14 deletions

3
.gitignore vendored
View File

@@ -5,4 +5,5 @@
**/__pycache__/ **/__pycache__/
*.pyc *.pyc
.env .env
client_secret.json client_secret.json
dist

View File

@@ -7,12 +7,11 @@ from .core.scrapers import (
ScraperInput, ScraperInput,
Site, Site,
JobResponse, JobResponse,
CommonResponse,
) )
import pandas as pd import pandas as pd
from .core.jobs import JobType from .core.jobs import JobType
from typing import List, Dict, Tuple, Union from typing import List, Tuple
SCRAPER_MAPPING = { SCRAPER_MAPPING = {
Site.LINKEDIN: LinkedInScraper, Site.LINKEDIN: LinkedInScraper,
@@ -49,13 +48,17 @@ def scrape_jobs(
site_type=site_type, site_type=site_type,
search_term=search_term, search_term=search_term,
location=location, location=location,
distance=distance,
is_remote=is_remote, is_remote=is_remote,
job_type=job_type,
easy_apply=easy_apply, easy_apply=easy_apply,
results_wanted=results_wanted, results_wanted=results_wanted,
) )
if distance:
scraper_input.distance = distance
if job_type:
scraper_input.job_type = job_type
def scrape_site(site: Site) -> Tuple[str, JobResponse]: def scrape_site(site: Site) -> Tuple[str, JobResponse]:
scraper_class = SCRAPER_MAPPING[site] scraper_class = SCRAPER_MAPPING[site]
scraper = scraper_class() scraper = scraper_class()

View File

@@ -21,8 +21,8 @@ class JobType(Enum):
class Location(BaseModel): class Location(BaseModel):
country: str = "USA" country: str = "USA"
city: str = None city: str = ""
state: str = None state: str = ""
class CompensationInterval(Enum): class CompensationInterval(Enum):

View File

@@ -127,15 +127,20 @@ class IndeedScraper(Scraper):
description=description, description=description,
company_name=job["company"], company_name=job["company"],
location=Location( location=Location(
city=job.get("jobLocationCity"), city=job.get("jobLocationCity", ""),
state=job.get("jobLocationState"), state=job.get("jobLocationState", ""),
postal_code=job.get("jobLocationPostal"), postal_code=job.get("jobLocationPostal", ""),
), ),
job_type=job_type,
compensation=compensation,
date_posted=date_posted, date_posted=date_posted,
job_url=job_url_client, job_url=job_url_client,
) )
if compensation:
job_post.compensation = compensation
if job_type:
job_post.job_type = job_type
return job_post return job_post
for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]: for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]:

View File

@@ -1,4 +1,4 @@
from src import scrape_jobs from src.jobspy import scrape_jobs
def test_indeed(): def test_indeed():

View File

@@ -1,4 +1,4 @@
from src import scrape_jobs from src.jobspy import scrape_jobs
def test_ziprecruiter(): def test_ziprecruiter():