From 8c7d924a8521afdea04a121431816f5950e0d225 Mon Sep 17 00:00:00 2001 From: Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com> Date: Sat, 2 Sep 2023 15:17:01 -0700 Subject: [PATCH] - new schema (includes bugs) --- .gitignore | 3 ++- src/{ => jobspy}/__init__.py | 11 +++++++---- src/{ => jobspy}/core/__init__.py | 0 src/{ => jobspy}/core/jobs/__init__.py | 4 ++-- src/{ => jobspy}/core/scrapers/indeed/__init__.py | 15 ++++++++++----- .../core/scrapers/linkedin/__init__.py | 0 .../core/scrapers/ziprecruiter/__init__.py | 0 tests/test_indeed.py | 2 +- tests/test_ziprecruiter.py | 2 +- 9 files changed, 23 insertions(+), 14 deletions(-) rename src/{ => jobspy}/__init__.py (93%) rename src/{ => jobspy}/core/__init__.py (100%) rename src/{ => jobspy}/core/jobs/__init__.py (96%) rename src/{ => jobspy}/core/scrapers/indeed/__init__.py (96%) rename src/{ => jobspy}/core/scrapers/linkedin/__init__.py (100%) rename src/{ => jobspy}/core/scrapers/ziprecruiter/__init__.py (100%) diff --git a/.gitignore b/.gitignore index 125a75a..9391c0a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ **/__pycache__/ *.pyc .env -client_secret.json \ No newline at end of file +client_secret.json +dist \ No newline at end of file diff --git a/src/__init__.py b/src/jobspy/__init__.py similarity index 93% rename from src/__init__.py rename to src/jobspy/__init__.py index 9b7281d..44e9044 100644 --- a/src/__init__.py +++ b/src/jobspy/__init__.py @@ -7,12 +7,11 @@ from .core.scrapers import ( ScraperInput, Site, JobResponse, - CommonResponse, ) import pandas as pd from .core.jobs import JobType -from typing import List, Dict, Tuple, Union +from typing import List, Tuple SCRAPER_MAPPING = { Site.LINKEDIN: LinkedInScraper, @@ -49,13 +48,17 @@ def scrape_jobs( site_type=site_type, search_term=search_term, location=location, - distance=distance, is_remote=is_remote, - job_type=job_type, easy_apply=easy_apply, results_wanted=results_wanted, ) + if distance: + scraper_input.distance = distance + + if job_type: + scraper_input.job_type = job_type + def scrape_site(site: Site) -> Tuple[str, JobResponse]: scraper_class = SCRAPER_MAPPING[site] scraper = scraper_class() diff --git a/src/core/__init__.py b/src/jobspy/core/__init__.py similarity index 100% rename from src/core/__init__.py rename to src/jobspy/core/__init__.py diff --git a/src/core/jobs/__init__.py b/src/jobspy/core/jobs/__init__.py similarity index 96% rename from src/core/jobs/__init__.py rename to src/jobspy/core/jobs/__init__.py index 823eb82..cba8323 100644 --- a/src/core/jobs/__init__.py +++ b/src/jobspy/core/jobs/__init__.py @@ -21,8 +21,8 @@ class JobType(Enum): class Location(BaseModel): country: str = "USA" - city: str = None - state: str = None + city: str = "" + state: str = "" class CompensationInterval(Enum): diff --git a/src/core/scrapers/indeed/__init__.py b/src/jobspy/core/scrapers/indeed/__init__.py similarity index 96% rename from src/core/scrapers/indeed/__init__.py rename to src/jobspy/core/scrapers/indeed/__init__.py index 8ffa0b6..126a13d 100644 --- a/src/core/scrapers/indeed/__init__.py +++ b/src/jobspy/core/scrapers/indeed/__init__.py @@ -127,15 +127,20 @@ class IndeedScraper(Scraper): description=description, company_name=job["company"], location=Location( - city=job.get("jobLocationCity"), - state=job.get("jobLocationState"), - postal_code=job.get("jobLocationPostal"), + city=job.get("jobLocationCity", ""), + state=job.get("jobLocationState", ""), + postal_code=job.get("jobLocationPostal", ""), ), - job_type=job_type, - compensation=compensation, date_posted=date_posted, job_url=job_url_client, ) + + if compensation: + job_post.compensation = compensation + + if job_type: + job_post.job_type = job_type + return job_post for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]: diff --git a/src/core/scrapers/linkedin/__init__.py b/src/jobspy/core/scrapers/linkedin/__init__.py similarity index 100% rename from src/core/scrapers/linkedin/__init__.py rename to src/jobspy/core/scrapers/linkedin/__init__.py diff --git a/src/core/scrapers/ziprecruiter/__init__.py b/src/jobspy/core/scrapers/ziprecruiter/__init__.py similarity index 100% rename from src/core/scrapers/ziprecruiter/__init__.py rename to src/jobspy/core/scrapers/ziprecruiter/__init__.py diff --git a/tests/test_indeed.py b/tests/test_indeed.py index 25dd8a0..6f0075a 100644 --- a/tests/test_indeed.py +++ b/tests/test_indeed.py @@ -1,4 +1,4 @@ -from src import scrape_jobs +from src.jobspy import scrape_jobs def test_indeed(): diff --git a/tests/test_ziprecruiter.py b/tests/test_ziprecruiter.py index eb56693..c4fbdae 100644 --- a/tests/test_ziprecruiter.py +++ b/tests/test_ziprecruiter.py @@ -1,4 +1,4 @@ -from src import scrape_jobs +from src.jobspy import scrape_jobs def test_ziprecruiter():