diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b8a874a..0f04208 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,7 +40,7 @@ jobs: fi # not checking currently because of bad ip at Github's servers being blocked -# - name: Check HTTP status to POST /api/v1/jobs/ +# - name: Check HTTP status to POST /src/v1/jobs/ # run: | # response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{ # "site_type": ["indeed", "linkedin"], diff --git a/api/__init__.py b/api/__init__.py deleted file mode 100644 index e0827f6..0000000 --- a/api/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from fastapi import APIRouter -from api.auth import router as auth_router -from .v1 import router as v1_router - -router = APIRouter( - prefix="/api", -) -router.include_router(v1_router) -router.include_router(auth_router) diff --git a/api/auth/__init__.py b/api/auth/__init__.py deleted file mode 100644 index 3ca029d..0000000 --- a/api/auth/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from fastapi import APIRouter - -from api.auth.token import router as token_router -from api.auth.register import router as register_router - -router = APIRouter(prefix="/auth", tags=["auth"]) -router.include_router(token_router) -router.include_router(register_router) diff --git a/api/auth/auth_utils.py b/api/auth/auth_utils.py deleted file mode 100644 index 524424d..0000000 --- a/api/auth/auth_utils.py +++ /dev/null @@ -1,65 +0,0 @@ -from datetime import datetime, timedelta - -from jose import jwt, JWTError -from fastapi import HTTPException, status, Depends -from fastapi.security import OAuth2PasswordBearer - -from api.core.users import TokenData -from api.auth.db_utils import UserInDB, get_user - -oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token") - - -def create_access_token(data: dict) -> str: - """ - Creates a JWT token based on the data provided. - :param data - :return: encoded_jwt - """ - to_encode = data.copy() - expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES) - to_encode.update({"exp": expire}) - encoded_jwt = jwt.encode(to_encode, JWT_SECRET_KEY, algorithm=ALGORITHM) - return encoded_jwt - - -async def get_current_user(token: str = Depends(oauth2_scheme)): - """ - Returns the current user associated with the provided JWT token. - :param token - :raises HTTPException: If the token is invalid or the user does not exist. - :return: The UserInDB instance associated with the token. - """ - credential_exception = HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Could not validate credentials", - headers={"WWW-Authenticate": "Bearer"}, - ) - try: - payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM]) - username: str = payload.get("sub") - if username is None: - raise credential_exception - token_data = TokenData(username=username) - except JWTError: - raise credential_exception - - current_user = get_user(token_data.username) - if current_user is None: - raise credential_exception - return current_user - - -async def get_active_current_user(current_user: UserInDB = Depends(get_current_user)): - """ - Returns the current user if the user account is active. - - :param current_user: A UserInDB instance representing the current user. - :raises HTTPException: If the user account is inactive. - :return: The UserInDB instance if the user account is active. - """ - if current_user.disabled: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, detail="Inactive user." - ) - return current_user diff --git a/api/auth/db_utils.py b/api/auth/db_utils.py deleted file mode 100644 index 696513a..0000000 --- a/api/auth/db_utils.py +++ /dev/null @@ -1,89 +0,0 @@ -from typing import Optional, Union - -from passlib.context import CryptContext -from supabase_py import create_client, Client -from fastapi import HTTPException, status - -from api.core.users import UserInDB -from settings import SUPABASE_URL, SUPABASE_KEY - -pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") -if SUPABASE_URL: - supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY) - - -def create_user(user_create: UserInDB): - """ - Creates a new user record in the 'users' table in Supabase. - - :param user_create: The data of the user to be created. - :raises HTTPException: If an error occurs while creating the user. - :return: The result of the insert operation. - """ - result = supabase.table("users").insert(user_create.dict()).execute() - print(f"Insert result: {result}") - - if "error" in result and result["error"]: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"User could not be created due to {result['error']['message']}", - ) - - return result - - -def get_user(username: str) -> Optional[UserInDB]: - """ - Retrieves a user from the 'users' table by their username. - - :param username: The username of the user to retrieve. - :return: The user data if found, otherwise None. - """ - result = supabase.table("users").select().eq("username", username).execute() - - if "error" in result and result["error"]: - print(f"Error: {result['error']['message']}") - return None - else: - if result["data"]: - user_data = result["data"][0] - return UserInDB(**user_data) - else: - return None - - -def verify_password(password: str, hashed_password: str) -> bool: - """ - Verifies a password against a hashed password using the bcrypt hashing algorithm. - - :param password: The plaintext password to verify. - :param hashed_password: The hashed password to compare against. - :return: True if the password matches the hashed password, otherwise False. - """ - return pwd_context.verify(password, hashed_password) - - -def get_password_hash(password: str) -> str: - """ - Hashes a password using the bcrypt hashing algorithm. - - :param password: The plaintext password to hash. - :return: The hashed password - """ - return pwd_context.hash(password) - - -def authenticate_user(username: str, password: str) -> Union[UserInDB, bool]: - """ - Authenticates a user based on their username and password. - - :param username: The username of the user to authenticate. - :param password: The plaintext password to authenticate. - :return: The authenticated user if the username and password are correct, otherwise False. - """ - user = get_user(username) - if not user: - return False - if not verify_password(password, user.hashed_password): - return False - return user diff --git a/api/auth/register/__init__.py b/api/auth/register/__init__.py deleted file mode 100644 index 33619ec..0000000 --- a/api/auth/register/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -from fastapi import APIRouter, HTTPException, status -from api.core.users import UserCreate, UserInDB -from api.auth.db_utils import get_user, get_password_hash, create_user - -router = APIRouter(prefix="/register") - - -@router.post("/", response_model=dict) -async def register_new_user(user: UserCreate) -> dict: - """ - Creates new user - :param user: - :raises HTTPException: If the username already exists. - :return: A dictionary containing a detail key with a success message. - """ - existing_user = get_user(user.username) - if existing_user is not None: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Username already exists", - ) - - hashed_password = get_password_hash(user.password) - user_create = UserInDB( - username=user.username, - email=user.email, - full_name=user.full_name, - hashed_password=hashed_password, - disabled=False, - ) - create_user(user_create) - - return {"detail": "User created successfully"} diff --git a/api/auth/token/__init__.py b/api/auth/token/__init__.py deleted file mode 100644 index 6822083..0000000 --- a/api/auth/token/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from fastapi import APIRouter, Depends, HTTPException, status -from fastapi.security import OAuth2PasswordRequestForm - -from api.core.users import Token -from api.auth.db_utils import authenticate_user -from api.auth.auth_utils import create_access_token - -router = APIRouter(prefix="/token") - - -@router.post("/", response_model=Token) -async def login_for_access_token( - form_data: OAuth2PasswordRequestForm = Depends(), -) -> Token: - """ - Authenticates a user and provides an access token. - :param form_data: OAuth2PasswordRequestForm object containing the user's credentials. - :raises HTTPException: If the user cannot be authenticated. - :return: A Token object containing the access token and the token type. - """ - user = authenticate_user(form_data.username, form_data.password) - if not user: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Incorrect username or password", - headers={"WWW-Authenticate": "Bearer"}, - ) - - access_token = create_access_token(data={"sub": user.username}) - return Token(access_token=access_token, token_type="bearer") diff --git a/api/core/users/__init__.py b/api/core/users/__init__.py deleted file mode 100644 index 55f7e8f..0000000 --- a/api/core/users/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from pydantic import BaseModel - - -class User(BaseModel): - username: str - full_name: str - email: str - disabled: bool = False - - -class UserCreate(BaseModel): - username: str - full_name: str - email: str - password: str - - -class UserInDB(User): - hashed_password: str - - -class TokenData(BaseModel): - username: str - - -class Token(BaseModel): - access_token: str - token_type: str diff --git a/api/v1/__init__.py b/api/v1/__init__.py deleted file mode 100644 index 29d9e8d..0000000 --- a/api/v1/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from fastapi import APIRouter, Depends -from .jobs import router as jobs_router -from api.auth.auth_utils import get_active_current_user -from settings import AUTH_REQUIRED - -if AUTH_REQUIRED: - router = APIRouter(prefix="/v1", dependencies=[Depends(get_active_current_user)]) -else: - router = APIRouter(prefix="/v1") - -router.include_router(jobs_router) diff --git a/main.py b/main.py deleted file mode 100644 index 7c0e3cc..0000000 --- a/main.py +++ /dev/null @@ -1,16 +0,0 @@ -from fastapi import FastAPI - -from supabase_py import create_client, Client -from api import router as api_router - -app = FastAPI( - title="JobSpy Backend", - description="Endpoints for job boardLinkedIn, Indeed, and ZipRecruiterscrapers", - version="1.0.0", -) -app.include_router(api_router) - - -@app.get("/health", tags=["health"]) -async def health_check(): - return {"message": "JobSpy ready to scrape"} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..95d92aa --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "jobspy" +version = "0.1.0" +description = "Job scraper for LinkedIn, Indeed & ZipRecruiter" +authors = ["Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com>"] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.10" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/settings.py b/settings.py deleted file mode 100644 index ff17271..0000000 --- a/settings.py +++ /dev/null @@ -1,14 +0,0 @@ -from dotenv import load_dotenv -import os - -load_dotenv() -# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing) -GSHEET_NAME = os.environ.get("GSHEET_NAME", "JobSpy") - -# optional autha -AUTH_REQUIRED = False -SUPABASE_URL = os.environ.get("SUPABASE_URL") -SUPABASE_KEY = os.environ.get("SUPABASE_KEY") -JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY") -ACCESS_TOKEN_EXPIRE_MINUTES = 60 -ALGORITHM = "HS256" diff --git a/api/v1/jobs/__init__.py b/src/__init__.py similarity index 87% rename from api/v1/jobs/__init__.py rename to src/__init__.py index d7bb363..6213f49 100644 --- a/api/v1/jobs/__init__.py +++ b/src/__init__.py @@ -3,11 +3,11 @@ from fastapi import APIRouter from fastapi.responses import StreamingResponse from concurrent.futures import ThreadPoolExecutor -from api.core.scrapers.indeed import IndeedScraper -from api.core.scrapers.ziprecruiter import ZipRecruiterScraper -from api.core.scrapers.linkedin import LinkedInScraper -from api.core.formatters.csv import CSVFormatter -from api.core.scrapers import ( +from .core.scrapers.indeed import IndeedScraper +from .core.scrapers.ziprecruiter import ZipRecruiterScraper +from .core.scrapers.linkedin import LinkedInScraper +from .core.formatters.csv import CSVFormatter +from .core.scrapers import ( ScraperInput, Site, JobResponse, @@ -37,7 +37,7 @@ async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse: scraper_class = SCRAPER_MAPPING[site] scraper = scraper_class() scraped_data: JobResponse = scraper.scrape(scraper_input) - return (site.value, scraped_data) + return site.value, scraped_data with ThreadPoolExecutor(max_workers=3) as executor: results = dict(executor.map(scrape_site, scraper_input.site_type)) diff --git a/api/core/__init__.py b/src/core/__init__.py similarity index 100% rename from api/core/__init__.py rename to src/core/__init__.py diff --git a/api/core/formatters/__init__.py b/src/core/formatters/__init__.py similarity index 100% rename from api/core/formatters/__init__.py rename to src/core/formatters/__init__.py diff --git a/api/core/formatters/csv/__init__.py b/src/core/formatters/csv/__init__.py similarity index 92% rename from api/core/formatters/csv/__init__.py rename to src/core/formatters/csv/__init__.py index ab89248..c3a98b0 100644 --- a/api/core/formatters/csv/__init__.py +++ b/src/core/formatters/csv/__init__.py @@ -7,20 +7,20 @@ from datetime import datetime from ...jobs import * from ...scrapers import * -from settings import * class CSVFormatter: @staticmethod - def fetch_job_urls(credentials: Any) -> set: + def fetch_job_urls(credentials: Any, google_sheet_name: str) -> set: """ Fetches all the job urls from the google sheet to prevent duplicates :param credentials: + :param google_sheet_name: :return: urls """ try: gc = gspread.authorize(credentials) - sh = gc.open(GSHEET_NAME) + sh = gc.open(google_sheet_name) worksheet = sh.get_worksheet(0) data = worksheet.get_all_values() @@ -32,10 +32,11 @@ class CSVFormatter: raise e @staticmethod - def upload_to_google_sheet(csv_data: str): + def upload_to_google_sheet(csv_data: str, google_sheet_name: str): """ Appends rows to google sheet :param csv_data: + :param google_sheet_name: :return: """ try: @@ -48,7 +49,7 @@ class CSVFormatter: "client_secret.json", scope ) gc = gspread.authorize(credentials) - sh = gc.open(GSHEET_NAME) + sh = gc.open(google_sheet_name) worksheet = sh.get_worksheet(0) data_string = csv_data.getvalue() diff --git a/api/core/jobs/__init__.py b/src/core/jobs/__init__.py similarity index 100% rename from api/core/jobs/__init__.py rename to src/core/jobs/__init__.py diff --git a/api/core/scrapers/__init__.py b/src/core/scrapers/__init__.py similarity index 100% rename from api/core/scrapers/__init__.py rename to src/core/scrapers/__init__.py diff --git a/api/core/scrapers/indeed/__init__.py b/src/core/scrapers/indeed/__init__.py similarity index 98% rename from api/core/scrapers/indeed/__init__.py rename to src/core/scrapers/indeed/__init__.py index 60778f5..0d60814 100644 --- a/api/core/scrapers/indeed/__init__.py +++ b/src/core/scrapers/indeed/__init__.py @@ -9,9 +9,8 @@ from bs4 import BeautifulSoup from bs4.element import Tag from fastapi import status -from api.core.jobs import * -from api.core.jobs import JobPost -from api.core.scrapers import Scraper, ScraperInput, Site, StatusException +from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType +from .. import Scraper, ScraperInput, Site, StatusException from concurrent.futures import ThreadPoolExecutor, Future import math diff --git a/api/core/scrapers/linkedin/__init__.py b/src/core/scrapers/linkedin/__init__.py similarity index 98% rename from api/core/scrapers/linkedin/__init__.py rename to src/core/scrapers/linkedin/__init__.py index c39458a..eb211a3 100644 --- a/api/core/scrapers/linkedin/__init__.py +++ b/src/core/scrapers/linkedin/__init__.py @@ -6,8 +6,8 @@ from bs4 import BeautifulSoup from bs4.element import Tag from fastapi import status -from api.core.scrapers import Scraper, ScraperInput, Site -from api.core.jobs import * +from .. import Scraper, ScraperInput, Site +from ...jobs import JobPost, Location, JobResponse, JobType class LinkedInScraper(Scraper): diff --git a/api/core/scrapers/ziprecruiter/__init__.py b/src/core/scrapers/ziprecruiter/__init__.py similarity index 98% rename from api/core/scrapers/ziprecruiter/__init__.py rename to src/core/scrapers/ziprecruiter/__init__.py index 15962af..cf329dd 100644 --- a/api/core/scrapers/ziprecruiter/__init__.py +++ b/src/core/scrapers/ziprecruiter/__init__.py @@ -10,9 +10,8 @@ from bs4 import BeautifulSoup from bs4.element import Tag from concurrent.futures import ThreadPoolExecutor, Future -from api.core.jobs import JobPost -from api.core.scrapers import Scraper, ScraperInput, Site, StatusException -from api.core.jobs import * +from ...scrapers import Scraper, ScraperInput, Site, StatusException +from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType class ZipRecruiterScraper(Scraper):