pull/31/head
Zachary Hampton 2023-09-02 12:21:55 -07:00
parent 7efece8fe9
commit 9a86d2b1f5
21 changed files with 33 additions and 323 deletions

View File

@ -40,7 +40,7 @@ jobs:
fi
# not checking currently because the IPs of GitHub's servers are being blocked
# - name: Check HTTP status to POST /api/v1/jobs/
# - name: Check HTTP status to POST /src/v1/jobs/
# run: |
# response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
# "site_type": ["indeed", "linkedin"],

View File

@ -1,9 +0,0 @@
from fastapi import APIRouter
from api.auth import router as auth_router
from .v1 import router as v1_router
# Root router: everything included below is served under the "/api" prefix.
router = APIRouter(prefix="/api")

# Sub-routers are attached in the original order: v1 first, then auth.
router.include_router(v1_router)
router.include_router(auth_router)

View File

@ -1,8 +0,0 @@
from fastapi import APIRouter
from api.auth.token import router as token_router
from api.auth.register import router as register_router
# Auth router: groups the token and register routers under "/auth" and tags
# them "auth".
router = APIRouter(
    prefix="/auth",
    tags=["auth"],
)

router.include_router(token_router)
router.include_router(register_router)

View File

@ -1,65 +0,0 @@
from datetime import datetime, timedelta
from jose import jwt, JWTError
from fastapi import HTTPException, status, Depends
from fastapi.security import OAuth2PasswordBearer
from api.core.users import TokenData
from api.auth.db_utils import UserInDB, get_user
# Bearer-token scheme used as a dependency by get_current_user; tokenUrl names
# the endpoint where clients obtain a token.
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")
def create_access_token(data: dict) -> str:
    """
    Creates a signed JWT from the claims provided.

    The input dict is not mutated: an "exp" claim, set
    ACCESS_TOKEN_EXPIRE_MINUTES from now, is added to a copy.
    :param data: claims to embed in the token, e.g. {"sub": username}
    :return: encoded_jwt
    """
    # Imported locally so the fix does not depend on the module-level imports.
    from datetime import timezone

    # NOTE(review): ACCESS_TOKEN_EXPIRE_MINUTES, JWT_SECRET_KEY and ALGORITHM
    # are not imported in the visible part of this module - confirm they are
    # brought in from settings.

    # Use a timezone-aware "now": datetime.utcnow() is deprecated and returns a
    # naive value that can be mistaken for local time.
    expire = datetime.now(timezone.utc) + timedelta(
        minutes=ACCESS_TOKEN_EXPIRE_MINUTES
    )
    to_encode = {**data, "exp": expire}
    return jwt.encode(to_encode, JWT_SECRET_KEY, algorithm=ALGORITHM)
async def get_current_user(token: str = Depends(oauth2_scheme)):
    """
    Resolves a bearer JWT to the user it was issued for.
    :param token: JWT supplied through the OAuth2 bearer scheme
    :raises HTTPException: 401 if the token cannot be decoded, has no "sub"
        claim, or names a user that get_user cannot find.
    :return: The UserInDB instance returned by get_user for the token's subject.
    """
    # One shared 401 response for every failure mode below.
    unauthorized = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    try:
        claims = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
    except JWTError:
        raise unauthorized

    subject = claims.get("sub")
    if subject is None:
        raise unauthorized

    token_data = TokenData(username=subject)
    user = get_user(token_data.username)
    if user is None:
        raise unauthorized
    return user
async def get_active_current_user(current_user: UserInDB = Depends(get_current_user)):
    """
    Passes the authenticated user through only if the account is not disabled.
    :param current_user: The UserInDB instance resolved by get_current_user.
    :raises HTTPException: 401 if the user's disabled flag is set.
    :return: The same UserInDB instance when the account is active.
    """
    if not current_user.disabled:
        return current_user

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED, detail="Inactive user."
    )

View File

@ -1,89 +0,0 @@
from typing import Optional, Union
from passlib.context import CryptContext
from supabase_py import create_client, Client
from fastapi import HTTPException, status
from api.core.users import UserInDB
from settings import SUPABASE_URL, SUPABASE_KEY
# Password hashing context used by verify_password and get_password_hash.
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

# The Supabase client is only created when SUPABASE_URL is set.
# NOTE(review): when it is unset, `supabase` is never defined, so create_user
# and get_user would fail with NameError - confirm this is intended.
if SUPABASE_URL:
    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
def create_user(user_create: UserInDB):
    """
    Inserts a new row into the Supabase 'users' table.
    :param user_create: The user record to store; its dict() form is inserted.
    :raises HTTPException: 500 if the insert result reports an error.
    :return: The raw result of the insert operation.
    """
    users_table = supabase.table("users")
    result = users_table.insert(user_create.dict()).execute()
    print(f"Insert result: {result}")

    # A missing or empty "error" entry means the insert succeeded.
    error = result["error"] if "error" in result else None
    if not error:
        return result

    raise HTTPException(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        detail=f"User could not be created due to {error['message']}",
    )
def get_user(username: str) -> Optional[UserInDB]:
    """
    Looks up a user in the 'users' table by username.
    :param username: The username to search for.
    :return: A UserInDB built from the first matching row, or None when the
        query reports an error or returns no rows.
    """
    result = supabase.table("users").select().eq("username", username).execute()

    # Errors are printed and treated the same as "not found".
    if "error" in result and result["error"]:
        print(f"Error: {result['error']['message']}")
        return None

    rows = result["data"]
    return UserInDB(**rows[0]) if rows else None
def verify_password(password: str, hashed_password: str) -> bool:
    """
    Checks a plaintext password against a stored hash via pwd_context.
    :param password: The plaintext password to check.
    :param hashed_password: The stored hash to check it against.
    :return: True if the password matches the hash, otherwise False.
    """
    is_match = pwd_context.verify(password, hashed_password)
    return is_match
def get_password_hash(password: str) -> str:
    """
    Produces a hash of the password via pwd_context.
    :param password: The plaintext password to hash.
    :return: The hashed password
    """
    hashed = pwd_context.hash(password)
    return hashed
def authenticate_user(username: str, password: str) -> Union[UserInDB, bool]:
    """
    Checks a username/password pair against the stored user record.
    :param username: The username to look up.
    :param password: The plaintext password to verify.
    :return: The matching user when both the lookup and the password check
        succeed, otherwise False.
    """
    user = get_user(username)
    # The password is only verified when the lookup found a user.
    if user and verify_password(password, user.hashed_password):
        return user
    return False

View File

@ -1,33 +0,0 @@
from fastapi import APIRouter, HTTPException, status
from api.core.users import UserCreate, UserInDB
from api.auth.db_utils import get_user, get_password_hash, create_user
# Router for the registration endpoint; its routes are prefixed with "/register".
router = APIRouter(prefix="/register")
@router.post("/", response_model=dict)
async def register_new_user(user: UserCreate) -> dict:
    """
    Registers a new user with a hashed password.
    :param user: The registration payload (username, full name, email, password).
    :raises HTTPException: 400 if a user with this username already exists.
    :return: A dictionary containing a detail key with a success message.
    """
    if get_user(user.username) is not None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Username already exists",
        )

    # Only the hash is stored; the plaintext password never reaches create_user.
    hashed_password = get_password_hash(user.password)
    create_user(
        UserInDB(
            username=user.username,
            email=user.email,
            full_name=user.full_name,
            hashed_password=hashed_password,
            disabled=False,
        )
    )
    return {"detail": "User created successfully"}

View File

@ -1,30 +0,0 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm
from api.core.users import Token
from api.auth.db_utils import authenticate_user
from api.auth.auth_utils import create_access_token
# Router for the login endpoint; its routes are prefixed with "/token".
router = APIRouter(prefix="/token")
@router.post("/", response_model=Token)
async def login_for_access_token(
    form_data: OAuth2PasswordRequestForm = Depends(),
) -> Token:
    """
    Exchanges a username and password for a bearer access token.
    :param form_data: OAuth2PasswordRequestForm object containing the user's credentials.
    :raises HTTPException: 401 if authenticate_user rejects the credentials.
    :return: A Token object containing the access token and the token type.
    """
    user = authenticate_user(form_data.username, form_data.password)
    if user:
        # The username is carried in the token's "sub" claim.
        token = create_access_token(data={"sub": user.username})
        return Token(access_token=token, token_type="bearer")

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Incorrect username or password",
        headers={"WWW-Authenticate": "Bearer"},
    )

View File

@ -1,28 +0,0 @@
from pydantic import BaseModel
class User(BaseModel):
    """
    Base user profile fields shared with UserInDB.
    """

    username: str
    full_name: str
    email: str
    # Defaults to an enabled account.
    disabled: bool = False
class UserCreate(BaseModel):
    """
    Payload accepted by the registration endpoint.
    """

    username: str
    full_name: str
    email: str
    # Plaintext password; the registration endpoint hashes it before storing.
    password: str
class UserInDB(User):
    """
    User record as stored in the 'users' table, including the password hash.
    """

    hashed_password: str
class TokenData(BaseModel):
    """
    Holds the username read from a decoded token's "sub" claim.
    """

    username: str
class Token(BaseModel):
    """
    Response body returned by the token endpoint.
    """

    access_token: str
    # The login endpoint sets this to "bearer".
    token_type: str

View File

@ -1,11 +0,0 @@
from fastapi import APIRouter, Depends
from .jobs import router as jobs_router
from api.auth.auth_utils import get_active_current_user
from settings import AUTH_REQUIRED
# With AUTH_REQUIRED on, every /v1 route depends on get_active_current_user;
# otherwise the router is created without dependencies.
router = APIRouter(
    prefix="/v1",
    dependencies=[Depends(get_active_current_user)] if AUTH_REQUIRED else None,
)
router.include_router(jobs_router)

16
main.py
View File

@ -1,16 +0,0 @@
from fastapi import FastAPI
from supabase_py import create_client, Client
from api import router as api_router
# Application object. The title, description and version are passed to FastAPI
# as the app's metadata.
app = FastAPI(
    title="JobSpy Backend",
    # The previous text ran words together ("job boardLinkedIn ...
    # ZipRecruiterscrapers"); spelled out properly here.
    description="Endpoints for job board scrapers (LinkedIn, Indeed, and ZipRecruiter)",
    version="1.0.0",
)

# Mount the API router on the app.
app.include_router(api_router)
@app.get("/health", tags=["health"])
async def health_check():
    """
    Health-check endpoint.
    :return: A dictionary with a fixed readiness message.
    """
    payload = {"message": "JobSpy ready to scrape"}
    return payload

14
pyproject.toml Normal file
View File

@ -0,0 +1,14 @@
[tool.poetry]
name = "jobspy"
version = "0.1.0"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

View File

@ -1,14 +0,0 @@
from dotenv import load_dotenv
import os
# Load variables from a .env file before any of the settings below are read.
load_dotenv()

# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing)
GSHEET_NAME = os.getenv("GSHEET_NAME", "JobSpy")

# optional auth
AUTH_REQUIRED = False

# Supabase connection settings; None when the variables are not set.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# JWT signing key, token lifetime in minutes, and signing algorithm.
JWT_SECRET_KEY = os.getenv("JWT_SECRET_KEY")
ACCESS_TOKEN_EXPIRE_MINUTES = 60
ALGORITHM = "HS256"

View File

@ -3,11 +3,11 @@ from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from concurrent.futures import ThreadPoolExecutor
from api.core.scrapers.indeed import IndeedScraper
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
from api.core.scrapers.linkedin import LinkedInScraper
from api.core.formatters.csv import CSVFormatter
from api.core.scrapers import (
from .core.scrapers.indeed import IndeedScraper
from .core.scrapers.ziprecruiter import ZipRecruiterScraper
from .core.scrapers.linkedin import LinkedInScraper
from .core.formatters.csv import CSVFormatter
from .core.scrapers import (
ScraperInput,
Site,
JobResponse,
@ -37,7 +37,7 @@ async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
scraper_class = SCRAPER_MAPPING[site]
scraper = scraper_class()
scraped_data: JobResponse = scraper.scrape(scraper_input)
return (site.value, scraped_data)
return site.value, scraped_data
with ThreadPoolExecutor(max_workers=3) as executor:
results = dict(executor.map(scrape_site, scraper_input.site_type))

View File

@ -7,20 +7,20 @@ from datetime import datetime
from ...jobs import *
from ...scrapers import *
from settings import *
class CSVFormatter:
@staticmethod
def fetch_job_urls(credentials: Any) -> set:
def fetch_job_urls(credentials: Any, google_sheet_name: str) -> set:
"""
Fetches all the job urls from the google sheet to prevent duplicates
:param credentials:
:param google_sheet_name:
:return: urls
"""
try:
gc = gspread.authorize(credentials)
sh = gc.open(GSHEET_NAME)
sh = gc.open(google_sheet_name)
worksheet = sh.get_worksheet(0)
data = worksheet.get_all_values()
@ -32,10 +32,11 @@ class CSVFormatter:
raise e
@staticmethod
def upload_to_google_sheet(csv_data: str):
def upload_to_google_sheet(csv_data: str, google_sheet_name: str):
"""
Appends rows to google sheet
:param csv_data:
:param google_sheet_name:
:return:
"""
try:
@ -48,7 +49,7 @@ class CSVFormatter:
"client_secret.json", scope
)
gc = gspread.authorize(credentials)
sh = gc.open(GSHEET_NAME)
sh = gc.open(google_sheet_name)
worksheet = sh.get_worksheet(0)
data_string = csv_data.getvalue()

View File

@ -9,9 +9,8 @@ from bs4 import BeautifulSoup
from bs4.element import Tag
from fastapi import status
from api.core.jobs import *
from api.core.jobs import JobPost
from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
from .. import Scraper, ScraperInput, Site, StatusException
from concurrent.futures import ThreadPoolExecutor, Future
import math

View File

@ -6,8 +6,8 @@ from bs4 import BeautifulSoup
from bs4.element import Tag
from fastapi import status
from api.core.scrapers import Scraper, ScraperInput, Site
from api.core.jobs import *
from .. import Scraper, ScraperInput, Site
from ...jobs import JobPost, Location, JobResponse, JobType
class LinkedInScraper(Scraper):

View File

@ -10,9 +10,8 @@ from bs4 import BeautifulSoup
from bs4.element import Tag
from concurrent.futures import ThreadPoolExecutor, Future
from api.core.jobs import JobPost
from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
from api.core.jobs import *
from ...scrapers import Scraper, ScraperInput, Site, StatusException
from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
class ZipRecruiterScraper(Scraper):