mirror of https://github.com/Bunsly/JobSpy
- init
parent
7efece8fe9
commit
9a86d2b1f5
|
@ -40,7 +40,7 @@ jobs:
|
|||
fi
|
||||
|
||||
# not checked currently because GitHub's server IPs are being blocked
|
||||
# - name: Check HTTP status to POST /api/v1/jobs/
|
||||
# - name: Check HTTP status to POST /src/v1/jobs/
|
||||
# run: |
|
||||
# response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
|
||||
# "site_type": ["indeed", "linkedin"],
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
from fastapi import APIRouter
|
||||
from api.auth import router as auth_router
|
||||
from .v1 import router as v1_router
|
||||
|
||||
# Root router: everything included below is served under the /api prefix.
router = APIRouter(prefix="/api")

# Mount the versioned job endpoints first, then the auth endpoints.
router.include_router(v1_router)
router.include_router(auth_router)
|
|
@ -1,8 +0,0 @@
|
|||
from fastapi import APIRouter
|
||||
|
||||
from api.auth.token import router as token_router
|
||||
from api.auth.register import router as register_router
|
||||
|
||||
# Auth router: groups the token and register routers under /auth with the "auth" tag.
router = APIRouter(
    prefix="/auth",
    tags=["auth"],
)

router.include_router(token_router)
router.include_router(register_router)
|
|
@ -1,65 +0,0 @@
|
|||
from datetime import datetime, timedelta
|
||||
|
||||
from jose import jwt, JWTError
|
||||
from fastapi import HTTPException, status, Depends
|
||||
from fastapi.security import OAuth2PasswordBearer
|
||||
|
||||
from api.core.users import TokenData
|
||||
from api.auth.db_utils import UserInDB, get_user
|
||||
|
||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")
|
||||
|
||||
|
||||
def create_access_token(data: dict) -> str:
    """
    Creates a signed JWT from the claims provided.

    The claims are copied, so the caller's dict is not mutated. An "exp" claim
    set ACCESS_TOKEN_EXPIRE_MINUTES from now is added before encoding.

    :param data: claims to embed in the token
    :return: encoded_jwt, signed with JWT_SECRET_KEY using ALGORITHM
    """
    # Local import: the module-level import only brings in datetime and timedelta.
    from datetime import timezone

    # datetime.utcnow() is deprecated and yields a naive value; use an
    # explicit, timezone-aware UTC timestamp for the expiry instead.
    expire = datetime.now(timezone.utc) + timedelta(
        minutes=ACCESS_TOKEN_EXPIRE_MINUTES
    )
    to_encode = {**data, "exp": expire}
    return jwt.encode(to_encode, JWT_SECRET_KEY, algorithm=ALGORITHM)
|
||||
|
||||
|
||||
async def get_current_user(token: str = Depends(oauth2_scheme)):
    """
    Resolves a bearer JWT to the user it names.

    :param token: JWT supplied through the oauth2_scheme dependency
    :raises HTTPException: 401 if the token cannot be decoded, carries no "sub"
        claim, or names a user that get_user cannot find.
    :return: The UserInDB instance associated with the token.
    """
    unauthorized = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    # Only decoding errors are translated here; anything else propagates.
    try:
        claims = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
    except JWTError:
        raise unauthorized

    subject: str = claims.get("sub")
    if subject is None:
        raise unauthorized

    token_data = TokenData(username=subject)
    user = get_user(token_data.username)
    if user is None:
        raise unauthorized
    return user
|
||||
|
||||
|
||||
async def get_active_current_user(current_user: UserInDB = Depends(get_current_user)):
    """
    Passes the authenticated user through only if the account is not disabled.

    :param current_user: the UserInDB resolved by get_current_user.
    :raises HTTPException: 401 with detail "Inactive user." when the account is disabled.
    :return: the same UserInDB instance when the account is active.
    """
    if not current_user.disabled:
        return current_user

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Inactive user.",
    )
|
|
@ -1,89 +0,0 @@
|
|||
from typing import Optional, Union
|
||||
|
||||
from passlib.context import CryptContext
|
||||
from supabase_py import create_client, Client
|
||||
from fastapi import HTTPException, status
|
||||
|
||||
from api.core.users import UserInDB
|
||||
from settings import SUPABASE_URL, SUPABASE_KEY
|
||||
|
||||
# Password hashing context; bcrypt is the only configured scheme.
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

# Always bind `supabase` so the name exists even when SUPABASE_URL is not
# configured; previously the name was left undefined in that case and any
# later use failed with a NameError. It is None when no URL is set.
supabase: Optional[Client] = (
    create_client(SUPABASE_URL, SUPABASE_KEY) if SUPABASE_URL else None
)
|
||||
|
||||
|
||||
def create_user(user_create: UserInDB):
    """
    Creates a new user record in the 'users' table in Supabase.

    :param user_create: The data of the user to be created.
    :raises HTTPException: 500 if the insert result reports an error.
    :return: The result of the insert operation.
    """
    import logging

    result = supabase.table("users").insert(user_create.dict()).execute()

    # The raw result is deliberately not printed: the inserted row contains
    # hashed_password, and the response may echo it back (assumption - the
    # response shape is not visible here). Only the error message is logged.
    if "error" in result and result["error"]:
        logging.getLogger(__name__).error(
            "User insert failed: %s", result["error"]["message"]
        )
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"User could not be created due to {result['error']['message']}",
        )

    return result
|
||||
|
||||
|
||||
def get_user(username: str) -> Optional[UserInDB]:
    """
    Looks up a single user in the 'users' table by username.

    :param username: The username to search for.
    :return: A UserInDB built from the first matching row, or None when the
        query reports an error or returns no rows.
    """
    response = supabase.table("users").select().eq("username", username).execute()

    # A reported error is printed and treated the same as "not found".
    if "error" in response and response["error"]:
        print(f"Error: {response['error']['message']}")
        return None

    rows = response["data"]
    if not rows:
        return None
    return UserInDB(**rows[0])
|
||||
|
||||
|
||||
def verify_password(password: str, hashed_password: str) -> bool:
    """
    Checks a plaintext password against a stored hash via pwd_context.

    :param password: The plaintext password to check.
    :param hashed_password: The stored hash to check it against.
    :return: True when the password matches the hash, otherwise False.
    """
    is_match = pwd_context.verify(password, hashed_password)
    return is_match
|
||||
|
||||
|
||||
def get_password_hash(password: str) -> str:
    """
    Produces a hash of the given password via pwd_context.

    :param password: The plaintext password to hash.
    :return: The hashed password
    """
    hashed = pwd_context.hash(password)
    return hashed
|
||||
|
||||
|
||||
def authenticate_user(username: str, password: str) -> Union[UserInDB, bool]:
    """
    Checks a username/password pair against the stored user record.

    :param username: The username to look up.
    :param password: The plaintext password to verify.
    :return: The matching UserInDB when the user exists and the password
        verifies, otherwise False.
    """
    candidate = get_user(username)
    # Short-circuit: the hash is only checked when a user record was found.
    if candidate and verify_password(password, candidate.hashed_password):
        return candidate
    return False
|
|
@ -1,33 +0,0 @@
|
|||
from fastapi import APIRouter, HTTPException, status
|
||||
from api.core.users import UserCreate, UserInDB
|
||||
from api.auth.db_utils import get_user, get_password_hash, create_user
|
||||
|
||||
router = APIRouter(prefix="/register")
|
||||
|
||||
|
||||
@router.post("/", response_model=dict)
async def register_new_user(user: UserCreate) -> dict:
    """
    Registers a new user account.

    :param user: the registration payload (username, full name, email, password)
    :raises HTTPException: 400 if the username already exists.
    :return: A dictionary containing a detail key with a success message.
    """
    if get_user(user.username) is not None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Username already exists",
        )

    # Only the hash is stored; the plaintext password is never persisted.
    new_record = UserInDB(
        username=user.username,
        email=user.email,
        full_name=user.full_name,
        hashed_password=get_password_hash(user.password),
        disabled=False,
    )
    create_user(new_record)

    return {"detail": "User created successfully"}
|
|
@ -1,30 +0,0 @@
|
|||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi.security import OAuth2PasswordRequestForm
|
||||
|
||||
from api.core.users import Token
|
||||
from api.auth.db_utils import authenticate_user
|
||||
from api.auth.auth_utils import create_access_token
|
||||
|
||||
router = APIRouter(prefix="/token")
|
||||
|
||||
|
||||
@router.post("/", response_model=Token)
async def login_for_access_token(
    form_data: OAuth2PasswordRequestForm = Depends(),
) -> Token:
    """
    Exchanges a username and password for a bearer access token.

    :param form_data: OAuth2PasswordRequestForm object containing the user's credentials.
    :raises HTTPException: 401 if the credentials do not authenticate.
    :return: A Token object containing the access token and the token type.
    """
    authenticated = authenticate_user(form_data.username, form_data.password)
    if authenticated:
        # The username becomes the token's "sub" claim.
        jwt_token = create_access_token(data={"sub": authenticated.username})
        return Token(access_token=jwt_token, token_type="bearer")

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Incorrect username or password",
        headers={"WWW-Authenticate": "Bearer"},
    )
|
|
@ -1,28 +0,0 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class User(BaseModel):
    # Base user fields shared by the user models in this module.
    username: str
    full_name: str
    email: str
    # Accounts are enabled unless explicitly marked disabled.
    disabled: bool = False
|
||||
|
||||
|
||||
class UserCreate(BaseModel):
    # Registration payload; carries the plaintext password supplied by the client.
    username: str
    full_name: str
    email: str
    password: str
|
||||
|
||||
|
||||
class UserInDB(User):
    # User fields plus the stored password hash.
    hashed_password: str
|
||||
|
||||
|
||||
class TokenData(BaseModel):
    # Holds the username associated with a token.
    username: str
|
||||
|
||||
|
||||
class Token(BaseModel):
    # Token response body: the token string and its type.
    access_token: str
    token_type: str
|
|
@ -1,11 +0,0 @@
|
|||
from fastapi import APIRouter, Depends
|
||||
from .jobs import router as jobs_router
|
||||
from api.auth.auth_utils import get_active_current_user
|
||||
from settings import AUTH_REQUIRED
|
||||
|
||||
# Guard every /v1 route with the active-user dependency only when
# AUTH_REQUIRED is set; otherwise the router carries no dependencies.
_route_dependencies = [Depends(get_active_current_user)] if AUTH_REQUIRED else None
router = APIRouter(prefix="/v1", dependencies=_route_dependencies)

router.include_router(jobs_router)
|
16
main.py
16
main.py
|
@ -1,16 +0,0 @@
|
|||
from fastapi import FastAPI
|
||||
|
||||
from supabase_py import create_client, Client
|
||||
from api import router as api_router
|
||||
|
||||
# Application object; title, description and version are passed to FastAPI.
app = FastAPI(
    title="JobSpy Backend",
    # Spaces restored: the description previously ran words together
    # ("boardLinkedIn", "ZipRecruiterscrapers").
    description="Endpoints for job board LinkedIn, Indeed, and ZipRecruiter scrapers",
    version="1.0.0",
)

# Mount the API router on the application.
app.include_router(api_router)
|
||||
|
||||
|
||||
@app.get("/health", tags=["health"])
async def health_check():
    # Health endpoint: always answers with the same fixed ready message.
    ready_payload = {"message": "JobSpy ready to scrape"}
    return ready_payload
|
|
@ -0,0 +1,14 @@
|
|||
[tool.poetry]
|
||||
name = "jobspy"
|
||||
version = "0.1.0"
|
||||
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
|
||||
authors = ["Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com>"]
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
14
settings.py
14
settings.py
|
@ -1,14 +0,0 @@
|
|||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing)
|
||||
GSHEET_NAME = os.environ.get("GSHEET_NAME", "JobSpy")
|
||||
|
||||
# optional auth
|
||||
AUTH_REQUIRED = False
|
||||
SUPABASE_URL = os.environ.get("SUPABASE_URL")
|
||||
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
|
||||
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES = 60
|
||||
ALGORITHM = "HS256"
|
|
@ -3,11 +3,11 @@ from fastapi import APIRouter
|
|||
from fastapi.responses import StreamingResponse
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from api.core.scrapers.indeed import IndeedScraper
|
||||
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
|
||||
from api.core.scrapers.linkedin import LinkedInScraper
|
||||
from api.core.formatters.csv import CSVFormatter
|
||||
from api.core.scrapers import (
|
||||
from .core.scrapers.indeed import IndeedScraper
|
||||
from .core.scrapers.ziprecruiter import ZipRecruiterScraper
|
||||
from .core.scrapers.linkedin import LinkedInScraper
|
||||
from .core.formatters.csv import CSVFormatter
|
||||
from .core.scrapers import (
|
||||
ScraperInput,
|
||||
Site,
|
||||
JobResponse,
|
||||
|
@ -37,7 +37,7 @@ async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
|
|||
scraper_class = SCRAPER_MAPPING[site]
|
||||
scraper = scraper_class()
|
||||
scraped_data: JobResponse = scraper.scrape(scraper_input)
|
||||
return (site.value, scraped_data)
|
||||
return site.value, scraped_data
|
||||
|
||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||
results = dict(executor.map(scrape_site, scraper_input.site_type))
|
|
@ -7,20 +7,20 @@ from datetime import datetime
|
|||
|
||||
from ...jobs import *
|
||||
from ...scrapers import *
|
||||
from settings import *
|
||||
|
||||
|
||||
class CSVFormatter:
|
||||
@staticmethod
|
||||
def fetch_job_urls(credentials: Any) -> set:
|
||||
def fetch_job_urls(credentials: Any, google_sheet_name: str) -> set:
|
||||
"""
|
||||
Fetches all the job urls from the google sheet to prevent duplicates
|
||||
:param credentials:
|
||||
:param google_sheet_name:
|
||||
:return: urls
|
||||
"""
|
||||
try:
|
||||
gc = gspread.authorize(credentials)
|
||||
sh = gc.open(GSHEET_NAME)
|
||||
sh = gc.open(google_sheet_name)
|
||||
|
||||
worksheet = sh.get_worksheet(0)
|
||||
data = worksheet.get_all_values()
|
||||
|
@ -32,10 +32,11 @@ class CSVFormatter:
|
|||
raise e
|
||||
|
||||
@staticmethod
|
||||
def upload_to_google_sheet(csv_data: str):
|
||||
def upload_to_google_sheet(csv_data: str, google_sheet_name: str):
|
||||
"""
|
||||
Appends rows to google sheet
|
||||
:param csv_data:
|
||||
:param google_sheet_name:
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
|
@ -48,7 +49,7 @@ class CSVFormatter:
|
|||
"client_secret.json", scope
|
||||
)
|
||||
gc = gspread.authorize(credentials)
|
||||
sh = gc.open(GSHEET_NAME)
|
||||
sh = gc.open(google_sheet_name)
|
||||
|
||||
worksheet = sh.get_worksheet(0)
|
||||
data_string = csv_data.getvalue()
|
|
@ -9,9 +9,8 @@ from bs4 import BeautifulSoup
|
|||
from bs4.element import Tag
|
||||
from fastapi import status
|
||||
|
||||
from api.core.jobs import *
|
||||
from api.core.jobs import JobPost
|
||||
from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
|
||||
from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
|
||||
from .. import Scraper, ScraperInput, Site, StatusException
|
||||
|
||||
from concurrent.futures import ThreadPoolExecutor, Future
|
||||
import math
|
|
@ -6,8 +6,8 @@ from bs4 import BeautifulSoup
|
|||
from bs4.element import Tag
|
||||
from fastapi import status
|
||||
|
||||
from api.core.scrapers import Scraper, ScraperInput, Site
|
||||
from api.core.jobs import *
|
||||
from .. import Scraper, ScraperInput, Site
|
||||
from ...jobs import JobPost, Location, JobResponse, JobType
|
||||
|
||||
|
||||
class LinkedInScraper(Scraper):
|
|
@ -10,9 +10,8 @@ from bs4 import BeautifulSoup
|
|||
from bs4.element import Tag
|
||||
from concurrent.futures import ThreadPoolExecutor, Future
|
||||
|
||||
from api.core.jobs import JobPost
|
||||
from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
|
||||
from api.core.jobs import *
|
||||
from ...scrapers import Scraper, ScraperInput, Site, StatusException
|
||||
from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
|
||||
|
||||
|
||||
class ZipRecruiterScraper(Scraper):
|
Loading…
Reference in New Issue