mirror of https://github.com/Bunsly/JobSpy
- init
parent
7efece8fe9
commit
9a86d2b1f5
|
@ -40,7 +40,7 @@ jobs:
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# not checking currently because of bad ip at Github's servers being blocked
|
# not checking currently because of bad ip at Github's servers being blocked
|
||||||
# - name: Check HTTP status to POST /api/v1/jobs/
|
# - name: Check HTTP status to POST /src/v1/jobs/
|
||||||
# run: |
|
# run: |
|
||||||
# response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
|
# response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
|
||||||
# "site_type": ["indeed", "linkedin"],
|
# "site_type": ["indeed", "linkedin"],
|
||||||
|
|
|
@ -1,9 +0,0 @@
|
||||||
from fastapi import APIRouter
|
|
||||||
from api.auth import router as auth_router
|
|
||||||
from .v1 import router as v1_router
|
|
||||||
|
|
||||||
router = APIRouter(
|
|
||||||
prefix="/api",
|
|
||||||
)
|
|
||||||
router.include_router(v1_router)
|
|
||||||
router.include_router(auth_router)
|
|
|
@ -1,8 +0,0 @@
|
||||||
from fastapi import APIRouter
|
|
||||||
|
|
||||||
from api.auth.token import router as token_router
|
|
||||||
from api.auth.register import router as register_router
|
|
||||||
|
|
||||||
router = APIRouter(prefix="/auth", tags=["auth"])
|
|
||||||
router.include_router(token_router)
|
|
||||||
router.include_router(register_router)
|
|
|
@ -1,65 +0,0 @@
|
||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
from jose import jwt, JWTError
|
|
||||||
from fastapi import HTTPException, status, Depends
|
|
||||||
from fastapi.security import OAuth2PasswordBearer
|
|
||||||
|
|
||||||
from api.core.users import TokenData
|
|
||||||
from api.auth.db_utils import UserInDB, get_user
|
|
||||||
|
|
||||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")
|
|
||||||
|
|
||||||
|
|
||||||
def create_access_token(data: dict) -> str:
|
|
||||||
"""
|
|
||||||
Creates a JWT token based on the data provided.
|
|
||||||
:param data
|
|
||||||
:return: encoded_jwt
|
|
||||||
"""
|
|
||||||
to_encode = data.copy()
|
|
||||||
expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
|
|
||||||
to_encode.update({"exp": expire})
|
|
||||||
encoded_jwt = jwt.encode(to_encode, JWT_SECRET_KEY, algorithm=ALGORITHM)
|
|
||||||
return encoded_jwt
|
|
||||||
|
|
||||||
|
|
||||||
async def get_current_user(token: str = Depends(oauth2_scheme)):
|
|
||||||
"""
|
|
||||||
Returns the current user associated with the provided JWT token.
|
|
||||||
:param token
|
|
||||||
:raises HTTPException: If the token is invalid or the user does not exist.
|
|
||||||
:return: The UserInDB instance associated with the token.
|
|
||||||
"""
|
|
||||||
credential_exception = HTTPException(
|
|
||||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
|
||||||
detail="Could not validate credentials",
|
|
||||||
headers={"WWW-Authenticate": "Bearer"},
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
|
|
||||||
username: str = payload.get("sub")
|
|
||||||
if username is None:
|
|
||||||
raise credential_exception
|
|
||||||
token_data = TokenData(username=username)
|
|
||||||
except JWTError:
|
|
||||||
raise credential_exception
|
|
||||||
|
|
||||||
current_user = get_user(token_data.username)
|
|
||||||
if current_user is None:
|
|
||||||
raise credential_exception
|
|
||||||
return current_user
|
|
||||||
|
|
||||||
|
|
||||||
async def get_active_current_user(current_user: UserInDB = Depends(get_current_user)):
|
|
||||||
"""
|
|
||||||
Returns the current user if the user account is active.
|
|
||||||
|
|
||||||
:param current_user: A UserInDB instance representing the current user.
|
|
||||||
:raises HTTPException: If the user account is inactive.
|
|
||||||
:return: The UserInDB instance if the user account is active.
|
|
||||||
"""
|
|
||||||
if current_user.disabled:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_401_UNAUTHORIZED, detail="Inactive user."
|
|
||||||
)
|
|
||||||
return current_user
|
|
|
@ -1,89 +0,0 @@
|
||||||
from typing import Optional, Union
|
|
||||||
|
|
||||||
from passlib.context import CryptContext
|
|
||||||
from supabase_py import create_client, Client
|
|
||||||
from fastapi import HTTPException, status
|
|
||||||
|
|
||||||
from api.core.users import UserInDB
|
|
||||||
from settings import SUPABASE_URL, SUPABASE_KEY
|
|
||||||
|
|
||||||
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
|
||||||
if SUPABASE_URL:
|
|
||||||
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
|
||||||
|
|
||||||
|
|
||||||
def create_user(user_create: UserInDB):
|
|
||||||
"""
|
|
||||||
Creates a new user record in the 'users' table in Supabase.
|
|
||||||
|
|
||||||
:param user_create: The data of the user to be created.
|
|
||||||
:raises HTTPException: If an error occurs while creating the user.
|
|
||||||
:return: The result of the insert operation.
|
|
||||||
"""
|
|
||||||
result = supabase.table("users").insert(user_create.dict()).execute()
|
|
||||||
print(f"Insert result: {result}")
|
|
||||||
|
|
||||||
if "error" in result and result["error"]:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
||||||
detail=f"User could not be created due to {result['error']['message']}",
|
|
||||||
)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def get_user(username: str) -> Optional[UserInDB]:
|
|
||||||
"""
|
|
||||||
Retrieves a user from the 'users' table by their username.
|
|
||||||
|
|
||||||
:param username: The username of the user to retrieve.
|
|
||||||
:return: The user data if found, otherwise None.
|
|
||||||
"""
|
|
||||||
result = supabase.table("users").select().eq("username", username).execute()
|
|
||||||
|
|
||||||
if "error" in result and result["error"]:
|
|
||||||
print(f"Error: {result['error']['message']}")
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
if result["data"]:
|
|
||||||
user_data = result["data"][0]
|
|
||||||
return UserInDB(**user_data)
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def verify_password(password: str, hashed_password: str) -> bool:
|
|
||||||
"""
|
|
||||||
Verifies a password against a hashed password using the bcrypt hashing algorithm.
|
|
||||||
|
|
||||||
:param password: The plaintext password to verify.
|
|
||||||
:param hashed_password: The hashed password to compare against.
|
|
||||||
:return: True if the password matches the hashed password, otherwise False.
|
|
||||||
"""
|
|
||||||
return pwd_context.verify(password, hashed_password)
|
|
||||||
|
|
||||||
|
|
||||||
def get_password_hash(password: str) -> str:
|
|
||||||
"""
|
|
||||||
Hashes a password using the bcrypt hashing algorithm.
|
|
||||||
|
|
||||||
:param password: The plaintext password to hash.
|
|
||||||
:return: The hashed password
|
|
||||||
"""
|
|
||||||
return pwd_context.hash(password)
|
|
||||||
|
|
||||||
|
|
||||||
def authenticate_user(username: str, password: str) -> Union[UserInDB, bool]:
|
|
||||||
"""
|
|
||||||
Authenticates a user based on their username and password.
|
|
||||||
|
|
||||||
:param username: The username of the user to authenticate.
|
|
||||||
:param password: The plaintext password to authenticate.
|
|
||||||
:return: The authenticated user if the username and password are correct, otherwise False.
|
|
||||||
"""
|
|
||||||
user = get_user(username)
|
|
||||||
if not user:
|
|
||||||
return False
|
|
||||||
if not verify_password(password, user.hashed_password):
|
|
||||||
return False
|
|
||||||
return user
|
|
|
@ -1,33 +0,0 @@
|
||||||
from fastapi import APIRouter, HTTPException, status
|
|
||||||
from api.core.users import UserCreate, UserInDB
|
|
||||||
from api.auth.db_utils import get_user, get_password_hash, create_user
|
|
||||||
|
|
||||||
router = APIRouter(prefix="/register")
|
|
||||||
|
|
||||||
|
|
||||||
@router.post("/", response_model=dict)
|
|
||||||
async def register_new_user(user: UserCreate) -> dict:
|
|
||||||
"""
|
|
||||||
Creates new user
|
|
||||||
:param user:
|
|
||||||
:raises HTTPException: If the username already exists.
|
|
||||||
:return: A dictionary containing a detail key with a success message.
|
|
||||||
"""
|
|
||||||
existing_user = get_user(user.username)
|
|
||||||
if existing_user is not None:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
|
||||||
detail="Username already exists",
|
|
||||||
)
|
|
||||||
|
|
||||||
hashed_password = get_password_hash(user.password)
|
|
||||||
user_create = UserInDB(
|
|
||||||
username=user.username,
|
|
||||||
email=user.email,
|
|
||||||
full_name=user.full_name,
|
|
||||||
hashed_password=hashed_password,
|
|
||||||
disabled=False,
|
|
||||||
)
|
|
||||||
create_user(user_create)
|
|
||||||
|
|
||||||
return {"detail": "User created successfully"}
|
|
|
@ -1,30 +0,0 @@
|
||||||
from fastapi import APIRouter, Depends, HTTPException, status
|
|
||||||
from fastapi.security import OAuth2PasswordRequestForm
|
|
||||||
|
|
||||||
from api.core.users import Token
|
|
||||||
from api.auth.db_utils import authenticate_user
|
|
||||||
from api.auth.auth_utils import create_access_token
|
|
||||||
|
|
||||||
router = APIRouter(prefix="/token")
|
|
||||||
|
|
||||||
|
|
||||||
@router.post("/", response_model=Token)
|
|
||||||
async def login_for_access_token(
|
|
||||||
form_data: OAuth2PasswordRequestForm = Depends(),
|
|
||||||
) -> Token:
|
|
||||||
"""
|
|
||||||
Authenticates a user and provides an access token.
|
|
||||||
:param form_data: OAuth2PasswordRequestForm object containing the user's credentials.
|
|
||||||
:raises HTTPException: If the user cannot be authenticated.
|
|
||||||
:return: A Token object containing the access token and the token type.
|
|
||||||
"""
|
|
||||||
user = authenticate_user(form_data.username, form_data.password)
|
|
||||||
if not user:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
|
||||||
detail="Incorrect username or password",
|
|
||||||
headers={"WWW-Authenticate": "Bearer"},
|
|
||||||
)
|
|
||||||
|
|
||||||
access_token = create_access_token(data={"sub": user.username})
|
|
||||||
return Token(access_token=access_token, token_type="bearer")
|
|
|
@ -1,28 +0,0 @@
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
|
|
||||||
class User(BaseModel):
|
|
||||||
username: str
|
|
||||||
full_name: str
|
|
||||||
email: str
|
|
||||||
disabled: bool = False
|
|
||||||
|
|
||||||
|
|
||||||
class UserCreate(BaseModel):
|
|
||||||
username: str
|
|
||||||
full_name: str
|
|
||||||
email: str
|
|
||||||
password: str
|
|
||||||
|
|
||||||
|
|
||||||
class UserInDB(User):
|
|
||||||
hashed_password: str
|
|
||||||
|
|
||||||
|
|
||||||
class TokenData(BaseModel):
|
|
||||||
username: str
|
|
||||||
|
|
||||||
|
|
||||||
class Token(BaseModel):
|
|
||||||
access_token: str
|
|
||||||
token_type: str
|
|
|
@ -1,11 +0,0 @@
|
||||||
from fastapi import APIRouter, Depends
|
|
||||||
from .jobs import router as jobs_router
|
|
||||||
from api.auth.auth_utils import get_active_current_user
|
|
||||||
from settings import AUTH_REQUIRED
|
|
||||||
|
|
||||||
if AUTH_REQUIRED:
|
|
||||||
router = APIRouter(prefix="/v1", dependencies=[Depends(get_active_current_user)])
|
|
||||||
else:
|
|
||||||
router = APIRouter(prefix="/v1")
|
|
||||||
|
|
||||||
router.include_router(jobs_router)
|
|
16
main.py
16
main.py
|
@ -1,16 +0,0 @@
|
||||||
from fastapi import FastAPI
|
|
||||||
|
|
||||||
from supabase_py import create_client, Client
|
|
||||||
from api import router as api_router
|
|
||||||
|
|
||||||
app = FastAPI(
|
|
||||||
title="JobSpy Backend",
|
|
||||||
description="Endpoints for job boardLinkedIn, Indeed, and ZipRecruiterscrapers",
|
|
||||||
version="1.0.0",
|
|
||||||
)
|
|
||||||
app.include_router(api_router)
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health", tags=["health"])
|
|
||||||
async def health_check():
|
|
||||||
return {"message": "JobSpy ready to scrape"}
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
[tool.poetry]
|
||||||
|
name = "jobspy"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
|
||||||
|
authors = ["Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com>"]
|
||||||
|
readme = "README.md"
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.10"
|
||||||
|
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
14
settings.py
14
settings.py
|
@ -1,14 +0,0 @@
|
||||||
from dotenv import load_dotenv
|
|
||||||
import os
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing)
|
|
||||||
GSHEET_NAME = os.environ.get("GSHEET_NAME", "JobSpy")
|
|
||||||
|
|
||||||
# optional autha
|
|
||||||
AUTH_REQUIRED = False
|
|
||||||
SUPABASE_URL = os.environ.get("SUPABASE_URL")
|
|
||||||
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
|
|
||||||
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
|
|
||||||
ACCESS_TOKEN_EXPIRE_MINUTES = 60
|
|
||||||
ALGORITHM = "HS256"
|
|
|
@ -3,11 +3,11 @@ from fastapi import APIRouter
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from api.core.scrapers.indeed import IndeedScraper
|
from .core.scrapers.indeed import IndeedScraper
|
||||||
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
|
from .core.scrapers.ziprecruiter import ZipRecruiterScraper
|
||||||
from api.core.scrapers.linkedin import LinkedInScraper
|
from .core.scrapers.linkedin import LinkedInScraper
|
||||||
from api.core.formatters.csv import CSVFormatter
|
from .core.formatters.csv import CSVFormatter
|
||||||
from api.core.scrapers import (
|
from .core.scrapers import (
|
||||||
ScraperInput,
|
ScraperInput,
|
||||||
Site,
|
Site,
|
||||||
JobResponse,
|
JobResponse,
|
||||||
|
@ -37,7 +37,7 @@ async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
|
||||||
scraper_class = SCRAPER_MAPPING[site]
|
scraper_class = SCRAPER_MAPPING[site]
|
||||||
scraper = scraper_class()
|
scraper = scraper_class()
|
||||||
scraped_data: JobResponse = scraper.scrape(scraper_input)
|
scraped_data: JobResponse = scraper.scrape(scraper_input)
|
||||||
return (site.value, scraped_data)
|
return site.value, scraped_data
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||||
results = dict(executor.map(scrape_site, scraper_input.site_type))
|
results = dict(executor.map(scrape_site, scraper_input.site_type))
|
|
@ -7,20 +7,20 @@ from datetime import datetime
|
||||||
|
|
||||||
from ...jobs import *
|
from ...jobs import *
|
||||||
from ...scrapers import *
|
from ...scrapers import *
|
||||||
from settings import *
|
|
||||||
|
|
||||||
|
|
||||||
class CSVFormatter:
|
class CSVFormatter:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def fetch_job_urls(credentials: Any) -> set:
|
def fetch_job_urls(credentials: Any, google_sheet_name: str) -> set:
|
||||||
"""
|
"""
|
||||||
Fetches all the job urls from the google sheet to prevent duplicates
|
Fetches all the job urls from the google sheet to prevent duplicates
|
||||||
:param credentials:
|
:param credentials:
|
||||||
|
:param google_sheet_name:
|
||||||
:return: urls
|
:return: urls
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
gc = gspread.authorize(credentials)
|
gc = gspread.authorize(credentials)
|
||||||
sh = gc.open(GSHEET_NAME)
|
sh = gc.open(google_sheet_name)
|
||||||
|
|
||||||
worksheet = sh.get_worksheet(0)
|
worksheet = sh.get_worksheet(0)
|
||||||
data = worksheet.get_all_values()
|
data = worksheet.get_all_values()
|
||||||
|
@ -32,10 +32,11 @@ class CSVFormatter:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def upload_to_google_sheet(csv_data: str):
|
def upload_to_google_sheet(csv_data: str, google_sheet_name: str):
|
||||||
"""
|
"""
|
||||||
Appends rows to google sheet
|
Appends rows to google sheet
|
||||||
:param csv_data:
|
:param csv_data:
|
||||||
|
:param google_sheet_name:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
@ -48,7 +49,7 @@ class CSVFormatter:
|
||||||
"client_secret.json", scope
|
"client_secret.json", scope
|
||||||
)
|
)
|
||||||
gc = gspread.authorize(credentials)
|
gc = gspread.authorize(credentials)
|
||||||
sh = gc.open(GSHEET_NAME)
|
sh = gc.open(google_sheet_name)
|
||||||
|
|
||||||
worksheet = sh.get_worksheet(0)
|
worksheet = sh.get_worksheet(0)
|
||||||
data_string = csv_data.getvalue()
|
data_string = csv_data.getvalue()
|
|
@ -9,9 +9,8 @@ from bs4 import BeautifulSoup
|
||||||
from bs4.element import Tag
|
from bs4.element import Tag
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
from api.core.jobs import *
|
from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
|
||||||
from api.core.jobs import JobPost
|
from .. import Scraper, ScraperInput, Site, StatusException
|
||||||
from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
|
|
||||||
|
|
||||||
from concurrent.futures import ThreadPoolExecutor, Future
|
from concurrent.futures import ThreadPoolExecutor, Future
|
||||||
import math
|
import math
|
|
@ -6,8 +6,8 @@ from bs4 import BeautifulSoup
|
||||||
from bs4.element import Tag
|
from bs4.element import Tag
|
||||||
from fastapi import status
|
from fastapi import status
|
||||||
|
|
||||||
from api.core.scrapers import Scraper, ScraperInput, Site
|
from .. import Scraper, ScraperInput, Site
|
||||||
from api.core.jobs import *
|
from ...jobs import JobPost, Location, JobResponse, JobType
|
||||||
|
|
||||||
|
|
||||||
class LinkedInScraper(Scraper):
|
class LinkedInScraper(Scraper):
|
|
@ -10,9 +10,8 @@ from bs4 import BeautifulSoup
|
||||||
from bs4.element import Tag
|
from bs4.element import Tag
|
||||||
from concurrent.futures import ThreadPoolExecutor, Future
|
from concurrent.futures import ThreadPoolExecutor, Future
|
||||||
|
|
||||||
from api.core.jobs import JobPost
|
from ...scrapers import Scraper, ScraperInput, Site, StatusException
|
||||||
from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
|
from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
|
||||||
from api.core.jobs import *
|
|
||||||
|
|
||||||
|
|
||||||
class ZipRecruiterScraper(Scraper):
|
class ZipRecruiterScraper(Scraper):
|
Loading…
Reference in New Issue