sheets integration

pull/22/head
Cullen Watson 2023-08-27 19:32:13 -05:00
parent d10dce6913
commit 9801f2a97e
8 changed files with 73 additions and 23 deletions

.gitignore

@@ -5,3 +5,4 @@
 **/__pycache__/
 *.pyc
 .env
+client_secret.json

@@ -4,11 +4,9 @@ from jose import jwt, JWTError
 from fastapi import HTTPException, status, Depends
 from fastapi.security import OAuth2PasswordBearer
+from settings import *
 from api.core.users import TokenData
 from api.auth.db_utils import UserInDB, get_user
-load_dotenv()
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")
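The load_dotenv() call disappears from this module because configuration is being centralized: settings.py (changed at the bottom of this commit) now calls load_dotenv() once and exposes plain module-level constants, so consumers only need the star import added above. A minimal two-file sketch of that pattern, with constant names taken from this commit's settings.py and the consumer file left unnamed:

# settings.py -- reads .env once at import time
from dotenv import load_dotenv
import os

load_dotenv()
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
GSHEET_NAME = "JobSpy"

# any consumer module -- no dotenv handling of its own
from settings import *

print(JWT_SECRET_KEY, GSHEET_NAME)  # constants are available as plain module globals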

@@ -4,3 +4,4 @@ from enum import Enum
 class OutputFormat(Enum):
     CSV = "csv"
     JSON = "json"
+    GSHEET = "gsheet"
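Because the members carry their wire-format strings as values, "gsheet" round-trips straight into the new member, which is what lets a JSON request body select the Google Sheets branch added later in this commit. A quick illustration; the import path follows the `from ..formatters import OutputFormat` seen further down:

from api.core.formatters import OutputFormat

assert OutputFormat("gsheet") is OutputFormat.GSHEET   # value lookup hits the new member
assert OutputFormat.GSHEET.value == "gsheet"           # and serializes back to the same string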

@@ -1,19 +1,50 @@
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
 import csv
 from io import StringIO
 from datetime import datetime
 from ...jobs import *
 from ...scrapers import *
+from settings import *
 
 
-def generate_filename() -> str:
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    return f"JobSpy_results_{timestamp}.csv"
 
 
 class CSVFormatter:
     @staticmethod
-    def format(jobs: ScraperResponse) -> StringIO:
+    def upload_to_google_sheet(csv_data: str):
+        try:
+            scope = [
+                "https://www.googleapis.com/auth/spreadsheets",
+                "https://www.googleapis.com/auth/drive.file",
+                "https://www.googleapis.com/auth/drive",
+            ]
+            credentials = ServiceAccountCredentials.from_json_keyfile_name(
+                GSHEET_JSON_KEY_PATH, scope
+            )
+            gc = gspread.authorize(credentials)
+            sh = gc.open(GSHEET_NAME)
+            worksheet = sh.get_worksheet(0)
+            data_string = csv_data.getvalue()
+            reader = csv.reader(StringIO(data_string))
+            rows = list(reader)
+
+            for i, row in enumerate(rows):
+                if i == 0:
+                    continue
+                worksheet.append_row(row)
+        except Exception as e:
+            raise e
+
+    @staticmethod
+    def generate_filename() -> str:
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        return f"JobSpy_results_{timestamp}.csv"
+
+    @staticmethod
+    def format(jobs: CommonResponse) -> StringIO:
         """
         Transfomr the jobs objects into csv
         :param jobs:
@@ -41,7 +72,7 @@ class CSVFormatter:
         writer.writerow(headers)
 
         for site, job_response in jobs.dict().items():
-            if job_response and job_response.get("success"):
+            if isinstance(job_response, dict) and job_response.get("success"):
                 for job in job_response["jobs"]:
                     writer.writerow(
                         [
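Note that upload_to_google_sheet is annotated to take a str, but the route below hands it the StringIO returned by CSVFormatter.format(), which is why it calls .getvalue(); it also issues one append_row API call per data row. A hedged standalone sketch of the same flow that batches the write instead; it assumes a gspread release recent enough to provide Worksheet.append_rows and reuses the GSHEET_* names from this commit's settings.py:

import csv
from io import StringIO

import gspread
from oauth2client.service_account import ServiceAccountCredentials

from settings import GSHEET_JSON_KEY_PATH, GSHEET_NAME


def upload_csv(csv_buffer: StringIO) -> None:
    scope = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]
    credentials = ServiceAccountCredentials.from_json_keyfile_name(GSHEET_JSON_KEY_PATH, scope)
    worksheet = gspread.authorize(credentials).open(GSHEET_NAME).get_worksheet(0)

    rows = list(csv.reader(StringIO(csv_buffer.getvalue())))
    # skip the header row and send the rest in a single batched request
    worksheet.append_rows(rows[1:], value_input_option="USER_ENTERED")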

@@ -1,6 +1,6 @@
 from ..jobs import *
 from ..formatters import OutputFormat
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Any
 
 
 class StatusException(Exception):
@@ -28,10 +28,12 @@ class ScraperInput(BaseModel):
     results_wanted: int = 15
 
 
-class ScraperResponse(BaseModel):
-    linkedin: Optional[JobResponse]
-    indeed: Optional[JobResponse]
-    zip_recruiter: Optional[JobResponse]
+class CommonResponse(BaseModel):
+    status: Optional[str]
+    error: Optional[str]
+    linkedin: Optional[Any] = None
+    indeed: Optional[Any] = None
+    zip_recruiter: Optional[Any] = None
 
 
 class Scraper:
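CommonResponse now serves both as the scrape payload and as a status/error envelope, which is why every field is optional and the per-site entries are loosened to Any. A short illustration of the two shapes it takes in this commit; the field values are made up:

# normal JSON scrape: per-site results plus a status string
ok = CommonResponse(status="JSON response success", indeed={"success": True, "jobs": []})

# failed Google Sheets upload: status and error only, site fields stay None
failed = CommonResponse(status="Failed to upload to Google Sheet", error="client_secret.json not found")

print(ok.dict()["indeed"])   # untyped dict, since the field is Optional[Any]
print(failed.error)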

@@ -6,13 +6,13 @@ from concurrent.futures import ThreadPoolExecutor
 from api.core.scrapers.indeed import IndeedScraper
 from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
 from api.core.scrapers.linkedin import LinkedInScraper
-from api.core.formatters.csv import CSVFormatter, generate_filename
+from api.core.formatters.csv import CSVFormatter
 from api.core.scrapers import (
     ScraperInput,
     Site,
     JobResponse,
     OutputFormat,
-    ScraperResponse,
+    CommonResponse,
 )
 from typing import List, Dict, Tuple, Union
@@ -26,7 +26,7 @@ SCRAPER_MAPPING = {
 
 @router.post("/")
-async def scrape_jobs(scraper_input: ScraperInput) -> ScraperResponse:
+async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
     """
     Asynchronously scrapes job data from multiple job sites.
     :param scraper_input:
@@ -42,14 +42,26 @@ async def scrape_jobs(scraper_input: ScraperInput) -> ScraperResponse:
     with ThreadPoolExecutor() as executor:
         results = dict(executor.map(scrape_site, scraper_input.site_type))
 
-    scraper_response = ScraperResponse(**results)
+    scraper_response = CommonResponse(status="JSON response success", **results)
 
     if scraper_input.output_format == OutputFormat.CSV:
         csv_output = CSVFormatter.format(scraper_response)
         response = StreamingResponse(csv_output, media_type="text/csv")
         response.headers[
             "Content-Disposition"
-        ] = f"attachment; filename={generate_filename()}"
+        ] = f"attachment; filename={CSVFormatter.generate_filename()}"
         return response
 
+    elif scraper_input.output_format == OutputFormat.GSHEET:
+        csv_output = CSVFormatter.format(scraper_response)
+        try:
+            CSVFormatter.upload_to_google_sheet(csv_output)
+            return CommonResponse(status="Successfully uploaded to Google Sheets")
+        except Exception as e:
+            return CommonResponse(
+                status="Failed to upload to Google Sheet", error=str(e)
+            )
+
-    return scraper_response
+    else:
+        return scraper_response
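From the client's side, the new branch is selected purely by the output_format string in the request body. A hedged example call; the host, route prefix, and any ScraperInput fields beyond the ones visible in this diff (site_type, results_wanted, output_format) are assumptions:

import requests

payload = {
    "site_type": ["indeed"],      # assumed string form of the Site enum
    "results_wanted": 15,
    "output_format": "gsheet",    # routes the response into the Google Sheets upload branch
}

resp = requests.post("http://localhost:8000/api/v1/jobs/", json=payload)
print(resp.json())  # e.g. {"status": "Successfully uploaded to Google Sheets", ...}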

@@ -2,9 +2,14 @@ from dotenv import load_dotenv
 import os
 
 load_dotenv()
 
+# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1HAnn-aPv-BO4QTEzfIWc-5iw50duyMoTgX8o3RsEOWs/edit?usp=sharing)
+GSHEET_JSON_KEY_PATH = "client_secret.json"
+GSHEET_NAME = "JobSpy"
+
+# optional autha
+AUTH_REQUIRED = False
 SUPABASE_URL = os.environ.get("SUPABASE_URL")
 SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
 JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
-ALGORITHM = "HS256"
 ACCESS_TOKEN_EXPIRE_MINUTES = 60
-AUTH_REQUIRED = False
+ALGORITHM = "HS256"
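Because the Sheets path depends on a local client_secret.json service-account key (now git-ignored) and on a spreadsheet named GSHEET_NAME being shared with that service account, the most common failure is a missing key file or an unshared sheet. A small hedged sanity check one could run before starting the API; the helper name is illustrative, not part of the commit:

import os

import gspread
from oauth2client.service_account import ServiceAccountCredentials

from settings import GSHEET_JSON_KEY_PATH, GSHEET_NAME


def check_gsheet_setup() -> None:
    # fail early if the service-account key was never placed next to the app
    if not os.path.exists(GSHEET_JSON_KEY_PATH):
        raise FileNotFoundError(f"{GSHEET_JSON_KEY_PATH} is missing")
    scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
    credentials = ServiceAccountCredentials.from_json_keyfile_name(GSHEET_JSON_KEY_PATH, scope)
    # raises gspread.SpreadsheetNotFound if the sheet is absent or not shared with the service account
    gspread.authorize(credentials).open(GSHEET_NAME)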