sheets integration

pull/22/head
Cullen Watson 2023-08-27 19:32:13 -05:00
parent d10dce6913
commit 9801f2a97e
8 changed files with 73 additions and 23 deletions

3
.gitignore vendored
View File

@ -4,4 +4,5 @@
/ven/
**/__pycache__/
*.pyc
.env
.env
client_secret.json

View File

View File

@ -4,11 +4,9 @@ from jose import jwt, JWTError
from fastapi import HTTPException, status, Depends
from fastapi.security import OAuth2PasswordBearer
from settings import *
from api.core.users import TokenData
from api.auth.db_utils import UserInDB, get_user
load_dotenv()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")

View File

@ -4,3 +4,4 @@ from enum import Enum
class OutputFormat(Enum):
    """
    Output targets a scrape request can ask for.

    Each member's value is the lowercase string used to select it.
    """

    # Comma-separated values
    CSV = "csv"
    # JSON payload
    JSON = "json"
    # Google Sheets
    GSHEET = "gsheet"

View File

@ -1,19 +1,50 @@
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import csv
from io import StringIO
from datetime import datetime
from ...jobs import *
from ...scrapers import *
def generate_filename() -> str:
    """
    Build a CSV filename stamped with the current local date and time.

    :return: name of the form ``JobSpy_results_YYYYMMDD_HHMMSS.csv``
    """
    stamp = format(datetime.now(), "%Y%m%d_%H%M%S")
    return f"JobSpy_results_{stamp}.csv"
from settings import *
class CSVFormatter:
@staticmethod
def format(jobs: ScraperResponse) -> StringIO:
def upload_to_google_sheet(csv_data: StringIO) -> None:
    """
    Append the data rows of a CSV buffer to the configured Google Sheet.

    The spreadsheet named by ``GSHEET_NAME`` is opened with the service-account
    key at ``GSHEET_JSON_KEY_PATH`` and the rows are appended to its first
    worksheet. The first CSV row (the header) is not uploaded.

    :param csv_data: in-memory CSV buffer; its full contents are read via ``getvalue()``
    :raises Exception: any error raised while loading credentials, opening the
        spreadsheet or appending rows propagates to the caller unchanged
    """
    scope = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive.file",
        "https://www.googleapis.com/auth/drive",
    ]
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        GSHEET_JSON_KEY_PATH, scope
    )
    gc = gspread.authorize(credentials)
    worksheet = gc.open(GSHEET_NAME).get_worksheet(0)

    rows = list(csv.reader(StringIO(csv_data.getvalue())))
    # Drop the header row; only job rows are appended to the sheet.
    data_rows = rows[1:]
    if not data_rows:
        # Nothing to upload, so avoid an API call with an empty payload.
        return

    # One batched request instead of one request per row: faster, far less
    # likely to hit API rate limits, and avoids half-finished uploads.
    worksheet.append_rows(data_rows)
@staticmethod
def generate_filename() -> str:
    """
    Produce the download filename for a CSV export.

    :return: ``JobSpy_results_<YYYYMMDD_HHMMSS>.csv`` using the current local time
    """
    created_at = datetime.now()
    suffix = created_at.strftime("%Y%m%d_%H%M%S")
    return f"JobSpy_results_{suffix}.csv"
@staticmethod
def format(jobs: CommonResponse) -> StringIO:
"""
Transform the jobs objects into csv
:param jobs:
@ -41,7 +72,7 @@ class CSVFormatter:
writer.writerow(headers)
for site, job_response in jobs.dict().items():
if job_response and job_response.get("success"):
if isinstance(job_response, dict) and job_response.get("success"):
for job in job_response["jobs"]:
writer.writerow(
[

View File

@ -1,6 +1,6 @@
from ..jobs import *
from ..formatters import OutputFormat
from typing import List, Dict, Optional
from typing import List, Dict, Optional, Any
class StatusException(Exception):
@ -28,10 +28,12 @@ class ScraperInput(BaseModel):
results_wanted: int = 15
class ScraperResponse(BaseModel):
    """
    Scrape results, holding one optional ``JobResponse`` per supported site.

    Every field defaults to ``None`` explicitly so the model can be built from
    a result mapping that only contains some of the sites, regardless of how
    the installed pydantic version treats ``Optional`` fields without defaults.
    """

    linkedin: Optional[JobResponse] = None
    indeed: Optional[JobResponse] = None
    zip_recruiter: Optional[JobResponse] = None
class CommonResponse(BaseModel):
    """
    Response envelope carrying a status message, an optional error message
    and optional per-site results.

    All fields default to ``None`` explicitly: the model is constructed with
    only ``status`` (or ``status`` and ``error``) in some code paths, and an
    ``Optional`` annotation without a default is a required field in
    pydantic v2.
    """

    status: Optional[str] = None
    error: Optional[str] = None
    linkedin: Optional[Any] = None
    indeed: Optional[Any] = None
    zip_recruiter: Optional[Any] = None
class Scraper:

View File

@ -6,13 +6,13 @@ from concurrent.futures import ThreadPoolExecutor
from api.core.scrapers.indeed import IndeedScraper
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
from api.core.scrapers.linkedin import LinkedInScraper
from api.core.formatters.csv import CSVFormatter, generate_filename
from api.core.formatters.csv import CSVFormatter
from api.core.scrapers import (
ScraperInput,
Site,
JobResponse,
OutputFormat,
ScraperResponse,
CommonResponse,
)
from typing import List, Dict, Tuple, Union
@ -26,7 +26,7 @@ SCRAPER_MAPPING = {
@router.post("/")
async def scrape_jobs(scraper_input: ScraperInput) -> ScraperResponse:
async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
"""
Asynchronously scrapes job data from multiple job sites.
:param scraper_input:
@ -42,14 +42,26 @@ async def scrape_jobs(scraper_input: ScraperInput) -> ScraperResponse:
with ThreadPoolExecutor() as executor:
results = dict(executor.map(scrape_site, scraper_input.site_type))
scraper_response = ScraperResponse(**results)
scraper_response = CommonResponse(status="JSON response success", **results)
if scraper_input.output_format == OutputFormat.CSV:
csv_output = CSVFormatter.format(scraper_response)
response = StreamingResponse(csv_output, media_type="text/csv")
response.headers[
"Content-Disposition"
] = f"attachment; filename={generate_filename()}"
] = f"attachment; filename={CSVFormatter.generate_filename()}"
return response
return scraper_response
elif scraper_input.output_format == OutputFormat.GSHEET:
csv_output = CSVFormatter.format(scraper_response)
try:
CSVFormatter.upload_to_google_sheet(csv_output)
return CommonResponse(status="Successfully uploaded to Google Sheets")
except Exception as e:
return CommonResponse(
status="Failed to upload to Google Sheet", error=str(e)
)
else:
return scraper_response

View File

@ -2,9 +2,14 @@ from dotenv import load_dotenv
import os
load_dotenv()
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1HAnn-aPv-BO4QTEzfIWc-5iw50duyMoTgX8o3RsEOWs/edit?usp=sharing)
# Path to the service-account JSON key file used for Google Sheets access
GSHEET_JSON_KEY_PATH = "client_secret.json"
# Name of the spreadsheet that results are uploaded to
GSHEET_NAME = "JobSpy"
# optional auth
AUTH_REQUIRED = False
# Read from the environment; each is None when the variable is not set
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
ALGORITHM = "HS256"
# NOTE(review): presumably the token lifetime in minutes, going by the name — confirm against the auth code
ACCESS_TOKEN_EXPIRE_MINUTES = 60
AUTH_REQUIRED = False
ALGORITHM = "HS256"