mirror of https://github.com/Bunsly/JobSpy
sheets integration
parent
d10dce6913
commit
9801f2a97e
|
@ -4,4 +4,5 @@
|
|||
/ven/
|
||||
**/__pycache__/
|
||||
*.pyc
|
||||
.env
|
||||
.env
|
||||
client_secret.json
|
|
@ -4,11 +4,9 @@ from jose import jwt, JWTError
|
|||
from fastapi import HTTPException, status, Depends
|
||||
from fastapi.security import OAuth2PasswordBearer
|
||||
|
||||
from settings import *
|
||||
from api.core.users import TokenData
|
||||
from api.auth.db_utils import UserInDB, get_user
|
||||
|
||||
load_dotenv()
|
||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")
|
||||
|
||||
|
||||
|
|
|
@ -4,3 +4,4 @@ from enum import Enum
|
|||
class OutputFormat(Enum):
    """Delivery formats a scrape request can select for its results."""

    # Results are rendered as CSV text.
    CSV = "csv"
    # Results are returned as the JSON response model.
    JSON = "json"
    # Results are rendered as CSV and uploaded to a Google Sheet.
    GSHEET = "gsheet"
|
||||
|
|
|
@ -1,19 +1,50 @@
|
|||
import gspread
|
||||
from oauth2client.service_account import ServiceAccountCredentials
|
||||
|
||||
import csv
|
||||
from io import StringIO
|
||||
from datetime import datetime
|
||||
|
||||
from ...jobs import *
|
||||
from ...scrapers import *
|
||||
|
||||
|
||||
def generate_filename() -> str:
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
return f"JobSpy_results_{timestamp}.csv"
|
||||
from settings import *
|
||||
|
||||
|
||||
class CSVFormatter:
|
||||
@staticmethod
|
||||
def format(jobs: ScraperResponse) -> StringIO:
|
||||
def upload_to_google_sheet(csv_data: "StringIO | str"):
    """
    Append CSV job rows to the first worksheet of the configured Google Sheet.

    Authorizes with the service-account key file at GSHEET_JSON_KEY_PATH, opens
    the spreadsheet named GSHEET_NAME, and appends every CSV row except the
    first (header) row.

    :param csv_data: CSV text whose first row is a header, given either as a
        StringIO buffer or as a plain string
    :raises Exception: any credential, gspread, or CSV parsing error is
        propagated unchanged to the caller
    """
    scope = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive.file",
        "https://www.googleapis.com/auth/drive",
    ]
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        GSHEET_JSON_KEY_PATH, scope
    )
    gc = gspread.authorize(credentials)
    sh = gc.open(GSHEET_NAME)

    worksheet = sh.get_worksheet(0)

    # The caller hands over a StringIO buffer; plain text is accepted as well
    # so the originally advertised `str` input keeps working.
    data_string = csv_data if isinstance(csv_data, str) else csv_data.getvalue()

    # Skip the header row; only data rows are appended.
    # NOTE(review): presumably the sheet template already holds the header - confirm.
    rows = list(csv.reader(StringIO(data_string)))[1:]

    # One batched request instead of one API call per row keeps large result
    # sets fast and away from the Sheets write-rate limits.
    if rows:
        worksheet.append_rows(rows)
|
||||
|
||||
@staticmethod
|
||||
def generate_filename() -> str:
    """
    Build a timestamped name for a CSV export.

    :return: "JobSpy_results_<YYYYmmdd_HHMMSS>.csv", stamped with the current local time
    """
    return f"JobSpy_results_{datetime.now():%Y%m%d_%H%M%S}.csv"
|
||||
|
||||
@staticmethod
|
||||
def format(jobs: CommonResponse) -> StringIO:
|
||||
"""
|
||||
Transform the jobs objects into csv
|
||||
:param jobs:
|
||||
|
@ -41,7 +72,7 @@ class CSVFormatter:
|
|||
writer.writerow(headers)
|
||||
|
||||
for site, job_response in jobs.dict().items():
|
||||
if job_response and job_response.get("success"):
|
||||
if isinstance(job_response, dict) and job_response.get("success"):
|
||||
for job in job_response["jobs"]:
|
||||
writer.writerow(
|
||||
[
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from ..jobs import *
|
||||
from ..formatters import OutputFormat
|
||||
from typing import List, Dict, Optional
|
||||
from typing import List, Dict, Optional, Any
|
||||
|
||||
|
||||
class StatusException(Exception):
|
||||
|
@ -28,10 +28,12 @@ class ScraperInput(BaseModel):
|
|||
results_wanted: int = 15
|
||||
|
||||
|
||||
class ScraperResponse(BaseModel):
|
||||
linkedin: Optional[JobResponse]
|
||||
indeed: Optional[JobResponse]
|
||||
zip_recruiter: Optional[JobResponse]
|
||||
class CommonResponse(BaseModel):
    """
    Response envelope for a scrape request.

    Every field is optional and defaults to None, so an instance may carry only
    a status message, a status together with an error, or per-site results.
    """

    # Human-readable outcome message.
    status: Optional[str] = None
    # Error text; explicit None default so the model can be built without it
    # regardless of how the installed pydantic treats default-less Optional fields.
    error: Optional[str] = None
    # Per-site payloads, left untyped (Any).
    linkedin: Optional[Any] = None
    indeed: Optional[Any] = None
    zip_recruiter: Optional[Any] = None
|
||||
|
||||
|
||||
class Scraper:
|
||||
|
|
|
@ -6,13 +6,13 @@ from concurrent.futures import ThreadPoolExecutor
|
|||
from api.core.scrapers.indeed import IndeedScraper
|
||||
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
|
||||
from api.core.scrapers.linkedin import LinkedInScraper
|
||||
from api.core.formatters.csv import CSVFormatter, generate_filename
|
||||
from api.core.formatters.csv import CSVFormatter
|
||||
from api.core.scrapers import (
|
||||
ScraperInput,
|
||||
Site,
|
||||
JobResponse,
|
||||
OutputFormat,
|
||||
ScraperResponse,
|
||||
CommonResponse,
|
||||
)
|
||||
from typing import List, Dict, Tuple, Union
|
||||
|
||||
|
@ -26,7 +26,7 @@ SCRAPER_MAPPING = {
|
|||
|
||||
|
||||
@router.post("/")
|
||||
async def scrape_jobs(scraper_input: ScraperInput) -> ScraperResponse:
|
||||
async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
|
||||
"""
|
||||
Asynchronously scrapes job data from multiple job sites.
|
||||
:param scraper_input:
|
||||
|
@ -42,14 +42,26 @@ async def scrape_jobs(scraper_input: ScraperInput) -> ScraperResponse:
|
|||
with ThreadPoolExecutor() as executor:
|
||||
results = dict(executor.map(scrape_site, scraper_input.site_type))
|
||||
|
||||
scraper_response = ScraperResponse(**results)
|
||||
scraper_response = CommonResponse(status="JSON response success", **results)
|
||||
|
||||
if scraper_input.output_format == OutputFormat.CSV:
|
||||
csv_output = CSVFormatter.format(scraper_response)
|
||||
response = StreamingResponse(csv_output, media_type="text/csv")
|
||||
response.headers[
|
||||
"Content-Disposition"
|
||||
] = f"attachment; filename={generate_filename()}"
|
||||
] = f"attachment; filename={CSVFormatter.generate_filename()}"
|
||||
return response
|
||||
|
||||
return scraper_response
|
||||
elif scraper_input.output_format == OutputFormat.GSHEET:
|
||||
csv_output = CSVFormatter.format(scraper_response)
|
||||
try:
|
||||
CSVFormatter.upload_to_google_sheet(csv_output)
|
||||
return CommonResponse(status="Successfully uploaded to Google Sheets")
|
||||
|
||||
except Exception as e:
|
||||
return CommonResponse(
|
||||
status="Failed to upload to Google Sheet", error=str(e)
|
||||
)
|
||||
|
||||
else:
|
||||
return scraper_response
|
||||
|
|
|
@ -2,9 +2,14 @@ from dotenv import load_dotenv
|
|||
import os
|
||||
|
||||
load_dotenv()
|
||||
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1HAnn-aPv-BO4QTEzfIWc-5iw50duyMoTgX8o3RsEOWs/edit?usp=sharing)
|
||||
GSHEET_JSON_KEY_PATH = "client_secret.json"
|
||||
GSHEET_NAME = "JobSpy"
|
||||
|
||||
# optional auth
|
||||
AUTH_REQUIRED = False
|
||||
SUPABASE_URL = os.environ.get("SUPABASE_URL")
|
||||
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
|
||||
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
|
||||
ALGORITHM = "HS256"
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES = 60
|
||||
AUTH_REQUIRED = False
|
||||
ALGORITHM = "HS256"
|
||||
|
|
Loading…
Reference in New Issue