sheets integration

2023-08-27 19:32:13 -05:00 · 2023-08-27 19:32:13 -05:00 · 9801f2a97e
parent d10dce6913
commit 9801f2a97e
8 changed files with 73 additions and 23 deletions
--- a/.gitignore
+++ b/.gitignore
@ -4,4 +4,5 @@
 /ven/
 **/__pycache__/
 *.pyc
-.env
+.env
+client_secret.json
--- a/JobSpy_GSheet_template.xlsx
+++ b/JobSpy_GSheet_template.xlsx
--- a/api/auth/auth_utils.py
+++ b/api/auth/auth_utils.py
@ -4,11 +4,9 @@ from jose import jwt, JWTError
 from fastapi import HTTPException, status, Depends
 from fastapi.security import OAuth2PasswordBearer

-from settings import *
 from api.core.users import TokenData
 from api.auth.db_utils import UserInDB, get_user

-load_dotenv()
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")


--- a/api/core/formatters/init.py
+++ b/api/core/formatters/init.py
@ -4,3 +4,4 @@ from enum import Enum
 class OutputFormat(Enum):
    CSV = "csv"
    JSON = "json"
+    GSHEET = "gsheet"
--- a/api/core/formatters/csv/init.py
+++ b/api/core/formatters/csv/init.py
@ -1,19 +1,50 @@
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
+
 import csv
 from io import StringIO
 from datetime import datetime

 from ...jobs import *
 from ...scrapers import *
-
-
-def generate_filename() -> str:
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    return f"JobSpy_results_{timestamp}.csv"
+from settings import *


 class CSVFormatter:
    @staticmethod
-    def format(jobs: ScraperResponse) -> StringIO:
+    def upload_to_google_sheet(csv_data: str):
+        try:
+            scope = [
+                "https://www.googleapis.com/auth/spreadsheets",
+                "https://www.googleapis.com/auth/drive.file",
+                "https://www.googleapis.com/auth/drive",
+            ]
+            credentials = ServiceAccountCredentials.from_json_keyfile_name(
+                GSHEET_JSON_KEY_PATH, scope
+            )
+            gc = gspread.authorize(credentials)
+            sh = gc.open(GSHEET_NAME)
+
+            worksheet = sh.get_worksheet(0)
+            data_string = csv_data.getvalue()
+            reader = csv.reader(StringIO(data_string))
+
+            rows = list(reader)
+
+            for i, row in enumerate(rows):
+                if i == 0:
+                    continue
+                worksheet.append_row(row)
+        except Exception as e:
+            raise e
+
+    @staticmethod
+    def generate_filename() -> str:
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        return f"JobSpy_results_{timestamp}.csv"
+
+    @staticmethod
+    def format(jobs: CommonResponse) -> StringIO:
        """
        Transform the jobs objects into csv
        :param jobs:
@ -41,7 +72,7 @@ class CSVFormatter:
        writer.writerow(headers)

        for site, job_response in jobs.dict().items():
-            if job_response and job_response.get("success"):
+            if isinstance(job_response, dict) and job_response.get("success"):
                for job in job_response["jobs"]:
                    writer.writerow(
                        [
--- a/api/core/scrapers/init.py
+++ b/api/core/scrapers/init.py
@ -1,6 +1,6 @@
 from ..jobs import *
 from ..formatters import OutputFormat
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Any


 class StatusException(Exception):
@ -28,10 +28,12 @@ class ScraperInput(BaseModel):
    results_wanted: int = 15


-class ScraperResponse(BaseModel):
-    linkedin: Optional[JobResponse]
-    indeed: Optional[JobResponse]
-    zip_recruiter: Optional[JobResponse]
+class CommonResponse(BaseModel):
+    status: Optional[str]
+    error: Optional[str]
+    linkedin: Optional[Any] = None
+    indeed: Optional[Any] = None
+    zip_recruiter: Optional[Any] = None


 class Scraper:
--- a/api/v1/jobs/init.py
+++ b/api/v1/jobs/init.py
@ -6,13 +6,13 @@ from concurrent.futures import ThreadPoolExecutor
 from api.core.scrapers.indeed import IndeedScraper
 from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
 from api.core.scrapers.linkedin import LinkedInScraper
-from api.core.formatters.csv import CSVFormatter, generate_filename
+from api.core.formatters.csv import CSVFormatter
 from api.core.scrapers import (
    ScraperInput,
    Site,
    JobResponse,
    OutputFormat,
-    ScraperResponse,
+    CommonResponse,
 )
 from typing import List, Dict, Tuple, Union

@ -26,7 +26,7 @@ SCRAPER_MAPPING = {


@router.post("/")
-async def scrape_jobs(scraper_input: ScraperInput) -> ScraperResponse:
+async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
    """
    Asynchronously scrapes job data from multiple job sites.
    :param scraper_input:
@ -42,14 +42,26 @@ async def scrape_jobs(scraper_input: ScraperInput) -> ScraperResponse:
    with ThreadPoolExecutor() as executor:
        results = dict(executor.map(scrape_site, scraper_input.site_type))

-    scraper_response = ScraperResponse(**results)
+    scraper_response = CommonResponse(status="JSON response success", **results)

    if scraper_input.output_format == OutputFormat.CSV:
        csv_output = CSVFormatter.format(scraper_response)
        response = StreamingResponse(csv_output, media_type="text/csv")
        response.headers[
            "Content-Disposition"
-        ] = f"attachment; filename={generate_filename()}"
+        ] = f"attachment; filename={CSVFormatter.generate_filename()}"
        return response

-    return scraper_response
+    elif scraper_input.output_format == OutputFormat.GSHEET:
+        csv_output = CSVFormatter.format(scraper_response)
+        try:
+            CSVFormatter.upload_to_google_sheet(csv_output)
+            return CommonResponse(status="Successfully uploaded to Google Sheets")
+
+        except Exception as e:
+            return CommonResponse(
+                status="Failed to upload to Google Sheet", error=str(e)
+            )
+
+    else:
+        return scraper_response
--- a/settings.py
+++ b/settings.py
@ -2,9 +2,14 @@ from dotenv import load_dotenv
 import os

 load_dotenv()
+# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1HAnn-aPv-BO4QTEzfIWc-5iw50duyMoTgX8o3RsEOWs/edit?usp=sharing)
+GSHEET_JSON_KEY_PATH = "client_secret.json"
+GSHEET_NAME = "JobSpy"
+
+# optional auth
+AUTH_REQUIRED = False
 SUPABASE_URL = os.environ.get("SUPABASE_URL")
 SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
 JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
-ALGORITHM = "HS256"
 ACCESS_TOKEN_EXPIRE_MINUTES = 60
-AUTH_REQUIRED = False
+ALGORITHM = "HS256"