Add CSV output (#20)

2026-03-05 03:54:31 -08:00 · 2023-08-27 16:25:48 -05:00
parent 32a5bb37cd
commit 80a02faa75
8 changed files with 230 additions and 50 deletions
--- a/api/core/formatters/init.py
+++ b/api/core/formatters/init.py
@@ -0,0 +1,6 @@
+from enum import Enum
+
+
+class OutputFormat(Enum):
+    """
+    Formats in which scraper results can be returned
+    """
+
+    CSV = "csv"
+    JSON = "json"
--- a/api/core/formatters/csv/init.py
+++ b/api/core/formatters/csv/init.py
@@ -0,0 +1,74 @@
+import csv
+from io import StringIO
+from datetime import datetime
+
+from ...jobs import *
+from ...scrapers import *
+
+
+def generate_filename() -> str:
+    """
+    Build a timestamped file name for a csv export
+    :return: name of the form JobSpy_results_YYYYMMDD_HHMMSS.csv, stamped with the current local time
+    """
+    return f"JobSpy_results_{datetime.now():%Y%m%d_%H%M%S}.csv"
+
+
+class CSVFormatter:
+    """
+    Render scraper results as csv text
+    """
+
+    # Header row; _job_to_row must emit its values in this same order.
+    _HEADERS = (
+        "Site",
+        "Title",
+        "Company Name",
+        "Job URL",
+        "Country",
+        "City",
+        "State",
+        "Job Type",
+        "Compensation Interval",
+        "Min Amount",
+        "Max Amount",
+        "Currency",
+        "Date Posted",
+        "Description",
+    )
+
+    @staticmethod
+    def _job_to_row(site: str, job: dict) -> list:
+        """
+        Flatten one job dict into a csv row laid out like _HEADERS
+        :param site: name of the site field the job was listed under
+        :param job: a single job entry in dict form
+        :return: list of cell values, one per header column
+        """
+        # Optional sub-objects may be None; substituting an empty dict lets
+        # every lookup below fall back to "" instead of raising.
+        location = job.get("location") or {}
+        compensation = job.get("compensation") or {}
+        job_type = job.get("job_type")
+        interval = compensation.get("interval")
+
+        return [
+            site,
+            job["title"],
+            job["company_name"],
+            job["job_url"],
+            location.get("country", ""),
+            location.get("city", ""),
+            location.get("state", ""),
+            job_type.value if job_type else "",
+            interval.value if interval else "",
+            compensation.get("min_amount", ""),
+            compensation.get("max_amount", ""),
+            compensation.get("currency", ""),
+            job.get("date_posted", ""),
+            job["description"],
+        ]
+
+    @staticmethod
+    def format(jobs: ScraperResponse) -> StringIO:
+        """
+        Transform the jobs objects into csv
+        :param jobs: scraper results, one optional response per site
+        :return: in-memory csv buffer, rewound to the start
+        """
+        output = StringIO()
+        writer = csv.writer(output)
+        writer.writerow(CSVFormatter._HEADERS)
+
+        for site, job_response in jobs.dict().items():
+            # Skip sites with no response or whose response is not flagged as successful.
+            if not job_response or not job_response.get("success"):
+                continue
+            writer.writerows(
+                CSVFormatter._job_to_row(site, job)
+                for job in job_response.get("jobs") or []
+            )
+
+        # Rewind so the caller can read the buffer from the beginning.
+        output.seek(0)
+        return output
--- a/api/core/scrapers/init.py
+++ b/api/core/scrapers/init.py
@@ -1,5 +1,6 @@
 from ..jobs import *
-from typing import List
+from ..formatters import OutputFormat
+from typing import List, Dict, Optional


 class StatusException(Exception):
@@ -16,6 +17,7 @@ class Site(Enum):
 class ScraperInput(BaseModel):
    site_type: List[Site]
    search_term: str
+    output_format: OutputFormat = OutputFormat.JSON

    location: str = None
    distance: int = None
@@ -26,6 +28,12 @@ class ScraperInput(BaseModel):
    results_wanted: int = 15


+class ScraperResponse(BaseModel):
+    """
+    Scrape results grouped by site; a site's field is None when no response is supplied for it
+    """
+
+    # Explicit None defaults keep every field optional: with a bare Optional[...]
+    # annotation pydantic v1 implies the default, but pydantic v2 makes the field required.
+    linkedin: Optional[JobResponse] = None
+    indeed: Optional[JobResponse] = None
+    zip_recruiter: Optional[JobResponse] = None
+
+
 class Scraper:
    def __init__(self, site: Site, url: str):
        self.site = site
--- a/api/core/scrapers/ziprecruiter/init.py
+++ b/api/core/scrapers/ziprecruiter/init.py
@@ -96,7 +96,9 @@ class ZipRecruiterScraper(Scraper):
            title = job.find("h2", {"class": "title"}).text
            company = job.find("a", {"class": "company_name"}).text.strip()

-            description, updated_job_url = ZipRecruiterScraper.get_description(job_url, session)
+            description, updated_job_url = ZipRecruiterScraper.get_description(
+                job_url, session
+            )
            if updated_job_url is not None:
                job_url = updated_job_url
            if description is None: