Add CSV output (#20)

This commit is contained in:
Cullen Watson
2023-08-27 16:25:48 -05:00
committed by GitHub
parent 32a5bb37cd
commit 80a02faa75
8 changed files with 230 additions and 50 deletions

View File

@@ -0,0 +1,6 @@
from enum import Enum
class OutputFormat(Enum):
    """Enumeration of the supported output formats.

    Each member's value is the lowercase string name of the format.
    """

    CSV = "csv"
    JSON = "json"

View File

@@ -0,0 +1,74 @@
import csv
from io import StringIO
from datetime import datetime
from ...jobs import *
from ...scrapers import *
def generate_filename() -> str:
    """
    Build a CSV filename that embeds the current date and time
    :return: a name of the form JobSpy_results_YYYYMMDD_HHMMSS.csv
    """
    now = datetime.now()
    return f"JobSpy_results_{now.strftime('%Y%m%d_%H%M%S')}.csv"
class CSVFormatter:
    """Renders scraper results as CSV text."""

    @staticmethod
    def format(jobs: ScraperResponse) -> StringIO:
        """
        Transform the job objects into CSV

        One header row is written first, followed by one row per job. Only
        sites whose response is present and has a truthy "success" entry
        contribute rows.

        :param jobs: scraper response; its ``dict()`` form maps each site
            name to that site's response (or to a falsy value)
        :return: in-memory text buffer holding the CSV, rewound to the start
        """
        buffer = StringIO()
        csv_out = csv.writer(buffer)
        csv_out.writerow(
            [
                "Site",
                "Title",
                "Company Name",
                "Job URL",
                "Country",
                "City",
                "State",
                "Job Type",
                "Compensation Interval",
                "Min Amount",
                "Max Amount",
                "Currency",
                "Date Posted",
                "Description",
            ]
        )

        for site, response in jobs.dict().items():
            # Skip sites that returned nothing or reported failure.
            if not response or not response.get("success"):
                continue

            for posting in response["jobs"]:
                row = [
                    site,
                    posting["title"],
                    posting["company_name"],
                    posting["job_url"],
                ]

                place = posting["location"]
                row.extend([place["country"], place["city"], place["state"]])

                # Job type is optional; an absent or empty value yields a blank cell.
                if posting.get("job_type"):
                    row.append(posting["job_type"].value)
                else:
                    row.append("")

                # Compensation is all-or-nothing: four filled cells or four blanks.
                pay = posting["compensation"]
                if pay:
                    row.extend(
                        [
                            pay["interval"].value,
                            pay["min_amount"],
                            pay["max_amount"],
                            pay["currency"],
                        ]
                    )
                else:
                    row.extend(["", "", "", ""])

                row.append(posting.get("date_posted", ""))
                row.append(posting["description"])
                csv_out.writerow(row)

        buffer.seek(0)
        return buffer

View File

@@ -1,5 +1,6 @@
from ..jobs import *
from typing import List
from ..formatters import OutputFormat
from typing import List, Dict, Optional
class StatusException(Exception):
@@ -16,6 +17,7 @@ class Site(Enum):
class ScraperInput(BaseModel):
site_type: List[Site]
search_term: str
output_format: OutputFormat = OutputFormat.JSON
location: str = None
distance: int = None
@@ -26,6 +28,12 @@ class ScraperInput(BaseModel):
results_wanted: int = 15
class ScraperResponse(BaseModel):
    """Container holding one optional job response per supported site.

    A field is ``None`` when no response is present for that site.
    """

    # Explicit ``None`` defaults keep these fields optional regardless of the
    # pydantic major version: v1 treats a bare ``Optional[X]`` as defaulting to
    # ``None``, while v2 treats it as a required field that merely accepts None.
    linkedin: Optional[JobResponse] = None
    indeed: Optional[JobResponse] = None
    zip_recruiter: Optional[JobResponse] = None
class Scraper:
def __init__(self, site: Site, url: str):
self.site = site

View File

@@ -96,7 +96,9 @@ class ZipRecruiterScraper(Scraper):
title = job.find("h2", {"class": "title"}).text
company = job.find("a", {"class": "company_name"}).text.strip()
description, updated_job_url = ZipRecruiterScraper.get_description(job_url, session)
description, updated_job_url = ZipRecruiterScraper.get_description(
job_url, session
)
if updated_job_url is not None:
job_url = updated_job_url
if description is None: