mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-05 03:54:31 -08:00
Add CSV output (#20)
This commit is contained in:
6
api/core/formatters/__init__.py
Normal file
6
api/core/formatters/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class OutputFormat(Enum):
    """Supported serialization formats for scraper results."""

    CSV = "csv"
    JSON = "json"
|
||||
74
api/core/formatters/csv/__init__.py
Normal file
74
api/core/formatters/csv/__init__.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import csv
|
||||
from io import StringIO
|
||||
from datetime import datetime
|
||||
|
||||
from ...jobs import *
|
||||
from ...scrapers import *
|
||||
|
||||
|
||||
def generate_filename() -> str:
    """Build a timestamped result filename, e.g. ``JobSpy_results_20230914_101530.csv``."""
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return "JobSpy_results_{}.csv".format(stamp)
|
||||
|
||||
|
||||
class CSVFormatter:
    @staticmethod
    def format(jobs: "ScraperResponse") -> StringIO:
        """
        Transform the scraped job results into CSV.

        :param jobs: per-site scraper responses; `.dict()` yields one optional
                     response mapping per site
        :return: an in-memory CSV buffer, rewound to the start for reading
        """
        output = StringIO()
        writer = csv.writer(output)

        headers = [
            "Site",
            "Title",
            "Company Name",
            "Job URL",
            "Country",
            "City",
            "State",
            "Job Type",
            "Compensation Interval",
            "Min Amount",
            "Max Amount",
            "Currency",
            "Date Posted",
            "Description",
        ]
        writer.writerow(headers)

        for site, job_response in jobs.dict().items():
            # Skip sites that were not scraped or whose scrape failed.
            if not (job_response and job_response.get("success")):
                continue
            for job in job_response["jobs"]:
                # Hoist the optional sub-dicts once; .get() avoids a KeyError
                # when a key is absent (the original indexed
                # job["compensation"] four separate times).
                location = job.get("location") or {}
                compensation = job.get("compensation") or {}
                job_type = job.get("job_type")
                interval = compensation.get("interval")
                writer.writerow(
                    [
                        site,
                        job["title"],
                        job["company_name"],
                        job["job_url"],
                        location.get("country", ""),
                        location.get("city", ""),
                        location.get("state", ""),
                        job_type.value if job_type else "",
                        interval.value if interval else "",
                        compensation.get("min_amount", ""),
                        compensation.get("max_amount", ""),
                        compensation.get("currency", ""),
                        job.get("date_posted", ""),
                        job["description"],
                    ]
                )

        output.seek(0)
        return output
|
||||
@@ -1,5 +1,6 @@
|
||||
from ..jobs import *
|
||||
from typing import List
|
||||
from ..formatters import OutputFormat
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
|
||||
class StatusException(Exception):
|
||||
@@ -16,6 +17,7 @@ class Site(Enum):
|
||||
class ScraperInput(BaseModel):
|
||||
site_type: List[Site]
|
||||
search_term: str
|
||||
output_format: OutputFormat = OutputFormat.JSON
|
||||
|
||||
location: str = None
|
||||
distance: int = None
|
||||
@@ -26,6 +28,12 @@ class ScraperInput(BaseModel):
|
||||
results_wanted: int = 15
|
||||
|
||||
|
||||
class ScraperResponse(BaseModel):
    """Aggregated scrape results: one optional ``JobResponse`` field per site."""

    linkedin: Optional[JobResponse]
    indeed: Optional[JobResponse]
    zip_recruiter: Optional[JobResponse]
|
||||
|
||||
|
||||
class Scraper:
|
||||
def __init__(self, site: Site, url: str):
|
||||
self.site = site
|
||||
|
||||
@@ -96,7 +96,9 @@ class ZipRecruiterScraper(Scraper):
|
||||
title = job.find("h2", {"class": "title"}).text
|
||||
company = job.find("a", {"class": "company_name"}).text.strip()
|
||||
|
||||
description, updated_job_url = ZipRecruiterScraper.get_description(job_url, session)
|
||||
description, updated_job_url = ZipRecruiterScraper.get_description(
|
||||
job_url, session
|
||||
)
|
||||
if updated_job_url is not None:
|
||||
job_url = updated_job_url
|
||||
if description is None:
|
||||
|
||||
Reference in New Issue
Block a user