JobSpy/api/core/formatters/csv/__init__.py

134 lines
4.1 KiB
Python
Raw Normal View History

2023-08-27 18:32:46 -07:00
import gspread
from oauth2client.service_account import ServiceAccountCredentials
2023-08-27 14:25:48 -07:00
import csv
from io import StringIO
from datetime import datetime
from ...jobs import *
from ...scrapers import *
2023-08-27 18:32:46 -07:00
from settings import *
2023-08-27 14:25:48 -07:00
2023-08-27 18:32:46 -07:00
class CSVFormatter:
    """
    Turns scraped job results into CSV and appends them to a Google Sheet.

    All methods are static; the class only serves as a namespace.
    """

    # Column layout written by ``format``. The sheet receives rows in exactly
    # this layout, so the same names are used to locate columns when reading
    # the sheet back or when post-processing the CSV before upload.
    HEADERS = (
        "Title",
        "Company Name",
        "City",
        "State",
        "Job Type",
        "Pay Cycle",
        "Min Amount",
        "Max Amount",
        "Date Posted",
        "Description",
        "Job URL",
    )

    @staticmethod
    def _column_index(header_row, name: str) -> int:
        """
        Locates a column by its header name
        :param header_row: first row of the sheet / csv
        :param name: header to look for (must be one of ``HEADERS``)
        :return: index of the column in header_row, or its position in
            ``HEADERS`` when the header row does not contain the name
        """
        try:
            return list(header_row).index(name)
        except ValueError:
            return CSVFormatter.HEADERS.index(name)

    @staticmethod
    def _format_amount(value: str) -> str:
        """
        Adds thousands separators to a numeric string
        :param value: amount as text, may be empty
        :return: e.g. "50000" -> "50,000"; empty input yields ""
        :raises ValueError: if value is non-empty and not numeric
        """
        if not value:
            return ""
        try:
            number = int(value)
        except ValueError:
            # Amounts serialized as floats ("50000.0") are truncated to ints.
            number = int(float(value))
        # f-string instead of format(): this class defines its own ``format``.
        return f"{number:,d}"

    @staticmethod
    def _select_new_rows(rows: list, known_urls: set) -> list:
        """
        Picks the data rows whose job url has not been seen yet and
        formats their min/max amount columns
        :param rows: parsed csv rows, the first one being the header row
        :param known_urls: job urls that must be skipped
        :return: rows to append, in their original order
        """
        if not rows:
            return []

        header_row = rows[0]
        url_col = CSVFormatter._column_index(header_row, "Job URL")
        min_col = CSVFormatter._column_index(header_row, "Min Amount")
        max_col = CSVFormatter._column_index(header_row, "Max Amount")

        # Copy so the caller's set is untouched; urls accepted here are added
        # so a url repeated inside the same csv is only kept once.
        seen_urls = set(known_urls)
        new_rows = []
        for row in rows[1:]:
            if not row:
                # csv.reader yields [] for blank lines
                continue
            job_url = row[url_col]
            if job_url in seen_urls:
                continue
            seen_urls.add(job_url)
            row[min_col] = CSVFormatter._format_amount(row[min_col])
            row[max_col] = CSVFormatter._format_amount(row[max_col])
            new_rows.append(row)
        return new_rows

    @staticmethod
    def fetch_job_urls(credentials: Any) -> set:
        """
        Fetches all the job urls from the google sheet to prevent duplicates
        :param credentials: credentials passed to gspread.authorize
        :return: urls found in the "Job URL" column of the first worksheet
        """
        gc = gspread.authorize(credentials)
        sh = gc.open(GSHEET_NAME)
        worksheet = sh.get_worksheet(0)
        data = worksheet.get_all_values()
        if not data:
            return set()

        # The first row is treated as the header row and is not a job.
        url_col = CSVFormatter._column_index(data[0], "Job URL")
        return {row[url_col] for row in data[1:] if len(row) > url_col}

    @staticmethod
    def upload_to_google_sheet(csv_data: StringIO):
        """
        Appends rows to google sheet, skipping jobs whose url is already there
        :param csv_data: csv produced by ``format`` (a StringIO; a plain
            csv string is accepted as well)
        :return:
        """
        scope = [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive.file",
            "https://www.googleapis.com/auth/drive",
        ]
        credentials = ServiceAccountCredentials.from_json_keyfile_name(
            "client_secret.json", scope
        )
        gc = gspread.authorize(credentials)
        sh = gc.open(GSHEET_NAME)
        worksheet = sh.get_worksheet(0)

        if isinstance(csv_data, str):
            data_string = csv_data
        else:
            data_string = csv_data.getvalue()
        rows = list(csv.reader(StringIO(data_string)))

        job_urls = CSVFormatter.fetch_job_urls(credentials)
        new_rows = CSVFormatter._select_new_rows(rows, job_urls)
        if new_rows:
            # One batched request instead of one API call per row.
            worksheet.append_rows(new_rows)

    @staticmethod
    def generate_filename() -> str:
        """
        Adds a timestamp to the filename header
        :return: filename
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return f"JobSpy_results_{timestamp}.csv"

    @staticmethod
    def format(jobs: "CommonResponse") -> StringIO:
        """
        Transform the jobs objects into csv
        :param jobs: response whose ``dict()`` maps each site to its result
        :return: csv, rewound to the start; header row first, then one row
            per job of every site entry that is a dict with a truthy "success"
        """
        output = StringIO()
        writer = csv.writer(output)
        writer.writerow(CSVFormatter.HEADERS)

        for site, job_response in jobs.dict().items():
            # Entries that are not dicts, or that did not succeed, have no
            # jobs to write.
            if not (isinstance(job_response, dict) and job_response.get("success")):
                continue

            for job in job_response["jobs"]:
                job_type = job.get("job_type")
                compensation = job.get("compensation")
                if compensation:
                    pay_cycle = compensation["interval"].value
                    min_amount = compensation["min_amount"]
                    max_amount = compensation["max_amount"]
                else:
                    pay_cycle = min_amount = max_amount = ""

                writer.writerow(
                    [
                        job["title"],
                        job["company_name"],
                        job["location"]["city"],
                        job["location"]["state"],
                        job_type.value if job_type else "",
                        pay_cycle,
                        min_amount,
                        max_amount,
                        job.get("date_posted", ""),
                        job["description"],
                        job["job_url"],
                    ]
                )

        output.seek(0)
        return output