mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-05 20:14:32 -08:00
remove duplicates - gsheets (#29)
This commit is contained in:
@@ -11,8 +11,33 @@ from settings import *
|
||||
|
||||
|
||||
class CSVFormatter:
|
||||
@staticmethod
def fetch_job_urls(credentials: Any) -> set:
    """
    Fetches all the job urls from the google sheet to prevent duplicates

    Opens the spreadsheet named by ``GSHEET_NAME``, reads every value from
    its first worksheet and collects the fourth cell of each row after the
    first one.

    :param credentials: credentials object handed to ``gspread.authorize``
    :return: urls - set of the values found in the fourth column
    """
    # Zero-based index of the column collected as the job url.
    url_column = 3

    # No try/except here: the original handler only re-raised the same
    # exception, so letting errors propagate directly is equivalent.
    gc = gspread.authorize(credentials)
    sh = gc.open(GSHEET_NAME)

    worksheet = sh.get_worksheet(0)
    data = worksheet.get_all_values()

    # data[0] is skipped (presumably the header row - confirm against the
    # sheet layout). Rows too short to have a url cell are ignored instead
    # of raising IndexError.
    return {row[url_column] for row in data[1:] if len(row) > url_column}
|
||||
|
||||
@staticmethod
|
||||
def upload_to_google_sheet(csv_data: str):
|
||||
"""
|
||||
Appends rows to google sheet
|
||||
:param csv_data:
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
scope = [
|
||||
"https://www.googleapis.com/auth/spreadsheets",
|
||||
@@ -29,22 +54,28 @@ class CSVFormatter:
|
||||
data_string = csv_data.getvalue()
|
||||
reader = csv.reader(StringIO(data_string))
|
||||
|
||||
job_urls = CSVFormatter.fetch_job_urls(credentials)
|
||||
|
||||
rows = list(reader)
|
||||
|
||||
for i, row in enumerate(rows):
|
||||
if i == 0:
|
||||
continue
|
||||
if row[4] in job_urls:
|
||||
continue
|
||||
|
||||
row[6] = format(int(row[6]), ",d") if row[6] else ""
|
||||
row[7] = format(int(row[7]), ",d") if row[7] else ""
|
||||
worksheet.append_row(row)
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
@staticmethod
def generate_filename() -> str:
    """
    Builds a CSV filename that embeds the current local time

    :return: filename of the form ``JobSpy_results_<YYYYmmdd_HHMMSS>.csv``
    """
    now = datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    return f"JobSpy_results_{timestamp}.csv"
|
||||
|
||||
@staticmethod
def generate_filename() -> str:
    """
    Produces the timestamped name used for the results CSV

    The timestamp is the current local time from ``datetime.now()``,
    formatted as ``YYYYmmdd_HHMMSS``.

    :return: filename
    """
    current_time = datetime.now()
    timestamp = current_time.strftime("%Y%m%d_%H%M%S")
    filename = f"JobSpy_results_{timestamp}.csv"
    return filename
|
||||
|
||||
@@ -59,20 +90,17 @@ class CSVFormatter:
|
||||
writer = csv.writer(output)
|
||||
|
||||
headers = [
|
||||
"Site",
|
||||
"Title",
|
||||
"Company Name",
|
||||
"Job URL",
|
||||
"Country",
|
||||
"City",
|
||||
"State",
|
||||
"Job Type",
|
||||
"Compensation Interval",
|
||||
"Pay Cycle",
|
||||
"Min Amount",
|
||||
"Max Amount",
|
||||
"Currency",
|
||||
"Date Posted",
|
||||
"Description",
|
||||
"Job URL",
|
||||
]
|
||||
writer.writerow(headers)
|
||||
|
||||
@@ -81,11 +109,8 @@ class CSVFormatter:
|
||||
for job in job_response["jobs"]:
|
||||
writer.writerow(
|
||||
[
|
||||
site,
|
||||
job["title"],
|
||||
job["company_name"],
|
||||
job["job_url"],
|
||||
job["location"]["country"],
|
||||
job["location"]["city"],
|
||||
job["location"]["state"],
|
||||
job["job_type"].value if job.get("job_type") else "",
|
||||
@@ -98,11 +123,9 @@ class CSVFormatter:
|
||||
job["compensation"]["max_amount"]
|
||||
if job["compensation"]
|
||||
else "",
|
||||
job["compensation"]["currency"]
|
||||
if job["compensation"]
|
||||
else "",
|
||||
job.get("date_posted", ""),
|
||||
job["description"],
|
||||
job["job_url"],
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user