From c310ff61ee1875ed0a6be8407c58b11aa93a63c3 Mon Sep 17 00:00:00 2001
From: fakebranden
Date: Wed, 16 Apr 2025 19:57:18 +0000
Subject: [PATCH] modified py for int string error

---
 job_scraper_dynamic.py | 173 +++++++++++++++--------------------------
 1 file changed, 64 insertions(+), 109 deletions(-)

diff --git a/job_scraper_dynamic.py b/job_scraper_dynamic.py
index 6bac49e..68694e0 100644
--- a/job_scraper_dynamic.py
+++ b/job_scraper_dynamic.py
@@ -1,150 +1,105 @@
-import csv
-import datetime
-import os
-import sys
-import json
+import csv, datetime, os, sys, json
 from jobspy.google import Google
 from jobspy.linkedin import LinkedIn
 from jobspy.indeed import Indeed
 from jobspy.model import ScraperInput
-
-# Define job sources
+# Define sources
 sources = {
     "google": Google,
     "linkedin": LinkedIn,
     "indeed": Indeed,
 }
-
 def sanitize_email(email):
     return email.replace("@", "_at_").replace(".", "_")
+def load_config(email):
+    safe_email = sanitize_email(email)
+    config_path = os.path.join("configs", f"config_{safe_email}.json")
+    if not os.path.exists(config_path):
+        raise FileNotFoundError(f"āŒ Config for {email} not found at {config_path}")
+    with open(config_path, "r", encoding="utf-8") as f:
+        return json.load(f), safe_email
 
-def load_config_file(email=None):
-    if email:
-        safe_email = sanitize_email(email)
-        config_path = os.path.join("configs", f"config_{safe_email}.json")
-        if os.path.exists(config_path):
-            print(f"šŸ“‚ Loading config for {email} → {config_path}")
-            with open(config_path, "r", encoding="utf-8") as f:
-                return json.load(f), safe_email
-        else:
-            raise FileNotFoundError(f"āŒ Config for {email} not found at {config_path}")
-    else:
-        raise ValueError("āŒ Email must be passed as argument")
-
-
-def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
-    all_jobs = []
+def scrape_jobs(search_terms, results_wanted_str, max_days_old_str, target_state):
+    # Convert string values to integers
+    results_wanted = int(results_wanted_str.strip())
+    max_days_old = int(max_days_old_str.strip())
     today = datetime.date.today()
-    print(f"\nšŸ” Scraping jobs for: {search_terms}")
-
+    all_jobs = []
     for term in search_terms:
-        for source_name, source_class in sources.items():
-            print(f"šŸš€ Scraping '{term}' from {source_name}...")
-            scraper = source_class()
-            criteria = ScraperInput(site_type=[source_name], search_term=term, results_wanted=results_wanted)
-
-
+        for source, Scraper in sources.items():
+            print(f"šŸ” Scraping {term} from {source}")
+            scraper = Scraper()
             try:
-                response = scraper.scrape(criteria)
+                jobs = scraper.scrape(ScraperInput(
+                    site_type=[source],
+                    search_term=term,
+                    results_wanted=results_wanted
+                )).jobs
             except Exception as e:
-                print(f"āŒ Error scraping {source_name}: {e}")
+                print(f"āš ļø {source} error: {e}")
                 continue
-
-            for job in response.jobs:
-                city = job.location.city.strip() if job.location.city else "Unknown"
-                state = job.location.state.strip().upper() if job.location.state else "Unknown"
-                country = str(job.location.country) if job.location.country else "Unknown"
-
-
-                if not any(t.lower() in job.title.lower() for t in search_terms):
-                    continue
-
-
+            for job in jobs:
                 if job.date_posted and (today - job.date_posted).days <= max_days_old:
-                    if state == target_state or job.is_remote:
-                        all_jobs.append({
-                            "Job ID": job.id,
-                            "Job Title (Primary)": job.title,
-                            "Company Name": job.company_name or "Unknown",
-                            "Industry": job.company_industry or "Not Provided",
-                            "Experience Level": job.job_level or "Not Provided",
-                            "Job Type": job.job_type[0].name if job.job_type else "Not Provided",
-                            "Is Remote": job.is_remote,
-                            "Currency": job.compensation.currency if job.compensation else "",
-                            "Salary Min": job.compensation.min_amount if job.compensation else "",
-                            "Salary Max": job.compensation.max_amount if job.compensation else "",
-                            "Date Posted": job.date_posted.strftime("%Y-%m-%d") if job.date_posted else "Not Provided",
-                            "Location City": city,
-                            "Location State": state,
-                            "Location Country": country,
-                            "Job URL": job.job_url,
-                            "Job Description": job.description.replace(",", "") if job.description else "No description available",
-                            "Job Source": source_name
-                        })
-    print(f"āœ… {len(all_jobs)} jobs matched.")
+                    if target_state == (job.location.state or "").upper() or job.is_remote:
+                        if any(term.lower() in job.title.lower() for term in search_terms):
+                            all_jobs.append({
+                                "Job ID": job.id,
+                                "Job Title (Primary)": job.title,
+                                "Company Name": job.company_name or "Unknown",
+                                "Industry": job.company_industry or "Not Provided",
+                                "Experience Level": job.job_level or "Not Provided",
+                                "Job Type": job.job_type[0].name if job.job_type else "Not Provided",
+                                "Is Remote": job.is_remote,
+                                "Currency": job.compensation.currency if job.compensation else "",
+                                "Salary Min": job.compensation.min_amount if job.compensation else "",
+                                "Salary Max": job.compensation.max_amount if job.compensation else "",
+                                "Date Posted": job.date_posted.strftime("%Y-%m-%d"),
+                                "Location City": job.location.city or "Unknown",
+                                "Location State": (job.location.state or "Unknown").upper(),
+                                "Location Country": job.location.country or "Unknown",
+                                "Job URL": job.job_url,
+                                "Job Description": job.description.replace(",", "") if job.description else "No description",
+                                "Job Source": source
+                            })
+    print(f"āœ… Found {len(all_jobs)} jobs")
     return all_jobs
 
-
-def save_jobs_to_csv(jobs, output_path):
-    if not jobs:
-        print("āš ļø No jobs found.")
-        return
-
-
+def save_to_csv(jobs, path):
+    os.makedirs(os.path.dirname(path), exist_ok=True)
     fieldnames = [
         "Job ID", "Job Title (Primary)", "Company Name", "Industry",
         "Experience Level", "Job Type", "Is Remote", "Currency",
         "Salary Min", "Salary Max", "Date Posted", "Location City",
-        "Location State", "Location Country", "Job URL", "Job Description",
-        "Job Source"
+        "Location State", "Location Country", "Job URL", "Job Description", "Job Source"
     ]
-
     header = "|~|".join(fieldnames)
-    rows = [header]
+    rows = [header] + ["|~|".join(str(job.get(col, "Not Provided")).replace(",", "").strip() for col in fieldnames) for job in jobs]
+    with open(path, "w", encoding="utf-8") as f:
+        f.write(",".join(rows))
+    print(f"šŸ’¾ Saved output to: {path}")
 
-
-    for job in jobs:
-        row = []
-        for field in fieldnames:
-            value = str(job.get(field, "Not Provided")).replace(",", "").strip()
-            row.append(value if value else "Not Provided")
-        rows.append("|~|".join(row))
-
-
-    output = ",".join(rows)
-    os.makedirs(os.path.dirname(output_path), exist_ok=True)
-    with open(output_path, "w", encoding="utf-8") as f:
-        f.write(output)
-
-
-    print(f"šŸ’¾ Saved output to: {output_path}")
-
-
-# MAIN
 if __name__ == "__main__":
     try:
-        user_email = sys.argv[1] if len(sys.argv) >= 2 else None
-        config, safe_email = load_config_file(user_email)
+        if len(sys.argv) != 3:
+            raise ValueError("āŒ Usage: python job_scraper_dynamic.py <user_email> <run_id>")
+        user_email, run_id = sys.argv[1], sys.argv[2]
+        config, safe_email = load_config(user_email)
 
-        job_data = scrape_jobs(
-            search_terms=config["search_terms"],
-            results_wanted=config["results_wanted"],
-            max_days_old=config["max_days_old"],
-            target_state=config["target_state"]
+        jobs = scrape_jobs(
+            config["search_terms"],
+            config["results_wanted"],
+            config["max_days_old"],
+            config["target_state"]
+        )
-        )
-
-        output_file = f"outputs/jobspy_output_{safe_email}.csv"
-        save_jobs_to_csv(job_data, output_file)
-
+        save_to_csv(jobs, f"outputs/jobspy_output_{safe_email}_{run_id}.csv")
     except Exception as e:
-        print(f"āŒ Fatal Error: {e}")
+        print(f"āŒ Fatal error: {e}")
         sys.exit(1)
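
For reference, a minimal sketch of the setup this patch assumes: the per-user config stores results_wanted and max_days_old as strings (the "int string error" in the subject line), which is why scrape_jobs() now coerces them with int(value.strip()). The email address, search terms, state, and run id below are hypothetical, invented for the example; only the file naming scheme comes from the patched code.

    import json, os, subprocess, sys

    email = "user@example.com"  # hypothetical user
    safe_email = email.replace("@", "_at_").replace(".", "_")

    os.makedirs("configs", exist_ok=True)
    config = {
        "search_terms": ["data engineer", "python developer"],
        "results_wanted": "25",   # stored as a string: the case this patch handles
        "max_days_old": "7",      # stored as a string
        "target_state": "TX",
    }
    with open(f"configs/config_{safe_email}.json", "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)

    # New CLI contract: python job_scraper_dynamic.py <user_email> <run_id>;
    # output lands in outputs/jobspy_output_<safe_email>_<run_id>.csv
    subprocess.run([sys.executable, "job_scraper_dynamic.py", email, "42"], check=True)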
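And a sketch of reading back the file save_to_csv() emits, under the delimiting scheme the patch keeps: records are joined by "," and fields within a record by "|~|", with commas stripped from every field value before writing, so "," can only appear as the record separator. The path is the hypothetical one from the example above.

    def load_jobspy_output(path):
        # The whole file is one line: records separated by ",",
        # fields within a record separated by "|~|".
        with open(path, encoding="utf-8") as f:
            records = f.read().split(",")
        header = records[0].split("|~|")
        return [dict(zip(header, rec.split("|~|"))) for rec in records[1:]]

    jobs = load_jobspy_output("outputs/jobspy_output_user_at_example_com_42.csv")
    print(f"parsed {len(jobs)} job records")

The multi-character "|~|" separator is presumably chosen so free-text fields such as the job description cannot collide with it after comma stripping.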