dynamic yml and py update

pull/268/head
fakebranden 2025-04-14 21:37:07 +00:00
parent 0a5c5fa9b3
commit 6a326b7dd4
3 changed files with 57 additions and 80 deletions

Workflow file (GitHub Actions .yml):

@@ -47,11 +47,11 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt
-      - name: Write user-specific config.json
+      - name: Write user config.json
         run: |
           echo "{
             \"user_email\": \"${{ github.event.inputs.user_email }}\",
-            \"search_terms\": [\"${{ github.event.inputs.search_terms }}\"],
+            \"search_terms\": \"${{ github.event.inputs.search_terms }}\",
             \"results_wanted\": ${{ github.event.inputs.results_wanted }},
             \"max_days_old\": ${{ github.event.inputs.max_days_old }},
             \"target_state\": \"${{ github.event.inputs.target_state }}\"
@@ -60,12 +60,23 @@ jobs:
       - name: Run JobSpy Scraper Dynamic
         run: python job_scraper_dynamic.py
-      - name: Upload user-specific CSV as artifact
+      - name: Sanitize email for filename
+        id: sanitize
+        run: |
+          safe_name=$(echo "${{ github.event.inputs.user_email }}" | sed 's/@/_at_/g; s/\./_/g')
+          echo "::set-output name=safe_name::$safe_name"
+      - name: Verify user-specific CSV exists
+        run: |
+          if [ ! -f "jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv" ]; then
+            echo "❌ ERROR: jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv not found!"
+            exit 1
+          else
+            echo "✅ Found: jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv"
+          fi
+      - name: Upload jobspy output
         uses: actions/upload-artifact@v4
         with:
-          name: jobspy-output-${{ github.event.inputs.user_email }}
-          path: |
-            jobspy_output_dynamic_${{ github.event.inputs.user_email }}
-            .replace('@','_at_')
-            .replace('.','_')
-            .csv
+          name: jobspy-output-${{ steps.sanitize.outputs.safe_name }}
+          path: jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv
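For reference, the "Write user config.json" step now emits search_terms as a single comma-separated string (the Python script splits it), and the sanitize step derives a filesystem-safe name from the e-mail with sed before the verify and upload steps reuse it (published via ::set-output; newer runners prefer appending to "$GITHUB_OUTPUT", but the output name is the same). A minimal sketch of the expected mapping, using hypothetical input values (jane.doe@example.com and the search terms below are illustrations, not repo defaults):

# Sketch only: mirrors the config.json written by the workflow and the
# sed expression 's/@/_at_/g; s/\./_/g' used by the sanitize step.
import json
import re

user_email = "jane.doe@example.com"  # hypothetical example value

config = {
    "user_email": user_email,
    "search_terms": "data engineer, python developer",  # single comma-separated string
    "results_wanted": 50,
    "max_days_old": 2,
    "target_state": "NY",
}
print(json.dumps(config, indent=2))

# Same substitution the sanitize step performs in bash.
safe_name = re.sub(r"\.", "_", user_email.replace("@", "_at_"))
assert safe_name == "jane_doe_at_example_com"
print(f"jobspy_output_dynamic_{safe_name}.csv")  # filename the verify/upload steps expect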

job_scraper_dynamic.py:

@@ -2,7 +2,7 @@ import csv
 import datetime
 import json
 import os
+import re
 from jobspy.google import Google
 from jobspy.linkedin import LinkedIn
 from jobspy.indeed import Indeed
@@ -15,49 +15,41 @@ sources = {
     "indeed": Indeed,
 }
-# Read dynamic user-specific config.json
-with open("config.json", "r") as f:
-    config = json.load(f)
-search_terms = config.get("search_terms", [])
-results_wanted = config.get("results_wanted", 100)
-max_days_old = config.get("max_days_old", 2)
+# Load user config
+with open("config.json", "r") as file:
+    config = json.load(file)
+user_email = config.get("user_email")
+search_terms = [term.strip() for term in config.get("search_terms", "").split(",")]
+results_wanted = int(config.get("results_wanted", 100))
+max_days_old = int(config.get("max_days_old", 2))
 target_state = config.get("target_state", "NY")
-user_email = config.get("user_email", "unknown@domain.com")
-
-def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
-    """Scrape jobs from multiple sources and filter by state."""
+
+# Sanitize email for filename
+safe_email = re.sub(r'[@.]', lambda x: '_at_' if x.group() == '@' else '_', user_email)
+output_filename = f"jobspy_output_dynamic_{safe_email}.csv"
+
+def scrape_jobs():
     all_jobs = []
     today = datetime.date.today()
-    print("\n🔎 DEBUG: Fetching jobs for search terms:", search_terms)
+    print(f"\n🔎 Fetching jobs for: {search_terms}")
     for search_term in search_terms:
         for source_name, source_class in sources.items():
-            print(f"\n🚀 Scraping {search_term} from {source_name}...")
+            print(f"🚀 Scraping {search_term} from {source_name}...")
             scraper = source_class()
-            search_criteria = ScraperInput(
+            input_params = ScraperInput(
                 site_type=[source_name],
                 search_term=search_term,
                 results_wanted=results_wanted,
             )
-            job_response = scraper.scrape(search_criteria)
-            for job in job_response.jobs:
-                location_city = job.location.city.strip() if job.location.city else "Unknown"
-                location_state = job.location.state.strip().upper() if job.location.state else "Unknown"
-                location_country = str(job.location.country) if job.location.country else "Unknown"
-                if not any(term.lower() in job.title.lower() for term in search_terms):
-                    print(f"🚫 Excluding: {job.title} (Doesn't match search terms)")
-                    continue
+            results = scraper.scrape(input_params)
+            for job in results.jobs:
+                location_state = job.location.state.strip().upper() if job.location and job.location.state else "Unknown"
                 if job.date_posted and (today - job.date_posted).days <= max_days_old:
                     if location_state == target_state or job.is_remote:
-                        print(f"✅ MATCH: {job.title} - {location_city}, {location_state} (Posted {job.date_posted})")
                         all_jobs.append({
                             "Job ID": job.id,
                             "Job Title (Primary)": job.title,
@@ -70,61 +62,35 @@ def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
                             "Salary Min": job.compensation.min_amount if job.compensation else "",
                             "Salary Max": job.compensation.max_amount if job.compensation else "",
                             "Date Posted": job.date_posted.strftime("%Y-%m-%d") if job.date_posted else "Not Provided",
-                            "Location City": location_city,
+                            "Location City": job.location.city if job.location and job.location.city else "Unknown",
                             "Location State": location_state,
-                            "Location Country": location_country,
+                            "Location Country": str(job.location.country) if job.location and job.location.country else "Unknown",
                             "Job URL": job.job_url,
-                            "Job Description": job.description.replace(",", "") if job.description else "No description available",
+                            "Job Description": job.description.replace(",", "") if job.description else "No description",
                             "Job Source": source_name
                         })
-                    else:
-                        print(f"❌ Ignored (Wrong State): {job.title} - {location_city}, {location_state}")
-                else:
-                    print(f"⏳ Ignored (Too Old): {job.title} - {location_city}, {location_state}")
-    print(f"\n{len(all_jobs)} jobs retrieved for user {user_email}")
     return all_jobs

-def save_jobs_to_csv(jobs, user_email):
-    """Save job data to a user-specific CSV file using custom delimiter."""
+def save_jobs_to_csv(jobs, filename):
     if not jobs:
-        print("⚠️ No jobs found matching criteria.")
+        print("⚠️ No jobs found.")
         return
-    # Clean the email to create a safe filename
-    safe_email = user_email.replace("@", "_at_").replace(".", "_")
-    filename = f"jobspy_output_dynamic_{safe_email}.csv"
-    # Remove old file if it exists
-    if os.path.exists(filename):
-        os.remove(filename)
-    fieldnames = [
-        "Job ID", "Job Title (Primary)", "Company Name", "Industry",
-        "Experience Level", "Job Type", "Is Remote", "Currency",
-        "Salary Min", "Salary Max", "Date Posted", "Location City",
-        "Location State", "Location Country", "Job URL", "Job Description",
-        "Job Source", "User Email"
-    ]
-    with open(filename, mode="w", newline="", encoding="utf-8") as file:
-        writer = csv.DictWriter(file, fieldnames=fieldnames, delimiter="|")
-        writer.writeheader()
-        for job in jobs:
-            job["User Email"] = user_email
-            writer.writerow(job)
-    print(f"📄 File saved: {filename} ({len(jobs)} entries)")
-    return filename
-
-# Run the scraper and save the results to a user-specific output file
-job_data = scrape_jobs(
-    search_terms=search_terms,
-    results_wanted=results_wanted,
-    max_days_old=max_days_old,
-    target_state=target_state
-)
-output_filename = save_jobs_to_csv(job_data, user_email)
+    fieldnames = list(jobs[0].keys())
+    header = "|~|".join(fieldnames)
+    records = [header]
+    for job in jobs:
+        row = [str(job.get(field, "Not Provided")).replace(",", "") for field in fieldnames]
+        records.append("|~|".join(row))
+    output = ",".join(records)
+    with open(filename, "w", encoding="utf-8") as f:
+        f.write(output)
+    print(f"✅ Saved {len(jobs)} jobs to {filename}")
+
+# Run
+scraped_jobs = scrape_jobs()
+save_jobs_to_csv(scraped_jobs, output_filename)
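The rewritten save_jobs_to_csv drops csv.DictWriter: commas are stripped from every field, fields inside a record are joined with "|~|", and the records (header first) are joined with ",", so the whole file is effectively one long line. A minimal sketch of reading that format back, assuming a file produced by the function above (parse_jobspy_output is a hypothetical helper, not part of this commit):

# Sketch: parse the "|~|"-delimited, comma-separated output written by save_jobs_to_csv.
def parse_jobspy_output(path):
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()
    records = content.split(",")           # records were joined with ","
    fieldnames = records[0].split("|~|")   # first record is the header row
    rows = []
    for record in records[1:]:
        rows.append(dict(zip(fieldnames, record.split("|~|"))))
    return rows

# Hypothetical usage:
# jobs = parse_jobspy_output("jobspy_output_dynamic_jane_doe_at_example_com.csv")
# print(len(jobs), "rows")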