dynamic yml and py update

pull/268/head
fakebranden 2025-04-14 21:37:07 +00:00
parent 0a5c5fa9b3
commit 6a326b7dd4
3 changed files with 57 additions and 80 deletions


@@ -47,11 +47,11 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Write user-specific config.json
- name: Write user config.json
run: |
echo "{
\"user_email\": \"${{ github.event.inputs.user_email }}\",
\"search_terms\": [\"${{ github.event.inputs.search_terms }}\"],
\"search_terms\": \"${{ github.event.inputs.search_terms }}\",
\"results_wanted\": ${{ github.event.inputs.results_wanted }},
\"max_days_old\": ${{ github.event.inputs.max_days_old }},
\"target_state\": \"${{ github.event.inputs.target_state }}\"
@@ -60,12 +60,23 @@ jobs:
- name: Run JobSpy Scraper Dynamic
run: python job_scraper_dynamic.py
- name: Upload user-specific CSV as artifact
- name: Sanitize email for filename
id: sanitize
run: |
safe_name=$(echo "${{ github.event.inputs.user_email }}" | sed 's/@/_at_/g; s/\./_/g')
echo "::set-output name=safe_name::$safe_name"
- name: Verify user-specific CSV exists
run: |
if [ ! -f "jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv" ]; then
echo "❌ ERROR: jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv not found!"
exit 1
else
echo "✅ Found: jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv"
fi
- name: Upload jobspy output
uses: actions/upload-artifact@v4
with:
name: jobspy-output-${{ github.event.inputs.user_email }}
path: |
jobspy_output_dynamic_${{ github.event.inputs.user_email }}
.replace('@','_at_')
.replace('.','_')
.csv
name: jobspy-output-${{ steps.sanitize.outputs.safe_name }}
path: jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv
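The sanitize step above and the `re.sub` call added in job_scraper_dynamic.py below must produce the same safe name, or the verify and upload steps will not find the CSV. A minimal sketch of that shared transformation, using a hypothetical address purely for illustration:

```python
import re

def sanitize_email(email: str) -> str:
    """Mirror of the workflow's sed step: '@' becomes '_at_', '.' becomes '_'."""
    return re.sub(r"[@.]", lambda m: "_at_" if m.group() == "@" else "_", email)

# Hypothetical input; the real value comes from github.event.inputs.user_email.
safe = sanitize_email("user@example.com")
print(safe)                                 # user_at_example_com
print(f"jobspy_output_dynamic_{safe}.csv")  # expected artifact filename
```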

job_scraper_dynamic.py

@@ -2,7 +2,7 @@ import csv
import datetime
import json
import os
import re
from jobspy.google import Google
from jobspy.linkedin import LinkedIn
from jobspy.indeed import Indeed
@@ -15,49 +15,41 @@ sources = {
"indeed": Indeed,
}
# Read dynamic user-specific config.json
with open("config.json", "r") as f:
config = json.load(f)
# Load user config
with open("config.json", "r") as file:
config = json.load(file)
search_terms = config.get("search_terms", [])
results_wanted = config.get("results_wanted", 100)
max_days_old = config.get("max_days_old", 2)
user_email = config.get("user_email")
search_terms = [term.strip() for term in config.get("search_terms", "").split(",")]
results_wanted = int(config.get("results_wanted", 100))
max_days_old = int(config.get("max_days_old", 2))
target_state = config.get("target_state", "NY")
user_email = config.get("user_email", "unknown@domain.com")
# Sanitize email for filename
safe_email = re.sub(r'[@.]', lambda x: '_at_' if x.group() == '@' else '_', user_email)
output_filename = f"jobspy_output_dynamic_{safe_email}.csv"
def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
"""Scrape jobs from multiple sources and filter by state."""
def scrape_jobs():
all_jobs = []
today = datetime.date.today()
print("\n🔎 DEBUG: Fetching jobs for search terms:", search_terms)
print(f"\n🔎 Fetching jobs for: {search_terms}")
for search_term in search_terms:
for source_name, source_class in sources.items():
print(f"\n🚀 Scraping {search_term} from {source_name}...")
print(f"🚀 Scraping {search_term} from {source_name}...")
scraper = source_class()
search_criteria = ScraperInput(
input_params = ScraperInput(
site_type=[source_name],
search_term=search_term,
results_wanted=results_wanted,
)
results = scraper.scrape(input_params)
job_response = scraper.scrape(search_criteria)
for job in job_response.jobs:
location_city = job.location.city.strip() if job.location.city else "Unknown"
location_state = job.location.state.strip().upper() if job.location.state else "Unknown"
location_country = str(job.location.country) if job.location.country else "Unknown"
if not any(term.lower() in job.title.lower() for term in search_terms):
print(f"🚫 Excluding: {job.title} (Doesn't match search terms)")
continue
for job in results.jobs:
location_state = job.location.state.strip().upper() if job.location and job.location.state else "Unknown"
if job.date_posted and (today - job.date_posted).days <= max_days_old:
if location_state == target_state or job.is_remote:
print(f"✅ MATCH: {job.title} - {location_city}, {location_state} (Posted {job.date_posted})")
all_jobs.append({
"Job ID": job.id,
"Job Title (Primary)": job.title,
@@ -70,61 +62,35 @@ def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
"Salary Min": job.compensation.min_amount if job.compensation else "",
"Salary Max": job.compensation.max_amount if job.compensation else "",
"Date Posted": job.date_posted.strftime("%Y-%m-%d") if job.date_posted else "Not Provided",
"Location City": location_city,
"Location City": job.location.city if job.location and job.location.city else "Unknown",
"Location State": location_state,
"Location Country": location_country,
"Location Country": str(job.location.country) if job.location and job.location.country else "Unknown",
"Job URL": job.job_url,
"Job Description": job.description.replace(",", "") if job.description else "No description available",
"Job Description": job.description.replace(",", "") if job.description else "No description",
"Job Source": source_name
})
else:
print(f"❌ Ignored (Wrong State): {job.title} - {location_city}, {location_state}")
else:
print(f"⏳ Ignored (Too Old): {job.title} - {location_city}, {location_state}")
print(f"\n{len(all_jobs)} jobs retrieved for user {user_email}")
return all_jobs
def save_jobs_to_csv(jobs, user_email):
"""Save job data to a user-specific CSV file using custom delimiter."""
def save_jobs_to_csv(jobs, filename):
if not jobs:
print("⚠️ No jobs found matching criteria.")
print("⚠️ No jobs found.")
return
# Clean the email to create a safe filename
safe_email = user_email.replace("@", "_at_").replace(".", "_")
filename = f"jobspy_output_dynamic_{safe_email}.csv"
fieldnames = list(jobs[0].keys())
header = "|~|".join(fieldnames)
records = [header]
# Remove old file if it exists
if os.path.exists(filename):
os.remove(filename)
for job in jobs:
row = [str(job.get(field, "Not Provided")).replace(",", "") for field in fieldnames]
records.append("|~|".join(row))
fieldnames = [
"Job ID", "Job Title (Primary)", "Company Name", "Industry",
"Experience Level", "Job Type", "Is Remote", "Currency",
"Salary Min", "Salary Max", "Date Posted", "Location City",
"Location State", "Location Country", "Job URL", "Job Description",
"Job Source", "User Email"
]
output = ",".join(records)
with open(filename, "w", encoding="utf-8") as f:
f.write(output)
with open(filename, mode="w", newline="", encoding="utf-8") as file:
writer = csv.DictWriter(file, fieldnames=fieldnames, delimiter="|")
writer.writeheader()
for job in jobs:
job["User Email"] = user_email
writer.writerow(job)
print(f"✅ Saved {len(jobs)} jobs to {filename}")
print(f"📄 File saved: {filename} ({len(jobs)} entries)")
return filename
# Run the scraper and save the results to a user-specific output file
job_data = scrape_jobs(
search_terms=search_terms,
results_wanted=results_wanted,
max_days_old=max_days_old,
target_state=target_state
)
output_filename = save_jobs_to_csv(job_data, user_email)
# Run
scraped_jobs = scrape_jobs()
save_jobs_to_csv(scraped_jobs, output_filename)
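Because the rewritten save_jobs_to_csv now writes through csv.DictWriter with a single-character "|" delimiter (the csv module rejects multi-character delimiters such as the old "|~|" separator), anything consuming the artifact has to read it back with the same delimiter. A minimal sketch of that, assuming a hypothetical sanitized filename:

```python
import csv

# Hypothetical filename; the real one is derived from the user's email address.
filename = "jobspy_output_dynamic_user_at_example_com.csv"

with open(filename, newline="", encoding="utf-8") as file:
    reader = csv.DictReader(file, delimiter="|")  # same delimiter the writer used
    for row in reader:
        print(row["Job Title (Primary)"], row["Location State"], row["Job URL"])
```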