mirror of https://github.com/Bunsly/JobSpy
dynamic yml and py update
parent 0a5c5fa9b3
commit 6a326b7dd4
@@ -47,11 +47,11 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt

-      - name: Write user-specific config.json
+      - name: Write user config.json
         run: |
           echo "{
             \"user_email\": \"${{ github.event.inputs.user_email }}\",
-            \"search_terms\": [\"${{ github.event.inputs.search_terms }}\"],
+            \"search_terms\": \"${{ github.event.inputs.search_terms }}\",
             \"results_wanted\": ${{ github.event.inputs.results_wanted }},
             \"max_days_old\": ${{ github.event.inputs.max_days_old }},
             \"target_state\": \"${{ github.event.inputs.target_state }}\"
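Note: with the change above, search_terms reaches config.json as a single comma-separated string rather than a JSON array. For illustration only (the dispatch input values below are invented), the step would now write something like:

    # Invented dispatch inputs, for illustration only: the JSON the step
    # above would write to config.json.
    import json

    config_text = """{
      "user_email": "jane.doe@example.com",
      "search_terms": "software engineer, data analyst",
      "results_wanted": 100,
      "max_days_old": 2,
      "target_state": "NY"
    }"""
    config = json.loads(config_text)
    print(type(config["search_terms"]))  # <class 'str'> -- one string, not a list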
@@ -60,12 +60,23 @@ jobs:
       - name: Run JobSpy Scraper Dynamic
         run: python job_scraper_dynamic.py

-      - name: Upload user-specific CSV as artifact
+      - name: Sanitize email for filename
+        id: sanitize
+        run: |
+          safe_name=$(echo "${{ github.event.inputs.user_email }}" | sed 's/@/_at_/g; s/\./_/g')
+          echo "::set-output name=safe_name::$safe_name"
+
+      - name: Verify user-specific CSV exists
+        run: |
+          if [ ! -f "jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv" ]; then
+            echo "❌ ERROR: jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv not found!"
+            exit 1
+          else
+            echo "✅ Found: jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv"
+          fi
+
+      - name: Upload jobspy output
         uses: actions/upload-artifact@v4
         with:
-          name: jobspy-output-${{ github.event.inputs.user_email }}
-          path: |
-            jobspy_output_dynamic_${{ github.event.inputs.user_email }}
-            .replace('@','_at_')
-            .replace('.','_')
-            .csv
+          name: jobspy-output-${{ steps.sanitize.outputs.safe_name }}
+          path: jobspy_output_dynamic_${{ steps.sanitize.outputs.safe_name }}.csv
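Note: the sed in the sanitize step and the re.sub added to the script (next hunks, in job_scraper_dynamic.py) must yield identical names, or the verify and upload steps will look for a file the scraper never wrote. A minimal sketch of the shared mapping; the sanitize helper below is illustrative, not part of the commit. Also worth flagging: ::set-output is deprecated on current runners, which expect echo "safe_name=$safe_name" >> "$GITHUB_OUTPUT" instead.

    # Illustrative helper (not in the commit): Python equivalent of the
    # workflow's sed 's/@/_at_/g; s/\./_/g'.
    import re

    def sanitize(email: str) -> str:
        # '@' -> '_at_', '.' -> '_'; must stay in sync with the sed call above
        return re.sub(r'[@.]', lambda m: '_at_' if m.group() == '@' else '_', email)

    assert sanitize("jane.doe@example.com") == "jane_doe_at_example_com"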
@@ -2,7 +2,7 @@ import csv
 import datetime
 import json
 import os
-
+import re
 from jobspy.google import Google
 from jobspy.linkedin import LinkedIn
 from jobspy.indeed import Indeed
@@ -15,49 +15,41 @@ sources = {
     "indeed": Indeed,
 }

-# Read dynamic user-specific config.json
-with open("config.json", "r") as f:
-    config = json.load(f)
+# Load user config
+with open("config.json", "r") as file:
+    config = json.load(file)

-search_terms = config.get("search_terms", [])
-results_wanted = config.get("results_wanted", 100)
-max_days_old = config.get("max_days_old", 2)
-target_state = config.get("target_state", "NY")
-user_email = config.get("user_email", "unknown@domain.com")
+user_email = config.get("user_email")
+search_terms = [term.strip() for term in config.get("search_terms", "").split(",")]
+results_wanted = int(config.get("results_wanted", 100))
+max_days_old = int(config.get("max_days_old", 2))
+target_state = config.get("target_state", "NY")
+
+# Sanitize email for filename
+safe_email = re.sub(r'[@.]', lambda x: '_at_' if x.group() == '@' else '_', user_email)
+output_filename = f"jobspy_output_dynamic_{safe_email}.csv"

-def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
-    """Scrape jobs from multiple sources and filter by state."""
+def scrape_jobs():
     all_jobs = []
     today = datetime.date.today()

-    print("\n🔎 DEBUG: Fetching jobs for search terms:", search_terms)
+    print(f"\n🔎 Fetching jobs for: {search_terms}")

     for search_term in search_terms:
         for source_name, source_class in sources.items():
-            print(f"\n🚀 Scraping {search_term} from {source_name}...")
+            print(f"🚀 Scraping {search_term} from {source_name}...")

             scraper = source_class()
-            search_criteria = ScraperInput(
+            input_params = ScraperInput(
                 site_type=[source_name],
                 search_term=search_term,
                 results_wanted=results_wanted,
             )
+            results = scraper.scrape(input_params)

-            job_response = scraper.scrape(search_criteria)
-
-            for job in job_response.jobs:
-                location_city = job.location.city.strip() if job.location.city else "Unknown"
-                location_state = job.location.state.strip().upper() if job.location.state else "Unknown"
-                location_country = str(job.location.country) if job.location.country else "Unknown"
-
-                if not any(term.lower() in job.title.lower() for term in search_terms):
-                    print(f"🚫 Excluding: {job.title} (Doesn't match search terms)")
-                    continue
+            for job in results.jobs:
+                location_state = job.location.state.strip().upper() if job.location and job.location.state else "Unknown"

                 if job.date_posted and (today - job.date_posted).days <= max_days_old:
                     if location_state == target_state or job.is_remote:
-                        print(f"✅ MATCH: {job.title} - {location_city}, {location_state} (Posted {job.date_posted})")
                         all_jobs.append({
                             "Job ID": job.id,
                             "Job Title (Primary)": job.title,
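Note: a quick sketch (invented input) of how the rewritten parsing turns the comma-separated search_terms string into a list. One edge case: "".split(",") returns [""], so a missing search_terms key yields a single empty term rather than an empty list.

    # Invented input; mirrors the list comprehension in the hunk above.
    raw = "software engineer, data analyst"
    terms = [term.strip() for term in raw.split(",")]
    print(terms)          # ['software engineer', 'data analyst']
    print("".split(","))  # [''] -- a missing search_terms gives one empty term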
@@ -70,61 +62,35 @@ def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
                             "Salary Min": job.compensation.min_amount if job.compensation else "",
                             "Salary Max": job.compensation.max_amount if job.compensation else "",
                             "Date Posted": job.date_posted.strftime("%Y-%m-%d") if job.date_posted else "Not Provided",
-                            "Location City": location_city,
+                            "Location City": job.location.city if job.location and job.location.city else "Unknown",
                             "Location State": location_state,
-                            "Location Country": location_country,
+                            "Location Country": str(job.location.country) if job.location and job.location.country else "Unknown",
                             "Job URL": job.job_url,
-                            "Job Description": job.description.replace(",", "") if job.description else "No description available",
+                            "Job Description": job.description.replace(",", "") if job.description else "No description",
                             "Job Source": source_name
                         })
-                    else:
-                        print(f"❌ Ignored (Wrong State): {job.title} - {location_city}, {location_state}")
-                else:
-                    print(f"⏳ Ignored (Too Old): {job.title} - {location_city}, {location_state}")

-    print(f"\n✅ {len(all_jobs)} jobs retrieved for user {user_email}")
     return all_jobs


-def save_jobs_to_csv(jobs, user_email):
-    """Save job data to a user-specific CSV file using custom delimiter."""
+def save_jobs_to_csv(jobs, filename):
     if not jobs:
-        print("⚠️ No jobs found matching criteria.")
+        print("⚠️ No jobs found.")
         return

-    # Clean the email to create a safe filename
-    safe_email = user_email.replace("@", "_at_").replace(".", "_")
-    filename = f"jobspy_output_dynamic_{safe_email}.csv"
-
-    # Remove old file if it exists
-    if os.path.exists(filename):
-        os.remove(filename)
-
-    fieldnames = [
-        "Job ID", "Job Title (Primary)", "Company Name", "Industry",
-        "Experience Level", "Job Type", "Is Remote", "Currency",
-        "Salary Min", "Salary Max", "Date Posted", "Location City",
-        "Location State", "Location Country", "Job URL", "Job Description",
-        "Job Source", "User Email"
-    ]
-
-    with open(filename, mode="w", newline="", encoding="utf-8") as file:
-        writer = csv.DictWriter(file, fieldnames=fieldnames, delimiter="|")
-        writer.writeheader()
-        for job in jobs:
-            job["User Email"] = user_email
-            writer.writerow(job)
-
-    print(f"📄 File saved: {filename} ({len(jobs)} entries)")
-    return filename
+    fieldnames = list(jobs[0].keys())
+    header = "|~|".join(fieldnames)
+    records = [header]
+
+    for job in jobs:
+        row = [str(job.get(field, "Not Provided")).replace(",", "") for field in fieldnames]
+        records.append("|~|".join(row))
+
+    output = ",".join(records)
+    with open(filename, "w", encoding="utf-8") as f:
+        f.write(output)
+
+    print(f"✅ Saved {len(jobs)} jobs to {filename}")

-# Run the scraper and save the results to a user-specific output file
-job_data = scrape_jobs(
-    search_terms=search_terms,
-    results_wanted=results_wanted,
-    max_days_old=max_days_old,
-    target_state=target_state
-)
-
-output_filename = save_jobs_to_csv(job_data, user_email)
+# Run
+scraped_jobs = scrape_jobs()
+save_jobs_to_csv(scraped_jobs, output_filename)
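Note: despite the .csv extension, the rewritten save_jobs_to_csv emits a custom flat format: commas are stripped from field values, fields are joined with "|~|", and whole records are then joined with plain commas. A small sketch with invented data:

    # Invented data; reproduces the joining logic of the new save_jobs_to_csv.
    fieldnames = ["Job ID", "Job Title (Primary)", "Job Source"]
    jobs = [{"Job ID": "123", "Job Title (Primary)": "Data Analyst", "Job Source": "indeed"}]

    header = "|~|".join(fieldnames)
    records = [header]
    for job in jobs:
        row = [str(job.get(field, "Not Provided")).replace(",", "") for field in fieldnames]
        records.append("|~|".join(row))

    print(",".join(records))
    # Job ID|~|Job Title (Primary)|~|Job Source,123|~|Data Analyst|~|indeed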