Compare commits

..

No commits in common. "1138a1b10b87851b9dcdd5139398fa2d81cde2c8" and "c073ea08fd701eb2362394ee09566ebc61eedee1" have entirely different histories.

3 changed files with 75 additions and 123 deletions

View File

@ -1,63 +1,45 @@
name: JobSpy Scraper Dynamic Workflow name: JobSpy Scraper Dynamic Workflow
on: on:
workflow_dispatch: workflow_dispatch:
inputs: inputs:
user_email: user_email:
description: 'Email of user' description: 'Email of user'
required: true required: true
default: 'Branden@autoemployme.onmicrosoft.com' run_id:
description: 'Run ID from Power Automate'
required: true
permissions: permissions:
contents: read contents: read
id-token: write id-token: write
jobs: jobs:
scrape_jobs: scrape_jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout Repo - name: Checkout Repo
uses: actions/checkout@v3 uses: actions/checkout@v3
- name: Set Up Python - name: Set Up Python
uses: actions/setup-python@v4 uses: actions/setup-python@v4
with: with:
python-version: '3.10' python-version: '3.10'
- name: Sanitize Email
- name: Install Dependencies id: vars
run: |
pip install --upgrade pip
pip install -r requirements.txt
- name: Sanitize Email (Preserve Case)
id: sanitize
run: | run: |
raw_email="${{ github.event.inputs.user_email }}" raw_email="${{ github.event.inputs.user_email }}"
safe_email=$(echo "$raw_email" | sed 's/@/_at_/g; s/\./_/g') safe_email=$(echo "$raw_email" | sed 's/@/_at_/g; s/\./_/g')
echo "safe_email=$safe_email" >> $GITHUB_OUTPUT echo "safe_email=$safe_email" >> $GITHUB_OUTPUT
- name: Run Job Scraper
- name: Ensure outputs folder exists
run: mkdir -p outputs
- name: Run Job Scraper with Config
run: | run: |
python job_scraper_dynamic.py "${{ github.event.inputs.user_email }}" python job_scraper_dynamic.py "${{ github.event.inputs.user_email }}" "${{ github.event.inputs.run_id }}"
- name: Upload Output Artifact - name: Upload Output Artifact
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: jobspy_output_${{ steps.sanitize.outputs.safe_email }} name: jobspy_output_${{ steps.vars.outputs.safe_email }}_${{ github.event.inputs.run_id }}
path: outputs/jobspy_output_${{ steps.sanitize.outputs.safe_email }}.csv path: outputs/jobspy_output_${{ steps.vars.outputs.safe_email }}_${{ github.event.inputs.run_id }}.csv

View File

@ -1 +1 @@
{"search_terms":["Marketing"],"results_wanted":"50\n","max_days_old":"1\n","target_state":"NY","user_email":"Branden@autoemployme.onmicrosoft.com"} {"search_terms":["Gym sales"," test"],"results_wanted":"50\n","max_days_old":"1\n","target_state":"NY","user_email":"Branden@autoemployme.onmicrosoft.com"}

View File

@ -1,14 +1,10 @@
import csv import csv, datetime, os, sys, json
import datetime
import os
import sys
import json
from jobspy.google import Google from jobspy.google import Google
from jobspy.linkedin import LinkedIn from jobspy.linkedin import LinkedIn
from jobspy.indeed import Indeed from jobspy.indeed import Indeed
from jobspy.model import ScraperInput from jobspy.model import ScraperInput
# Define job sources # Define sources
sources = { sources = {
"google": Google, "google": Google,
"linkedin": LinkedIn, "linkedin": LinkedIn,
@ -18,118 +14,92 @@ sources = {
def sanitize_email(email): def sanitize_email(email):
return email.replace("@", "_at_").replace(".", "_") return email.replace("@", "_at_").replace(".", "_")
def load_config_file(email=None): def load_config(email):
if email: safe_email = sanitize_email(email)
safe_email = sanitize_email(email) config_path = os.path.join("configs", f"config_{safe_email}.json")
config_path = os.path.join("configs", f"config_{safe_email}.json") if not os.path.exists(config_path):
if os.path.exists(config_path): raise FileNotFoundError(f"❌ Config for {email} not found at {config_path}")
print(f"📂 Loading config for {email}{config_path}") with open(config_path, "r", encoding="utf-8") as f:
with open(config_path, "r", encoding="utf-8") as f: return json.load(f), safe_email
return json.load(f), safe_email
else:
raise FileNotFoundError(f"❌ Config for {email} not found at {config_path}")
else:
raise ValueError("❌ Email must be passed as argument")
def scrape_jobs(search_terms, results_wanted, max_days_old, target_state): def scrape_jobs(search_terms, results_wanted_str, max_days_old_str, target_state):
# Ensure numeric values are converted # Convert string values to integers
results_wanted = int(results_wanted) results_wanted = int(results_wanted_str.strip())
max_days_old = int(max_days_old) max_days_old = int(max_days_old_str.strip())
all_jobs = []
today = datetime.date.today() today = datetime.date.today()
print(f"\n🔍 Scraping jobs for: {search_terms}") all_jobs = []
for term in search_terms: for term in search_terms:
for source_name, source_class in sources.items(): for source, Scraper in sources.items():
print(f"🚀 Scraping '{term}' from {source_name}...") print(f"🔍 Scraping {term} from {source}")
scraper = source_class() scraper = Scraper()
criteria = ScraperInput(site_type=[source_name], search_term=term, results_wanted=results_wanted)
try: try:
response = scraper.scrape(criteria) jobs = scraper.scrape(ScraperInput(
site_type=[source],
search_term=term,
results_wanted=results_wanted
)).jobs
except Exception as e: except Exception as e:
print(f"❌ Error scraping {source_name}: {e}") print(f"⚠️ {source} error: {e}")
continue continue
for job in response.jobs: for job in jobs:
city = job.location.city.strip() if job.location.city else "Unknown"
state = job.location.state.strip().upper() if job.location.state else "Unknown"
country = str(job.location.country) if job.location.country else "Unknown"
if not any(t.lower() in job.title.lower() for t in search_terms):
continue
if job.date_posted and (today - job.date_posted).days <= max_days_old: if job.date_posted and (today - job.date_posted).days <= max_days_old:
if state == target_state or job.is_remote: if target_state == (job.location.state or "").upper() or job.is_remote:
all_jobs.append({ if any(term.lower() in job.title.lower() for term in search_terms):
"Job ID": job.id, all_jobs.append({
"Job Title (Primary)": job.title, "Job ID": job.id,
"Company Name": job.company_name or "Unknown", "Job Title (Primary)": job.title,
"Industry": job.company_industry or "Not Provided", "Company Name": job.company_name or "Unknown",
"Experience Level": job.job_level or "Not Provided", "Industry": job.company_industry or "Not Provided",
"Job Type": job.job_type[0].name if job.job_type else "Not Provided", "Experience Level": job.job_level or "Not Provided",
"Is Remote": job.is_remote, "Job Type": job.job_type[0].name if job.job_type else "Not Provided",
"Currency": job.compensation.currency if job.compensation else "", "Is Remote": job.is_remote,
"Salary Min": job.compensation.min_amount if job.compensation else "", "Currency": job.compensation.currency if job.compensation else "",
"Salary Max": job.compensation.max_amount if job.compensation else "", "Salary Min": job.compensation.min_amount if job.compensation else "",
"Date Posted": job.date_posted.strftime("%Y-%m-%d") if job.date_posted else "Not Provided", "Salary Max": job.compensation.max_amount if job.compensation else "",
"Location City": city, "Date Posted": job.date_posted.strftime("%Y-%m-%d"),
"Location State": state, "Location City": job.location.city or "Unknown",
"Location Country": country, "Location State": (job.location.state or "Unknown").upper(),
"Job URL": job.job_url, "Location Country": job.location.country or "Unknown",
"Job Description": job.description.replace(",", "") if job.description else "No description available", "Job URL": job.job_url,
"Job Source": source_name "Job Description": job.description.replace(",", "") if job.description else "No description",
}) "Job Source": source
print(f"{len(all_jobs)} jobs matched.") })
print(f"✅ Found {len(all_jobs)} jobs")
return all_jobs return all_jobs
def save_jobs_to_csv(jobs, output_path): def save_to_csv(jobs, path):
if not jobs: os.makedirs(os.path.dirname(path), exist_ok=True)
print("⚠️ No jobs found.")
return
fieldnames = [ fieldnames = [
"Job ID", "Job Title (Primary)", "Company Name", "Industry", "Job ID", "Job Title (Primary)", "Company Name", "Industry",
"Experience Level", "Job Type", "Is Remote", "Currency", "Experience Level", "Job Type", "Is Remote", "Currency",
"Salary Min", "Salary Max", "Date Posted", "Location City", "Salary Min", "Salary Max", "Date Posted", "Location City",
"Location State", "Location Country", "Job URL", "Job Description", "Location State", "Location Country", "Job URL", "Job Description", "Job Source"
"Job Source"
] ]
header = "|~|".join(fieldnames) header = "|~|".join(fieldnames)
rows = [header] rows = [header] + ["|~|".join(str(job.get(col, "Not Provided")).replace(",", "").strip() for col in fieldnames) for job in jobs]
with open(path, "w", encoding="utf-8") as f:
f.write(",".join(rows))
print(f"💾 Saved output to: {path}")
for job in jobs:
row = []
for field in fieldnames:
value = str(job.get(field, "Not Provided")).replace(",", "").strip()
row.append(value if value else "Not Provided")
rows.append("|~|".join(row))
output = ",".join(rows)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, "w", encoding="utf-8") as f:
f.write(output)
print(f"💾 Saved output to: {output_path}")
# MAIN
if __name__ == "__main__": if __name__ == "__main__":
try: try:
user_email = sys.argv[1] if len(sys.argv) >= 2 else None if len(sys.argv) != 3:
config, safe_email = load_config_file(user_email) raise ValueError("❌ Usage: python job_scraper_dynamic.py <user_email> <run_id>")
job_data = scrape_jobs( user_email, run_id = sys.argv[1], sys.argv[2]
search_terms=config["search_terms"], config, safe_email = load_config(user_email)
results_wanted=config["results_wanted"],
max_days_old=config["max_days_old"], jobs = scrape_jobs(
target_state=config["target_state"] config["search_terms"],
config["results_wanted"],
config["max_days_old"],
config["target_state"]
) )
output_file = f"outputs/jobspy_output_{safe_email}.csv" save_to_csv(jobs, f"outputs/jobspy_output_{safe_email}_{run_id}.csv")
save_jobs_to_csv(job_data, output_file)
except Exception as e: except Exception as e:
print(f"❌ Fatal Error: {e}") print(f"❌ Fatal error: {e}")
sys.exit(1) sys.exit(1)