Compare commits

18 Commits

Author SHA1 Message Date
JobSpy Bot 1138a1b10b 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 23:19:43 -04:00
JobSpy Bot 4daf19872f 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 23:15:44 -04:00
JobSpy Bot f90b545c2e 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 22:49:52 -04:00
JobSpy Bot 8700e1c4ac 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 22:17:04 -04:00
JobSpy Bot 15538061d7 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 21:44:01 -04:00
JobSpy Bot 3019fc6adb 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 21:00:10 -04:00
JobSpy Bot cce26cd8ae 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 20:28:27 -04:00
JobSpy Bot 663c77efcf 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 19:11:32 -04:00
JobSpy Bot 3719121937 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 18:12:26 -04:00
JobSpy Bot 076d30f17d 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 17:57:00 -04:00
JobSpy Bot a088b4d62c 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 17:24:24 -04:00
JobSpy Bot b513972a3f 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 16:14:57 -04:00
JobSpy Bot c9614dd74e 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 16:10:32 -04:00
fakebranden 5d45628f5c fixed py 2025-04-16 20:06:52 +00:00
fakebranden c310ff61ee modified py for int string error 2025-04-16 19:57:18 +00:00
fakebranden cc7c7f0a1d revert to non runid file 2025-04-16 19:53:43 +00:00
fakebranden 692ae9ca21 remove runid from tigger req 2025-04-16 19:50:46 +00:00
JobSpy Bot 743238350f 🔄 Updated config for Branden@autoemployme.onmicrosoft.com 2025-04-16 15:33:35 -04:00
3 changed files with 124 additions and 76 deletions

Workflow file: JobSpy Scraper Dynamic Workflow

@@ -1,45 +1,63 @@
 name: JobSpy Scraper Dynamic Workflow
 on:
   workflow_dispatch:
     inputs:
       user_email:
         description: 'Email of user'
         required: true
-      run_id:
-        description: 'Run ID from Power Automate'
-        required: true
+        default: 'Branden@autoemployme.onmicrosoft.com'
 permissions:
   contents: read
   id-token: write
 jobs:
   scrape_jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout Repo
         uses: actions/checkout@v3
       - name: Set Up Python
         uses: actions/setup-python@v4
         with:
           python-version: '3.10'
-      - name: Sanitize Email
-        id: vars
+      - name: Install Dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -r requirements.txt
+      - name: Sanitize Email (Preserve Case)
+        id: sanitize
         run: |
           raw_email="${{ github.event.inputs.user_email }}"
           safe_email=$(echo "$raw_email" | sed 's/@/_at_/g; s/\./_/g')
           echo "safe_email=$safe_email" >> $GITHUB_OUTPUT
-      - name: Run Job Scraper
+      - name: Ensure outputs folder exists
+        run: mkdir -p outputs
+      - name: Run Job Scraper with Config
         run: |
-          python job_scraper_dynamic.py "${{ github.event.inputs.user_email }}" "${{ github.event.inputs.run_id }}"
+          python job_scraper_dynamic.py "${{ github.event.inputs.user_email }}"
       - name: Upload Output Artifact
         uses: actions/upload-artifact@v4
         with:
-          name: jobspy_output_${{ steps.vars.outputs.safe_email }}_${{ github.event.inputs.run_id }}
-          path: outputs/jobspy_output_${{ steps.vars.outputs.safe_email }}_${{ github.event.inputs.run_id }}.csv
+          name: jobspy_output_${{ steps.sanitize.outputs.safe_email }}
+          path: outputs/jobspy_output_${{ steps.sanitize.outputs.safe_email }}.csv
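With run_id dropped from the inputs, whatever triggers this workflow (the Power Automate flow, per the removed input description) now sends only user_email. Below is a minimal dispatch sketch, assuming the repo is reachable through the standard GitHub Actions REST endpoint; the owner, repo name, and workflow filename are placeholders, since the compare view does not show them:

import os
import requests

# Placeholders -- substitute your own owner/repo/workflow filename.
OWNER = "fakebranden"
REPO = "JobSpy"
WORKFLOW_FILE = "jobspy_scraper_dynamic.yml"

url = f"https://api.github.com/repos/{OWNER}/{REPO}/actions/workflows/{WORKFLOW_FILE}/dispatches"
resp = requests.post(
    url,
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    },
    # Only user_email remains after this change; passing run_id here
    # would now be rejected as an unexpected input.
    json={"ref": "main", "inputs": {"user_email": "Branden@autoemployme.onmicrosoft.com"}},
)
resp.raise_for_status()  # a successful dispatch returns 204 No Content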

Config file: user config JSON for Branden@autoemployme.onmicrosoft.com

@@ -1 +1 @@
-{"search_terms":["Gym sales"," test"],"results_wanted":"50\n","max_days_old":"1\n","target_state":"NY","user_email":"Branden@autoemployme.onmicrosoft.com"}
+{"search_terms":["Marketing"],"results_wanted":"50\n","max_days_old":"1\n","target_state":"NY","user_email":"Branden@autoemployme.onmicrosoft.com"}

Script: job_scraper_dynamic.py

@@ -1,10 +1,14 @@
-import csv, datetime, os, sys, json
+import csv
+import datetime
+import os
+import sys
+import json
 from jobspy.google import Google
 from jobspy.linkedin import LinkedIn
 from jobspy.indeed import Indeed
 from jobspy.model import ScraperInput
-# Define sources
+# Define job sources
 sources = {
     "google": Google,
     "linkedin": LinkedIn,
@@ -14,39 +18,50 @@ sources = {
 def sanitize_email(email):
     return email.replace("@", "_at_").replace(".", "_")
-def load_config(email):
+def load_config_file(email=None):
     if email:
         safe_email = sanitize_email(email)
         config_path = os.path.join("configs", f"config_{safe_email}.json")
-        if not os.path.exists(config_path):
-            raise FileNotFoundError(f"❌ Config for {email} not found at {config_path}")
+        if os.path.exists(config_path):
+            print(f"📂 Loading config for {email} {config_path}")
             with open(config_path, "r", encoding="utf-8") as f:
                 return json.load(f), safe_email
+        else:
+            raise FileNotFoundError(f"❌ Config for {email} not found at {config_path}")
     else:
         raise ValueError("❌ Email must be passed as argument")
-def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
-    # Ensure numeric values are converted
-    results_wanted = int(results_wanted)
-    max_days_old = int(max_days_old)
+def scrape_jobs(search_terms, results_wanted_str, max_days_old_str, target_state):
+    # Convert string values to integers
+    results_wanted = int(results_wanted_str.strip())
+    max_days_old = int(max_days_old_str.strip())
-    today = datetime.date.today()
     all_jobs = []
+    today = datetime.date.today()
+    print(f"\n🔍 Scraping jobs for: {search_terms}")
     for term in search_terms:
-        for source, Scraper in sources.items():
-            print(f"🔍 Scraping {term} from {source}")
-            scraper = Scraper()
+        for source_name, source_class in sources.items():
+            print(f"🚀 Scraping '{term}' from {source_name}...")
+            scraper = source_class()
+            criteria = ScraperInput(site_type=[source_name], search_term=term, results_wanted=results_wanted)
             try:
-                jobs = scraper.scrape(ScraperInput(
-                    site_type=[source],
-                    search_term=term,
-                    results_wanted=results_wanted
-                )).jobs
+                response = scraper.scrape(criteria)
             except Exception as e:
-                print(f"⚠️ {source} error: {e}")
+                print(f"❌ Error scraping {source_name}: {e}")
                 continue
-            for job in jobs:
+            for job in response.jobs:
+                city = job.location.city.strip() if job.location.city else "Unknown"
+                state = job.location.state.strip().upper() if job.location.state else "Unknown"
+                country = str(job.location.country) if job.location.country else "Unknown"
+                if not any(t.lower() in job.title.lower() for t in search_terms):
+                    continue
                 if job.date_posted and (today - job.date_posted).days <= max_days_old:
-                    if target_state == (job.location.state or "").upper() or job.is_remote:
-                        if any(term.lower() in job.title.lower() for term in search_terms):
+                    if state == target_state or job.is_remote:
                         all_jobs.append({
                             "Job ID": job.id,
                             "Job Title (Primary)": job.title,
@@ -58,48 +73,63 @@ def scrape_jobs(search_terms, results_wanted_str, max_days_old_str, target_state
                             "Currency": job.compensation.currency if job.compensation else "",
                             "Salary Min": job.compensation.min_amount if job.compensation else "",
                             "Salary Max": job.compensation.max_amount if job.compensation else "",
-                            "Date Posted": job.date_posted.strftime("%Y-%m-%d"),
-                            "Location City": job.location.city or "Unknown",
-                            "Location State": (job.location.state or "Unknown").upper(),
-                            "Location Country": job.location.country or "Unknown",
+                            "Date Posted": job.date_posted.strftime("%Y-%m-%d") if job.date_posted else "Not Provided",
+                            "Location City": city,
+                            "Location State": state,
+                            "Location Country": country,
                             "Job URL": job.job_url,
-                            "Job Description": job.description.replace(",", "") if job.description else "No description",
-                            "Job Source": source
+                            "Job Description": job.description.replace(",", "") if job.description else "No description available",
+                            "Job Source": source_name
                         })
-    print(f"Found {len(all_jobs)} jobs")
+    print(f"{len(all_jobs)} jobs matched.")
     return all_jobs
-def save_to_csv(jobs, path):
-    os.makedirs(os.path.dirname(path), exist_ok=True)
+def save_jobs_to_csv(jobs, output_path):
+    if not jobs:
+        print("⚠️ No jobs found.")
+        return
     fieldnames = [
         "Job ID", "Job Title (Primary)", "Company Name", "Industry",
         "Experience Level", "Job Type", "Is Remote", "Currency",
         "Salary Min", "Salary Max", "Date Posted", "Location City",
-        "Location State", "Location Country", "Job URL", "Job Description", "Job Source"
+        "Location State", "Location Country", "Job URL", "Job Description",
+        "Job Source"
     ]
-    header = "|~|".join(fieldnames)
-    rows = [header] + ["|~|".join(str(job.get(col, "Not Provided")).replace(",", "").strip() for col in fieldnames) for job in jobs]
-    with open(path, "w", encoding="utf-8") as f:
-        f.write(",".join(rows))
-    print(f"💾 Saved output to: {path}")
+    header = "|~|".join(fieldnames)
+    rows = [header]
+    for job in jobs:
+        row = []
+        for field in fieldnames:
+            value = str(job.get(field, "Not Provided")).replace(",", "").strip()
+            row.append(value if value else "Not Provided")
+        rows.append("|~|".join(row))
+    output = ",".join(rows)
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(output)
+    print(f"💾 Saved output to: {output_path}")
 # MAIN
 if __name__ == "__main__":
     try:
-        if len(sys.argv) != 3:
-            raise ValueError("❌ Usage: python job_scraper_dynamic.py <user_email> <run_id>")
-        user_email, run_id = sys.argv[1], sys.argv[2]
-        config, safe_email = load_config(user_email)
+        user_email = sys.argv[1] if len(sys.argv) >= 2 else None
+        config, safe_email = load_config_file(user_email)
-        jobs = scrape_jobs(
-            config["search_terms"],
-            config["results_wanted"],
-            config["max_days_old"],
-            config["target_state"]
-        )
+        job_data = scrape_jobs(
+            search_terms=config["search_terms"],
+            results_wanted_str=config["results_wanted"],
+            max_days_old_str=config["max_days_old"],
+            target_state=config["target_state"]
+        )
-        save_to_csv(jobs, f"outputs/jobspy_output_{safe_email}_{run_id}.csv")
+        output_file = f"outputs/jobspy_output_{safe_email}.csv"
+        save_jobs_to_csv(job_data, output_file)
     except Exception as e:
-        print(f"❌ Fatal error: {e}")
+        print(f"❌ Fatal Error: {e}")
         sys.exit(1)
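
save_jobs_to_csv writes the whole file as a single line: rows are joined by commas (safe only because every comma is stripped out of field values first) and fields within a row are joined by the |~| marker. A consumer on the Power Automate side would split in the same order. A hedged sketch, assuming the sanitized-email output path the workflow produces:

# Minimal reader for the single-line "|~|" format written above.
def load_jobspy_output(path):
    with open(path, encoding="utf-8") as f:
        rows = f.read().split(",")           # row delimiter: comma
    header = rows[0].split("|~|")            # field delimiter: |~|
    return [dict(zip(header, r.split("|~|"))) for r in rows[1:]]

# sanitize_email("Branden@autoemployme.onmicrosoft.com") yields this filename.
jobs = load_jobspy_output("outputs/jobspy_output_Branden_at_autoemployme_onmicrosoft_com.csv")
for job in jobs[:3]:
    print(job["Job Title (Primary)"], "|", job["Job Source"])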