mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-04 19:44:30 -08:00
Compare commits
42 Commits
1.1.82
...
77cc1f8550
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
77cc1f8550 | ||
|
|
84b4524c43 | ||
|
|
e6ae23c76f | ||
|
|
0103e11234 | ||
|
|
697ae5c8c9 | ||
|
|
9e0674f7fc | ||
|
|
bbdad3584e | ||
|
|
a045bb442a | ||
|
|
3eb4c122e7 | ||
|
|
74877c5fd8 | ||
|
|
0a475e312f | ||
|
|
e0514d218e | ||
|
|
529aa8a1f4 | ||
|
|
93a21941eb | ||
|
|
8f8b39c6e2 | ||
|
|
cdcd79edfe | ||
|
|
89a40dc3e3 | ||
|
|
6a326b7dd4 | ||
|
|
0a5c5fa9b3 | ||
|
|
e22e4cc092 | ||
|
|
0abe28fae4 | ||
|
|
31d0389dd8 | ||
|
|
fb9ab3a315 | ||
|
|
c34eff610f | ||
|
|
e9160a0b4c | ||
|
|
cd916c7978 | ||
|
|
25c084ca2c | ||
|
|
341deba465 | ||
|
|
5337b3ec7f | ||
|
|
0171ecc4a0 | ||
|
|
e191405c8e | ||
|
|
a2d139cb96 | ||
|
|
9e41e6e9db | ||
|
|
bb7d4c55ed | ||
|
|
58cc1937bb | ||
|
|
60819a8fca | ||
|
|
1c59cd6738 | ||
|
|
eed96e4c04 | ||
|
|
83c64f4bca | ||
|
|
d8ad9da1c0 | ||
|
|
5f5738eaaa | ||
|
|
e1da326317 |
49
.github/workflows/job_scraper_dynamic.yml
vendored
Normal file
49
.github/workflows/job_scraper_dynamic.yml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
name: JobSpy Scraper Dynamic Workflow
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
user_email:
|
||||
description: 'Email of user'
|
||||
required: true
|
||||
default: 'Branden@autoemployme.onmicrosoft.com'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
|
||||
jobs:
|
||||
scrape_jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set Up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
|
||||
- name: Sanitize Email + Create Run ID
|
||||
id: vars
|
||||
run: |
|
||||
safe_email=$(echo "${{ github.event.inputs.user_email }}" | sed 's/@/_at_/g; s/\./_/g')
|
||||
run_id=$(date +%s)
|
||||
echo "safe_email=$safe_email" >> $GITHUB_OUTPUT
|
||||
echo "run_id=$run_id" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Run Job Scraper
|
||||
run: |
|
||||
python job_scraper_dynamic.py "${{ github.event.inputs.user_email }}" "${{ steps.vars.outputs.run_id }}"
|
||||
|
||||
- name: Upload Output Artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: jobspy_output_${{ steps.vars.outputs.safe_email }}_${{ steps.vars.outputs.run_id }}
|
||||
path: outputs/jobspy_output_${{ steps.vars.outputs.safe_email }}_${{ steps.vars.outputs.run_id }}.csv
|
||||
48
.github/workflows/jobspy_scraper.yml
vendored
Normal file
48
.github/workflows/jobspy_scraper.yml
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
name: JobSpy Scraper Workflow
|
||||
|
||||
on:
|
||||
workflow_dispatch: # Allows manual trigger from GitHub or Power Automate
|
||||
# Remove or comment out the schedule to prevent auto-runs
|
||||
# schedule:
|
||||
# - cron: '0 */6 * * *' # Runs every 6 hours (DISABLED)
|
||||
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
id-token: write
|
||||
|
||||
jobs:
|
||||
scrape_jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
|
||||
- name: Run JobSpy Scraper
|
||||
run: python job_scraper_exact_match.py
|
||||
|
||||
- name: Debug - Check if jobspy_output.csv exists
|
||||
run: |
|
||||
if [ ! -f jobspy_output.csv ]; then
|
||||
echo "❌ ERROR: jobspy_output.csv not found!"
|
||||
exit 1
|
||||
else
|
||||
echo "✅ jobspy_output.csv found, proceeding to upload..."
|
||||
fi
|
||||
|
||||
- name: Upload JobSpy Output as Artifact
|
||||
uses: actions/upload-artifact@v4 # Explicitly using latest version
|
||||
with:
|
||||
name: jobspy-results
|
||||
path: jobspy_output.csv
|
||||
8
configs/config.json
Normal file
8
configs/config.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"search_terms": ["IT Support", "Help Desk"],
|
||||
"results_wanted": 50,
|
||||
"max_days_old": 7,
|
||||
"target_state": "NY",
|
||||
"user_email": "Branden@autoemployme.onmicrosoft.com"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"search_terms": ["Testing", "Help Desk", "Support"],
|
||||
"results_wanted": 50,
|
||||
"max_days_old": 7,
|
||||
"target_state": "NY",
|
||||
"user_email": "Branden@autoemployme.onmicrosoft.com"
|
||||
}
|
||||
|
||||
116
job_scraper.py
Normal file
116
job_scraper.py
Normal file
@@ -0,0 +1,116 @@
|
||||
import csv
|
||||
import datetime
|
||||
from jobspy.google import Google
|
||||
from jobspy.linkedin import LinkedIn
|
||||
from jobspy.indeed import Indeed
|
||||
from jobspy.ziprecruiter import ZipRecruiter
|
||||
from jobspy.model import ScraperInput
|
||||
|
||||
# Define job sources
|
||||
sources = {
|
||||
"google": Google,
|
||||
"linkedin": LinkedIn,
|
||||
"indeed": Indeed,
|
||||
"zip_recruiter": ZipRecruiter,
|
||||
}
|
||||
|
||||
# Define search preferences
|
||||
search_terms = ["Automation Engineer", "CRM Manager", "Implementation Specialist"]
|
||||
results_wanted = 200 # Fetch more jobs
|
||||
max_days_old = 2 # Fetch jobs posted in last 48 hours
|
||||
target_state = "NY" # Only keep jobs from New York
|
||||
|
||||
|
||||
def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
|
||||
"""Scrape jobs from multiple sources and filter by state."""
|
||||
all_jobs = []
|
||||
today = datetime.date.today()
|
||||
print("\n🔎 DEBUG: Fetching jobs for search terms:", search_terms)
|
||||
|
||||
for search_term in search_terms:
|
||||
for source_name, source_class in sources.items():
|
||||
print(f"\n🚀 Scraping {search_term} from {source_name}...")
|
||||
|
||||
scraper = source_class()
|
||||
search_criteria = ScraperInput(
|
||||
site_type=[source_name],
|
||||
search_term=search_term,
|
||||
results_wanted=results_wanted,
|
||||
)
|
||||
|
||||
job_response = scraper.scrape(search_criteria)
|
||||
|
||||
for job in job_response.jobs:
|
||||
# Normalize location fields
|
||||
location_city = job.location.city.strip() if job.location.city else "Unknown"
|
||||
location_state = job.location.state.strip().upper() if job.location.state else "Unknown"
|
||||
location_country = str(job.location.country) if job.location.country else "Unknown"
|
||||
|
||||
# Debug: Show all jobs being fetched
|
||||
print(f"📍 Fetched Job: {job.title} - {location_city}, {location_state}, {location_country}")
|
||||
|
||||
# Ensure the job is recent
|
||||
if job.date_posted and (today - job.date_posted).days <= max_days_old:
|
||||
if location_state == target_state or job.is_remote:
|
||||
print(f"✅ MATCH (In NY or Remote): {job.title} - {location_city}, {location_state} (Posted {job.date_posted})")
|
||||
|
||||
all_jobs.append({
|
||||
"Job ID": job.id,
|
||||
"Job Title (Primary)": job.title,
|
||||
"Company Name": job.company_name if job.company_name else "Unknown",
|
||||
"Industry": job.company_industry if job.company_industry else "Not Provided",
|
||||
"Experience Level": job.job_level if job.job_level else "Not Provided",
|
||||
"Job Type": job.job_type[0].name if job.job_type else "Not Provided",
|
||||
"Is Remote": job.is_remote,
|
||||
"Currency": job.compensation.currency if job.compensation else "",
|
||||
"Salary Min": job.compensation.min_amount if job.compensation else "",
|
||||
"Salary Max": job.compensation.max_amount if job.compensation else "",
|
||||
"Date Posted": job.date_posted.strftime("%Y-%m-%d") if job.date_posted else "Not Provided",
|
||||
"Location City": location_city,
|
||||
"Location State": location_state,
|
||||
"Location Country": location_country,
|
||||
"Job URL": job.job_url,
|
||||
"Job Description": job.description[:500] if job.description else "No description available",
|
||||
"Job Source": source_name
|
||||
})
|
||||
else:
|
||||
print(f"❌ Ignored (Wrong State): {job.title} - {location_city}, {location_state} (Posted {job.date_posted})")
|
||||
else:
|
||||
print(f"⏳ Ignored (Too Old): {job.title} - {location_city}, {location_state} (Posted {job.date_posted})")
|
||||
|
||||
print(f"\n✅ {len(all_jobs)} jobs retrieved in NY")
|
||||
return all_jobs
|
||||
|
||||
|
||||
def save_jobs_to_csv(jobs, filename="jobspy_output.csv"):
|
||||
"""Save job data to a CSV file."""
|
||||
if not jobs:
|
||||
print("⚠️ No jobs found matching criteria.")
|
||||
return
|
||||
|
||||
fieldnames = [
|
||||
"Job ID", "Job Title (Primary)", "Company Name", "Industry",
|
||||
"Experience Level", "Job Type", "Is Remote", "Currency",
|
||||
"Salary Min", "Salary Max", "Date Posted", "Location City",
|
||||
"Location State", "Location Country", "Job URL", "Job Description",
|
||||
"Job Source"
|
||||
]
|
||||
|
||||
with open(filename, mode="w", newline="", encoding="utf-8") as file:
|
||||
writer = csv.DictWriter(file, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(jobs)
|
||||
|
||||
print(f"✅ Jobs saved to {filename} ({len(jobs)} entries)")
|
||||
|
||||
|
||||
# Run the scraper with multiple job searches
|
||||
job_data = scrape_jobs(
|
||||
search_terms=search_terms,
|
||||
results_wanted=results_wanted,
|
||||
max_days_old=max_days_old,
|
||||
target_state=target_state
|
||||
)
|
||||
|
||||
# Save results to CSV
|
||||
save_jobs_to_csv(job_data)
|
||||
94
job_scraper_dynamic.py
Normal file
94
job_scraper_dynamic.py
Normal file
@@ -0,0 +1,94 @@
|
||||
import csv, datetime, os, sys, json
|
||||
from jobspy.google import Google
|
||||
from jobspy.linkedin import LinkedIn
|
||||
from jobspy.indeed import Indeed
|
||||
from jobspy.model import ScraperInput
|
||||
|
||||
sources = {
|
||||
"google": Google,
|
||||
"linkedin": LinkedIn,
|
||||
"indeed": Indeed,
|
||||
}
|
||||
|
||||
def sanitize_email(email):
|
||||
return email.replace("@", "_at_").replace(".", "_")
|
||||
|
||||
def load_config(email):
|
||||
safe_email = sanitize_email(email)
|
||||
config_path = os.path.join("configs", f"config_{safe_email}.json")
|
||||
if not os.path.exists(config_path):
|
||||
raise FileNotFoundError(f"❌ Config for {email} not found at {config_path}")
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
return json.load(f), safe_email
|
||||
|
||||
def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
|
||||
today = datetime.date.today()
|
||||
all_jobs = []
|
||||
|
||||
for term in search_terms:
|
||||
for source, Scraper in sources.items():
|
||||
print(f"🔍 Scraping {term} from {source}")
|
||||
scraper = Scraper()
|
||||
try:
|
||||
jobs = scraper.scrape(ScraperInput(
|
||||
site_type=[source],
|
||||
search_term=term,
|
||||
results_wanted=results_wanted
|
||||
)).jobs
|
||||
except Exception as e:
|
||||
print(f"⚠️ {source} error: {e}")
|
||||
continue
|
||||
|
||||
for job in jobs:
|
||||
if job.date_posted and (today - job.date_posted).days <= max_days_old:
|
||||
if target_state == (job.location.state or "").upper() or job.is_remote:
|
||||
if any(term.lower() in job.title.lower() for term in search_terms):
|
||||
all_jobs.append({
|
||||
"Job ID": job.id,
|
||||
"Job Title (Primary)": job.title,
|
||||
"Company Name": job.company_name or "Unknown",
|
||||
"Industry": job.company_industry or "Not Provided",
|
||||
"Experience Level": job.job_level or "Not Provided",
|
||||
"Job Type": job.job_type[0].name if job.job_type else "Not Provided",
|
||||
"Is Remote": job.is_remote,
|
||||
"Currency": job.compensation.currency if job.compensation else "",
|
||||
"Salary Min": job.compensation.min_amount if job.compensation else "",
|
||||
"Salary Max": job.compensation.max_amount if job.compensation else "",
|
||||
"Date Posted": job.date_posted.strftime("%Y-%m-%d"),
|
||||
"Location City": job.location.city or "Unknown",
|
||||
"Location State": (job.location.state or "Unknown").upper(),
|
||||
"Location Country": job.location.country or "Unknown",
|
||||
"Job URL": job.job_url,
|
||||
"Job Description": job.description.replace(",", "") if job.description else "No description",
|
||||
"Job Source": source
|
||||
})
|
||||
print(f"✅ Found {len(all_jobs)} jobs")
|
||||
return all_jobs
|
||||
|
||||
def save_to_csv(jobs, path):
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
fieldnames = [
|
||||
"Job ID", "Job Title (Primary)", "Company Name", "Industry",
|
||||
"Experience Level", "Job Type", "Is Remote", "Currency",
|
||||
"Salary Min", "Salary Max", "Date Posted", "Location City",
|
||||
"Location State", "Location Country", "Job URL", "Job Description", "Job Source"
|
||||
]
|
||||
header = "|~|".join(fieldnames)
|
||||
rows = [header] + ["|~|".join(str(job.get(col, "Not Provided")).replace(",", "").strip() for col in fieldnames) for job in jobs]
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
f.write(",".join(rows))
|
||||
print(f"💾 Saved output to: {path}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
if len(sys.argv) != 3:
|
||||
raise ValueError("❌ Usage: python job_scraper_dynamic.py <user_email> <run_id>")
|
||||
|
||||
user_email, run_id = sys.argv[1], sys.argv[2]
|
||||
config, safe_email = load_config(user_email)
|
||||
jobs = scrape_jobs(config["search_terms"], config["results_wanted"], config["max_days_old"], config["target_state"])
|
||||
save_to_csv(jobs, f"outputs/jobspy_output_{safe_email}_{run_id}.csv")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Fatal error: {e}")
|
||||
sys.exit(1)
|
||||
146
job_scraper_exact_match.py
Normal file
146
job_scraper_exact_match.py
Normal file
@@ -0,0 +1,146 @@
|
||||
import csv
|
||||
import datetime
|
||||
import os
|
||||
from jobspy.google import Google
|
||||
from jobspy.linkedin import LinkedIn
|
||||
from jobspy.indeed import Indeed
|
||||
from jobspy.model import ScraperInput
|
||||
|
||||
# Define job sources
|
||||
sources = {
|
||||
"google": Google,
|
||||
"linkedin": LinkedIn,
|
||||
"indeed": Indeed,
|
||||
}
|
||||
|
||||
# Define search preferences
|
||||
search_terms = ["Automation Engineer", "CRM Manager", "Implementation Specialist", "CRM", "Project Manager", "POS", "Microsoft Power", "IT Support"]
|
||||
results_wanted = 100 # Fetch more jobs
|
||||
max_days_old = 2 # Fetch jobs posted in last 48 hours
|
||||
target_state = "NY" # Only keep jobs from New York
|
||||
|
||||
def scrape_jobs(search_terms, results_wanted, max_days_old, target_state):
|
||||
"""Scrape jobs from multiple sources and filter by state."""
|
||||
all_jobs = []
|
||||
today = datetime.date.today()
|
||||
|
||||
print("\n🔎 DEBUG: Fetching jobs for search terms:", search_terms)
|
||||
|
||||
for search_term in search_terms:
|
||||
for source_name, source_class in sources.items():
|
||||
print(f"\n🚀 Scraping {search_term} from {source_name}...")
|
||||
|
||||
scraper = source_class()
|
||||
search_criteria = ScraperInput(
|
||||
site_type=[source_name],
|
||||
search_term=search_term,
|
||||
results_wanted=results_wanted,
|
||||
)
|
||||
|
||||
job_response = scraper.scrape(search_criteria)
|
||||
|
||||
for job in job_response.jobs:
|
||||
# Normalize location fields
|
||||
location_city = job.location.city.strip() if job.location.city else "Unknown"
|
||||
location_state = job.location.state.strip().upper() if job.location.state else "Unknown"
|
||||
location_country = str(job.location.country) if job.location.country else "Unknown"
|
||||
|
||||
# Debug: Show all jobs being fetched
|
||||
print(f"📍 Fetched Job: {job.title} - {location_city}, {location_state}, {location_country}")
|
||||
|
||||
# Exclude jobs that don’t explicitly match the search terms
|
||||
if not any(term.lower() in job.title.lower() for term in search_terms):
|
||||
print(f"🚫 Excluding: {job.title} (Doesn't match {search_terms})")
|
||||
continue # Skip this job
|
||||
|
||||
# Ensure the job is recent
|
||||
if job.date_posted and (today - job.date_posted).days <= max_days_old:
|
||||
# Only accept jobs if they're in NY or Remote
|
||||
if location_state == target_state or job.is_remote:
|
||||
print(f"✅ MATCH: {job.title} - {location_city}, {location_state} (Posted {job.date_posted})")
|
||||
all_jobs.append({
|
||||
"Job ID": job.id,
|
||||
"Job Title (Primary)": job.title,
|
||||
"Company Name": job.company_name if job.company_name else "Unknown",
|
||||
"Industry": job.company_industry if job.company_industry else "Not Provided",
|
||||
"Experience Level": job.job_level if job.job_level else "Not Provided",
|
||||
"Job Type": job.job_type[0].name if job.job_type else "Not Provided",
|
||||
"Is Remote": job.is_remote,
|
||||
"Currency": job.compensation.currency if job.compensation else "",
|
||||
"Salary Min": job.compensation.min_amount if job.compensation else "",
|
||||
"Salary Max": job.compensation.max_amount if job.compensation else "",
|
||||
"Date Posted": job.date_posted.strftime("%Y-%m-%d") if job.date_posted else "Not Provided",
|
||||
"Location City": location_city,
|
||||
"Location State": location_state,
|
||||
"Location Country": location_country,
|
||||
"Job URL": job.job_url,
|
||||
"Job Description": job.description.replace(",", "") if job.description else "No description available",
|
||||
"Job Source": source_name
|
||||
})
|
||||
else:
|
||||
print(f"❌ Ignored (Wrong State): {job.title} - {location_city}, {location_state} (Posted {job.date_posted})")
|
||||
else:
|
||||
print(f"⏳ Ignored (Too Old): {job.title} - {location_city}, {location_state} (Posted {job.date_posted})")
|
||||
|
||||
print(f"\n✅ {len(all_jobs)} jobs retrieved in NY")
|
||||
return all_jobs
|
||||
|
||||
|
||||
def save_jobs_to_csv(jobs, filename="jobspy_output.csv"):
|
||||
"""Save job data to a CSV file with custom formatting:
|
||||
- Fields within a record are separated by the custom delimiter |~|
|
||||
- Records are separated by a comma
|
||||
- All commas in field values are removed
|
||||
- Blank fields are replaced with 'Not Provided'
|
||||
"""
|
||||
if not jobs:
|
||||
print("⚠️ No jobs found matching criteria.")
|
||||
return
|
||||
|
||||
# Remove old CSV file before writing
|
||||
if os.path.exists(filename):
|
||||
os.remove(filename)
|
||||
|
||||
fieldnames = [
|
||||
"Job ID", "Job Title (Primary)", "Company Name", "Industry",
|
||||
"Experience Level", "Job Type", "Is Remote", "Currency",
|
||||
"Salary Min", "Salary Max", "Date Posted", "Location City",
|
||||
"Location State", "Location Country", "Job URL", "Job Description",
|
||||
"Job Source"
|
||||
]
|
||||
|
||||
# Build header record using custom field delimiter
|
||||
header_record = "|~|".join(fieldnames)
|
||||
records = [header_record]
|
||||
|
||||
for job in jobs:
|
||||
row = []
|
||||
for field in fieldnames:
|
||||
value = str(job.get(field, "")).strip()
|
||||
if not value:
|
||||
value = "Not Provided"
|
||||
# Remove all commas from the value
|
||||
value = value.replace(",", "")
|
||||
row.append(value)
|
||||
# Join fields with the custom delimiter
|
||||
record = "|~|".join(row)
|
||||
records.append(record)
|
||||
|
||||
# Join records with a comma as the record separator
|
||||
output = ",".join(records)
|
||||
with open(filename, "w", encoding="utf-8") as file:
|
||||
file.write(output)
|
||||
|
||||
print(f"✅ Jobs saved to {filename} ({len(jobs)} entries)")
|
||||
|
||||
|
||||
# Run the scraper with multiple job searches
|
||||
job_data = scrape_jobs(
|
||||
search_terms=search_terms,
|
||||
results_wanted=results_wanted,
|
||||
max_days_old=max_days_old,
|
||||
target_state=target_state
|
||||
)
|
||||
|
||||
# Save results to CSV with custom formatting
|
||||
save_jobs_to_csv(job_data)
|
||||
@@ -205,6 +205,8 @@ class Indeed(Scraper):
|
||||
description = job["description"]["html"]
|
||||
if self.scraper_input.description_format == DescriptionFormat.MARKDOWN:
|
||||
description = markdown_converter(description)
|
||||
description = description.replace(",", "")
|
||||
|
||||
|
||||
job_type = get_job_type(job["attributes"])
|
||||
timestamp_seconds = job["datePublished"] / 1000
|
||||
|
||||
@@ -58,14 +58,11 @@ def is_job_remote(job: dict, description: str) -> bool:
|
||||
any(keyword in attr["label"].lower() for keyword in remote_keywords)
|
||||
for attr in job["attributes"]
|
||||
)
|
||||
is_remote_in_description = any(
|
||||
keyword in description.lower() for keyword in remote_keywords
|
||||
)
|
||||
is_remote_in_location = any(
|
||||
keyword in job["location"]["formatted"]["long"].lower()
|
||||
for keyword in remote_keywords
|
||||
)
|
||||
return is_remote_in_attributes or is_remote_in_description or is_remote_in_location
|
||||
return is_remote_in_attributes or is_remote_in_location
|
||||
|
||||
|
||||
def get_compensation_interval(interval: str) -> CompensationInterval:
|
||||
|
||||
@@ -217,6 +217,8 @@ class LinkedIn(Scraper):
|
||||
job_details = {}
|
||||
if full_descr:
|
||||
job_details = self._get_job_details(job_id)
|
||||
description = description.replace(",", "")
|
||||
|
||||
|
||||
return JobPost(
|
||||
id=f"li-{job_id}",
|
||||
|
||||
@@ -216,4 +216,4 @@ class ZipRecruiter(Scraper):
|
||||
Sends a session event to the API with device properties.
|
||||
"""
|
||||
url = f"{self.api_url}/jobs-app/event"
|
||||
self.session.post(url, data=get_cookie_data)
|
||||
self.session.post(url, data=get_cookie_data)
|
||||
@@ -28,4 +28,4 @@ def get_job_type_enum(job_type_str: str) -> list[JobType] | None:
|
||||
for job_type in JobType:
|
||||
if job_type_str in job_type.value:
|
||||
return [job_type]
|
||||
return None
|
||||
return None
|
||||
1159
jobspy_output.csv
Normal file
1159
jobspy_output.csv
Normal file
File diff suppressed because it is too large
Load Diff
236
poetry.lock
generated
236
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -17,7 +17,7 @@ include = "jobspy"
|
||||
line-length = 88
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
python = "^3.10 || ^3.12"
|
||||
requests = "^2.31.0"
|
||||
beautifulsoup4 = "^4.12.2"
|
||||
pandas = "^2.1.0"
|
||||
|
||||
118
requirements.txt
Normal file
118
requirements.txt
Normal file
@@ -0,0 +1,118 @@
|
||||
annotated-types==0.7.0
|
||||
anyio==4.6.2.post1
|
||||
argon2-cffi==23.1.0
|
||||
argon2-cffi-bindings==21.2.0
|
||||
arrow==1.3.0
|
||||
asttokens==2.4.1
|
||||
async-lru==2.0.4
|
||||
attrs==24.2.0
|
||||
babel==2.16.0
|
||||
beautifulsoup4==4.12.3
|
||||
black==24.10.0
|
||||
bleach==6.1.0
|
||||
certifi==2024.8.30
|
||||
cffi==1.17.1
|
||||
cfgv==3.4.0
|
||||
charset-normalizer==3.4.0
|
||||
click==8.1.7
|
||||
comm==0.2.2
|
||||
debugpy==1.8.7
|
||||
decorator==5.1.1
|
||||
defusedxml==0.7.1
|
||||
distlib==0.3.9
|
||||
executing==2.1.0
|
||||
fastjsonschema==2.20.0
|
||||
filelock==3.16.1
|
||||
fqdn==1.5.1
|
||||
h11==0.14.0
|
||||
httpcore==1.0.6
|
||||
httpx==0.27.2
|
||||
identify==2.6.1
|
||||
idna==3.10
|
||||
ipykernel==6.29.5
|
||||
ipython==8.28.0
|
||||
ipywidgets==8.1.5
|
||||
isoduration==20.11.0
|
||||
jedi==0.19.1
|
||||
Jinja2==3.1.4
|
||||
json5==0.9.25
|
||||
jsonpointer==3.0.0
|
||||
jsonschema==4.23.0
|
||||
jsonschema-specifications==2024.10.1
|
||||
jupyter==1.1.1
|
||||
jupyter-console==6.6.3
|
||||
jupyter-events==0.10.0
|
||||
jupyter-lsp==2.2.5
|
||||
jupyter_client==8.6.3
|
||||
jupyter_core==5.7.2
|
||||
jupyter_server==2.14.2
|
||||
jupyter_server_terminals==0.5.3
|
||||
jupyterlab==4.2.5
|
||||
jupyterlab_pygments==0.3.0
|
||||
jupyterlab_server==2.27.3
|
||||
jupyterlab_widgets==3.0.13
|
||||
markdownify==0.13.1
|
||||
MarkupSafe==3.0.2
|
||||
matplotlib-inline==0.1.7
|
||||
mistune==3.0.2
|
||||
mypy-extensions==1.0.0
|
||||
nbclient==0.10.0
|
||||
nbconvert==7.16.4
|
||||
nbformat==5.10.4
|
||||
nest-asyncio==1.6.0
|
||||
nodeenv==1.9.1
|
||||
notebook==7.2.2
|
||||
notebook_shim==0.2.4
|
||||
numpy==1.26.3
|
||||
overrides==7.7.0
|
||||
packaging==24.1
|
||||
pandas==2.2.3
|
||||
pandocfilters==1.5.1
|
||||
parso==0.8.4
|
||||
pathspec==0.12.1
|
||||
pexpect==4.9.0
|
||||
platformdirs==4.3.6
|
||||
pre_commit==4.0.1
|
||||
prometheus_client==0.21.0
|
||||
prompt_toolkit==3.0.48
|
||||
psutil==6.1.0
|
||||
ptyprocess==0.7.0
|
||||
pure_eval==0.2.3
|
||||
pycparser==2.22
|
||||
pydantic==2.9.2
|
||||
pydantic_core==2.23.4
|
||||
Pygments==2.18.0
|
||||
python-dateutil==2.9.0.post0
|
||||
-e git+https://github.com/fakebranden/JobSpy@60819a8fcabbd3eaba7741b673023612dc3d3692#egg=python_jobspy
|
||||
python-json-logger==2.0.7
|
||||
pytz==2024.2
|
||||
PyYAML==6.0.2
|
||||
pyzmq==26.2.0
|
||||
referencing==0.35.1
|
||||
regex==2024.9.11
|
||||
requests==2.32.3
|
||||
rfc3339-validator==0.1.4
|
||||
rfc3986-validator==0.1.1
|
||||
rpds-py==0.20.0
|
||||
Send2Trash==1.8.3
|
||||
setuptools==75.2.0
|
||||
six==1.16.0
|
||||
sniffio==1.3.1
|
||||
soupsieve==2.6
|
||||
stack-data==0.6.3
|
||||
terminado==0.18.1
|
||||
tinycss2==1.3.0
|
||||
tls-client==1.0.1
|
||||
tornado==6.4.1
|
||||
traitlets==5.14.3
|
||||
types-python-dateutil==2.9.0.20241003
|
||||
typing_extensions==4.12.2
|
||||
tzdata==2024.2
|
||||
uri-template==1.3.0
|
||||
urllib3==2.2.3
|
||||
virtualenv==20.27.0
|
||||
wcwidth==0.2.13
|
||||
webcolors==24.8.0
|
||||
webencodings==0.5.1
|
||||
websocket-client==1.8.0
|
||||
widgetsnbextension==4.0.13
|
||||
Reference in New Issue
Block a user