# Mirror of https://github.com/Bunsly/JobSpy (109 lines, 3.9 KiB, Python)
from jobspy import scrape_jobs
|
|
import time
|
|
import certifi
|
|
import pandas as pd
|
|
import csv
|
|
from datetime import datetime
|
|
from typing import Optional, List
|
|
|
|
def filter_clearance_jobs(df: pd.DataFrame) -> pd.DataFrame:
    """Filter out jobs requiring security clearance.

    A row is dropped when its ``title`` or ``description`` mentions one of the
    clearance keywords as a whole term, compared case-insensitively. Whole-term
    matching keeps unrelated words that merely contain a keyword ("science"
    contains "sci", "secretary" contains "secret", "polyglot" contains "poly")
    from discarding ordinary listings.

    Args:
        df: Job listings. The ``title`` and ``description`` columns are
            searched when present; a missing column is skipped and non-string
            cells (``None``/NaN) never match.

    Returns:
        The rows of ``df`` that mention no clearance keyword, in their
        original order and with their original index labels.
    """
    # Imported locally because the module-level imports do not provide `re`.
    import re

    clearance_keywords = [
        'clearance', 'security clearance', 'secret', 'top secret',
        'ts/sci', 'sci', 'classified', 'poly', 'polygraph',
        'public trust', 'security+', 'security plus'
    ]

    # Escape every keyword so regex metacharacters are taken literally: an
    # unescaped 'security+' means "securit" followed by one or more "y" and
    # would match every occurrence of "security" or "cybersecurity".
    alternatives = '|'.join(re.escape(keyword) for keyword in clearance_keywords)

    # Lookarounds are used instead of \b because \b cannot anchor after a
    # non-word character such as the trailing '+' of 'security+'.
    matcher = re.compile(rf'(?<!\w)(?:{alternatives})(?!\w)', re.IGNORECASE)

    def mentions_clearance(value: object) -> bool:
        """Return True when *value* is text containing a clearance keyword."""
        return isinstance(value, str) and matcher.search(value) is not None

    # Accumulate, per row, whether any searched column mentions a keyword.
    flagged = pd.Series(False, index=df.index, dtype=bool)
    for column in ('title', 'description'):
        if column not in df.columns:
            continue
        hits = pd.Series(
            [mentions_clearance(cell) for cell in df[column]],
            index=df.index,
            dtype=bool,
        )
        flagged = flagged | hits

    return df[~flagged]
|
|
|
|
def search_tech_jobs(
    search_sites: Optional[List[str]] = None,
    exclude_clearance: bool = False
) -> Optional[pd.DataFrame]:
    """Search job boards for IT Engineer postings, print and save the results.

    Runs ``scrape_jobs`` with a fixed search configuration, optionally removes
    listings that mention security clearance, prints a numbered summary, and
    writes the listings to ``it_jobs_<timestamp>.csv`` (a relative path).

    Args:
        search_sites: Site names passed to ``scrape_jobs`` as ``site_name``.
            Defaults to ``["indeed", "linkedin"]`` when omitted or ``None``.
        exclude_clearance: When true, pass the results through
            ``filter_clearance_jobs`` before printing and saving.

    Returns:
        The DataFrame of listings (possibly empty after clearance filtering),
        or ``None`` when the scrape returned nothing or any exception was
        raised during the search.
    """
    # Build the default per call rather than sharing one mutable list object
    # across every invocation.
    if search_sites is None:
        search_sites = ["indeed", "linkedin"]

    # Search configuration
    search_config = {
        'search_term': 'IT Engineer',
        'location': 'Lone Tree, CO',
        'distance': 25,
        'results_wanted': 50,
        'job_type': 'fulltime',
        'hours_old': 72
    }

    try:
        print(f"Searching for: {search_config['search_term']} in {search_config['location']}")
        print(f"Distance: {search_config['distance']} miles")
        print(f"Job Type: {search_config['job_type']}")
        print(f"Posts from last: {search_config['hours_old']} hours")
        print(f"Excluding clearance jobs: {exclude_clearance}")
        print(f"Searching on: {', '.join(search_sites)}")

        jobs = scrape_jobs(
            site_name=search_sites,
            search_term=search_config['search_term'],
            location=search_config['location'],
            distance=search_config['distance'],
            results_wanted=search_config['results_wanted'],
            job_type=search_config['job_type'],
            hours_old=search_config['hours_old'],
            country_indeed="USA",
            description_format="markdown",
            verbose=2
        )

        if isinstance(jobs, pd.DataFrame) and not jobs.empty:
            print(f"\nInitial jobs found: {len(jobs)}")

            if exclude_clearance:
                original_count = len(jobs)
                jobs = filter_clearance_jobs(jobs)
                filtered_count = len(jobs)
                print(f"Removed {original_count - filtered_count} jobs requiring clearance")

            # Save results
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            csv_filename = f"it_jobs_{timestamp}.csv"

            # Print job summary
            print("\nJob Listings Found:")
            print("-------------------")
            # Number rows by position, not by index label: after filtering
            # the labels have gaps, and a non-integer label would make
            # `label + 1` raise.
            for position, (_, job) in enumerate(jobs.iterrows(), start=1):
                print(f"\n{position}. {job.get('title', 'No title')}")
                print(f" Company: {job.get('company', 'No company')}")
                print(f" Location: {job.get('location', 'No location')}")
                print(f" Source: {job.get('site', 'No source')}")
                print(f" Date Posted: {job.get('date_posted', 'No date')}")

            jobs.to_csv(csv_filename, index=False)
            print(f"\nResults saved to: {csv_filename}")
            return jobs

        print("No jobs found with current search parameters.")
        return None

    except Exception as e:
        # Top-level boundary of the script: report the failure and signal
        # "no results" to the caller instead of crashing.
        print("\nError during search:")
        print(f"Error details: {str(e)}")
        return None
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the search with clearance filtering enabled
    # and report either a per-source breakdown or a hint to retry.
    print("Starting job search...")
    jobs = search_tech_jobs(exclude_clearance=True)

    nothing_found = jobs is None or jobs.empty
    if nothing_found:
        print("\nNo results found. Try adjusting search parameters.")
    else:
        # One print with newline separators emits the same three lines as
        # three consecutive print calls.
        print(
            "\nSearch completed successfully!",
            f"Total jobs found: {len(jobs)}",
            "\nJobs by source:",
            sep="\n",
        )
        print(jobs['site'].value_counts())