From c105c31bab398bd76e66bc5d191ee6466440b7c7 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 19 Dec 2024 13:25:55 +0200 Subject: [PATCH] filter works --- src/jobspy/main.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/jobspy/main.py b/src/jobspy/main.py index ffae2a4..83655eb 100644 --- a/src/jobspy/main.py +++ b/src/jobspy/main.py @@ -3,15 +3,18 @@ import re from jobspy import Site, scrape_jobs from jobspy.db.job_repository import JobRepository from jobspy.jobs import JobPost +from jobspy.scrapers.utils import create_logger from jobspy.telegram_bot import TelegramBot +logger = create_logger("Main") filter_by_title: list[str] = ["test", "qa", "Lead", "Full Stack", "Fullstack", "Frontend" "data", "automation", "BI", "Principal"] -def filter_jobs_by_title_name(job: JobPost): +def filter_jobs_by_title_name(job): for filter_title in filter_by_title: if re.search(filter_title, job.title, re.IGNORECASE): + logger.info(f"job filtered out by title: {job.id} , {job.title}") return False return True @@ -20,25 +23,25 @@ def filter_jobs_by_title_name(job: JobPost): async def main(): telegramBot = TelegramBot() jobRepository = JobRepository() + # sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI] + sites_to_scrap = [Site.GLASSDOOR] + for site in sites_to_scrap: + jobs = scrape_jobs( + site_name=[site], + search_term="software engineer", + google_search_term="software engineer jobs near Tel Aviv Israel since yesterday", + locations=["Ramat Gan, Israel"], + results_wanted=200, + hours_old=200, + country_indeed='israel', + ) + logger.info(f"Found {len(jobs)} jobs") + jobs = list(filter(filter_jobs_by_title_name, jobs)) - jobs = scrape_jobs( - site_name=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED], - # site_name=[Site.GOOZALI], - search_term="software engineer", - google_search_term="software engineer jobs near Tel Aviv Israel since yesterday", - locations=["Tel Aviv, Israel", "Ramat Gan, Israel", - "Central, Israel", "Rehovot ,Israel"], - results_wanted=200, - hours_old=200, - country_indeed='israel', - ) - print(f"Found {len(jobs)} jobs") - job = filter(filter_jobs_by_title_name, jobs) + newJobs = jobRepository.insertManyIfNotFound(jobs) - newJobs = jobRepository.insertManyIfNotFound(jobs) - - for newJob in newJobs: - await telegramBot.sendJob(newJob) + for newJob in newJobs: + await telegramBot.sendJob(newJob) # Run the async main function if __name__ == "__main__":