From 34e85b92615aa1c86e7c1e8bc53abe429a9568f1 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 26 Dec 2024 17:31:55 +0200 Subject: [PATCH] new handler for goozali --- src/main.py | 36 +++++++---------- src/telegram_handler/__init__.py | 1 + .../telegram_goozali_handler.py | 39 +++++++++++++++++++ 3 files changed, 53 insertions(+), 23 deletions(-) create mode 100644 src/telegram_handler/telegram_goozali_handler.py diff --git a/src/main.py b/src/main.py index 3560c0e..16cdb94 100644 --- a/src/main.py +++ b/src/main.py @@ -1,31 +1,19 @@ import os -import re from telegram import Update from telegram.ext import Application, CommandHandler from src.jobspy import Site, scrape_jobs from src.jobspy.db.job_repository import JobRepository -from src.jobspy.jobs import JobPost from src.jobspy.scrapers.utils import create_logger from src.telegram_bot import TelegramBot -from src.telegram_handler import TelegramAllHandler +from src.telegram_handler import TelegramAllHandler,TelegramGoozaliHandler logger = create_logger("Main") title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", - "Front End", "DevOps", "Physical", "Staff", - "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", - "Data Engineer", "DevSecOps"] - - -def filter_jobs_by_title_name(job: JobPost): - for filter_title in title_filters: - if re.search(filter_title, job.title, re.IGNORECASE): - logger.info(f"job filtered out by title: {job.id} , { - job.title} , found {filter_title}") - return False - - return True + "Front End", "DevOps", "Physical", "Staff", + "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", + "Data Engineer", "DevSecOps"] async def main(): @@ -44,7 +32,6 @@ async def main(): country_indeed='israel' ) logger.info(f"Found {len(jobs)} jobs") - jobs = list(filter(filter_jobs_by_title_name, jobs)) newJobs = jobRepository.insertManyIfNotFound(jobs) for newJob in 
newJobs: await telegramBot.sendJob(newJob) @@ -52,19 +39,22 @@ async def main(): # Run the async main function if __name__ == "__main__": - # asyncio.run(main()) logger.info("Starting initialize ") _api_token = os.getenv("TELEGRAM_API_TOKEN") + search_term = "software engineer" + locations = ["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", "Rehovot ,Israel"] application = Application.builder().token(_api_token).build() - tg_handler_all = TelegramAllHandler(sites=[Site.GOOZALI], - locations=["Tel Aviv, Israel", "Ramat Gan, Israel", - "Central, Israel", "Rehovot ,Israel"], + tg_handler_all = TelegramAllHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI], + locations=locations, title_filters=title_filters, - search_term="software engineer") + search_term=search_term) application.add_handler(CommandHandler("findAll", tg_handler_all.handle)) + tg_handler_goozali = TelegramGoozaliHandler(locations=locations, + title_filters=title_filters, + search_term=search_term) + application.add_handler(CommandHandler("goozali", tg_handler_goozali.handle)) # application.add_handler(CommandHandler("galssdoor", find_glassdoor)) # application.add_handler(CommandHandler("linkedin", find_linkedin)) # application.add_handler(CommandHandler("indeed", find_indeed)) - # application.add_handler(CommandHandler("goozali", find_goozali)) logger.info("Run polling from telegram") application.run_polling(allowed_updates=Update.ALL_TYPES) diff --git a/src/telegram_handler/__init__.py b/src/telegram_handler/__init__.py index 3be473e..27859fe 100644 --- a/src/telegram_handler/__init__.py +++ b/src/telegram_handler/__init__.py @@ -1 +1,2 @@ from .telegram_all_handler import TelegramAllHandler +from .telegram_goozali_handler import TelegramGoozaliHandler diff --git a/src/telegram_handler/telegram_goozali_handler.py b/src/telegram_handler/telegram_goozali_handler.py new file mode 100644 index 0000000..775d8b9 --- /dev/null +++ 
b/src/telegram_handler/telegram_goozali_handler.py
@@ -0,0 +1,58 @@
+from telegram import Update
+from telegram.ext import (
+    ContextTypes,
+)
+
+from src.jobspy import Site, scrape_jobs
+from src.jobspy.db.job_repository import JobRepository
+from src.jobspy.scrapers.utils import create_logger
+from src.telegram_bot import TelegramBot
+from src.telegram_handler.telegram_handler import TelegramHandler
+
+# Logger is named after this handler so its records are not attributed to
+# TelegramAllHandler.
+logger = create_logger("TelegramGoozaliHandler")
+
+
+class TelegramGoozaliHandler(TelegramHandler):
+    """Telegram command handler that scrapes only the Goozali site."""
+
+    def __init__(self, locations: list[str], title_filters: list[str], search_term: str) -> None:
+        """Store the search settings and create the bot and repository.
+
+        Args:
+            locations: Locations passed to ``scrape_jobs``.
+            title_filters: Passed to ``scrape_jobs`` as ``filter_by_title``.
+            search_term: Search term passed to ``scrape_jobs``.
+        """
+        self.sites_to_scrap = [Site.GOOZALI]
+        self.locations = locations
+        self.search_term = search_term
+        self.title_filters = title_filters
+        self.telegramBot = TelegramBot()
+        self.jobRepository = JobRepository()
+
+    async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+        """Scrape Goozali and send the jobs the repository reports back.
+
+        The scraped jobs are passed to ``insertManyIfNotFound``; each job it
+        returns is sent with ``TelegramBot.sendJob``. ``update`` and
+        ``context`` are not read.
+        """
+        logger.info("start handling")
+        # NOTE(review): scrape_jobs is called without await, so it presumably
+        # blocks the event loop while it runs -- confirm this is acceptable.
+        jobs = scrape_jobs(
+            site_name=self.sites_to_scrap,
+            search_term=self.search_term,
+            locations=self.locations,
+            results_wanted=200,
+            hours_old=48,
+            country_indeed='israel',
+            filter_by_title=self.title_filters
+        )
+        logger.info(f"Found {len(jobs)} jobs")
+        new_jobs = self.jobRepository.insertManyIfNotFound(jobs)
+        for new_job in new_jobs:
+            await self.telegramBot.sendJob(new_job)
+        logger.info("finished handling")