From f010ce51b1a05554778a358945a4e6aa34bf0fe1 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 26 Dec 2024 16:44:35 +0200 Subject: [PATCH 1/7] init commit --- src/jobspy/main.py | 16 +++++++++++- src/jobspy/telegram_bot.py | 20 ++++++++++---- src/jobspy/telegram_handler.py | 48 ++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 src/jobspy/telegram_handler.py diff --git a/src/jobspy/main.py b/src/jobspy/main.py index 88a097c..a01d3a1 100644 --- a/src/jobspy/main.py +++ b/src/jobspy/main.py @@ -1,10 +1,15 @@ import asyncio +import os import re + +from telegram.ext import Application, CommandHandler + from src.jobspy import Site, scrape_jobs from src.jobspy.db.job_repository import JobRepository from src.jobspy.jobs import JobPost from src.jobspy.scrapers.utils import create_logger from src.jobspy.telegram_bot import TelegramBot +from src.jobspy.telegram_handler import TelegramHandler logger = create_logger("Main") filter_by_title: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", "Front End", "DevOps", "Physical", "Staff", @@ -24,6 +29,7 @@ def filter_jobs_by_title_name(job: JobPost): async def main(): telegramBot = TelegramBot() jobRepository = JobRepository() + tg_handler = TelegramHandler() # sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI] sites_to_scrap = [Site.GOOZALI] # sites_to_scrap = [Site.GOOZALI] @@ -44,4 +50,12 @@ async def main(): # Run the async main function if __name__ == "__main__": - asyncio.run(main()) + # asyncio.run(main()) + _api_token = os.getenv("TELEGRAM_API_TOKEN") + application = Application.builder().token(_api_token).build() + application.add_handler(CommandHandler("find", findAll)) + application.add_handler(CommandHandler("galssdoor", find_glassdoor)) + application.add_handler(CommandHandler("linkedin", find_linkedin)) + application.add_handler(CommandHandler("indeed", find_indeed)) + 
application.add_handler(CommandHandler("goozali", find_goozali)) + tg_handler = TelegramHandler().handler() diff --git a/src/jobspy/telegram_bot.py b/src/jobspy/telegram_bot.py index d71f511..0ab1b8a 100644 --- a/src/jobspy/telegram_bot.py +++ b/src/jobspy/telegram_bot.py @@ -1,7 +1,14 @@ import os from dotenv import load_dotenv -from telegram import Bot - +from telegram import Bot, Update +from telegram.ext import ( + Application, + CommandHandler, + ContextTypes, + ConversationHandler, + MessageHandler, + filters, +) from .jobs import JobPost from .scrapers.utils import create_logger @@ -16,13 +23,16 @@ class TelegramBot: self._api_token = os.getenv("TELEGRAM_API_TOKEN") self.chatId = os.getenv("TELEGRAM_CHAT_ID") self.bot = Bot(token=self._api_token) + # Create the Application and pass it your bot's token. + self.application = Application.builder().token(self._api_token).build() + # Run the bot until the user presses Ctrl-C + self.application.run_polling(allowed_updates=Update.ALL_TYPES) async def sendJob(self, job: JobPost): """ Send JobPost details to Telegram chat. 
""" - message = f"New Job Posted:\n\n" \ - f"Job ID: {job.id}\n" \ + message = f"Job ID: {job.id}\n" \ f"Job Title: {job.title}\n" \ f"Company: {job.company_name}\n" \ f"Location: {job.location.display_location()}\n" \ @@ -32,4 +42,4 @@ class TelegramBot: logger.info(f"Sent job to Telegram: {job.id}") except Exception as e: logger.error(f"Failed to send job to Telegram: {job.id}") - logger.error(f"Error: {e}") + logger.error(f"Error: {e}") \ No newline at end of file diff --git a/src/jobspy/telegram_handler.py b/src/jobspy/telegram_handler.py new file mode 100644 index 0000000..348446a --- /dev/null +++ b/src/jobspy/telegram_handler.py @@ -0,0 +1,48 @@ +import os +from dotenv import load_dotenv +from telegram import Bot, Update, ReplyKeyboardMarkup +from telegram.ext import ( + Application, + CommandHandler, + ContextTypes, + ConversationHandler, + MessageHandler, + filters, +) +from .scrapers.utils import create_logger + +load_dotenv() + +logger = create_logger("TelegramBot") + + +class TelegramHandler: + def __init__(self): + self._api_token = os.getenv("TELEGRAM_API_TOKEN") + self.chatId = os.getenv("TELEGRAM_CHAT_ID") + self.bot = Bot(token=self._api_token) + # Create the Application and pass it your bot's token. + self.application = Application.builder().token(self._api_token).build() + + async def findAll(self, update: Update, context: ContextTypes.DEFAULT_TYPE): + """Start the conversation and ask user for input.""" + await update.message.reply_text( + "Hi! My name is Doctor Botter. I will hold a more complex conversation with you. " + "Why don't you tell me something about yourself?" + ) + + async def find_glassdoor(self, update: Update, context: ContextTypes.DEFAULT_TYPE): + """Start the conversation and ask user for input.""" + await update.message.reply_text( + "Hi! My name is Doctor Botter. I will hold a more complex conversation with you. " + "Why don't you tell me something about yourself?" 
+ ) + + def handler(self): + self.application.add_handler(CommandHandler("find", self.findAll)) + self.application.add_handler(CommandHandler("galssdoor", self.find_glassdoor)) + self.application.add_handler(CommandHandler("linkedin", self.findAll)) + self.application.add_handler(CommandHandler("indeed", self.findAll)) + self.application.add_handler(CommandHandler("goozali", self.findAll)) + # Run the bot until the user presses Ctrl-C + self.application.run_polling(allowed_updates=Update.ALL_TYPES) \ No newline at end of file From 257f24ef1918618aac4d28919bb47260089bd237 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 26 Dec 2024 17:14:50 +0200 Subject: [PATCH 2/7] findall command works --- src/jobspy/__init__.py | 16 +++++++++-- src/jobspy/telegram_handler.py | 48 -------------------------------- src/{jobspy => }/main.py | 36 ++++++++++++++---------- src/{jobspy => }/telegram_bot.py | 8 ++---- src/telegram_handler.py | 37 ++++++++++++++++++++++++ 5 files changed, 75 insertions(+), 70 deletions(-) delete mode 100644 src/jobspy/telegram_handler.py rename src/{jobspy => }/main.py (53%) rename src/{jobspy => }/telegram_bot.py (74%) create mode 100644 src/telegram_handler.py diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py index ddf9068..22ebf31 100644 --- a/src/jobspy/__init__.py +++ b/src/jobspy/__init__.py @@ -1,4 +1,6 @@ from __future__ import annotations + +import re from threading import Lock import pandas as pd @@ -48,8 +50,9 @@ def scrape_jobs( hours_old: int = None, enforce_annual_salary: bool = False, verbose: int = 2, + filter_by_title:list[str] = None, ** kwargs, -) -> pd.DataFrame: +) -> list[JobPost]: """ Simultaneously scrapes job data from multiple job sites. 
:return: pandas dataframe containing job data @@ -148,4 +151,13 @@ def scrape_jobs( except Exception as e: logger.error(f"Future Error occurred: {e}") - return merged_jobs + def filter_jobs_by_title_name(job: JobPost): + for filter_title in filter_by_title: + if re.search(filter_title, job.title, re.IGNORECASE): + logger.info(f"job filtered out by title: {job.id} , { + job.title} , found {filter_title}") + return False + + return True + + return list(filter(filter_jobs_by_title_name, merged_jobs)) diff --git a/src/jobspy/telegram_handler.py b/src/jobspy/telegram_handler.py deleted file mode 100644 index 348446a..0000000 --- a/src/jobspy/telegram_handler.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -from dotenv import load_dotenv -from telegram import Bot, Update, ReplyKeyboardMarkup -from telegram.ext import ( - Application, - CommandHandler, - ContextTypes, - ConversationHandler, - MessageHandler, - filters, -) -from .scrapers.utils import create_logger - -load_dotenv() - -logger = create_logger("TelegramBot") - - -class TelegramHandler: - def __init__(self): - self._api_token = os.getenv("TELEGRAM_API_TOKEN") - self.chatId = os.getenv("TELEGRAM_CHAT_ID") - self.bot = Bot(token=self._api_token) - # Create the Application and pass it your bot's token. - self.application = Application.builder().token(self._api_token).build() - - async def findAll(self, update: Update, context: ContextTypes.DEFAULT_TYPE): - """Start the conversation and ask user for input.""" - await update.message.reply_text( - "Hi! My name is Doctor Botter. I will hold a more complex conversation with you. " - "Why don't you tell me something about yourself?" - ) - - async def find_glassdoor(self, update: Update, context: ContextTypes.DEFAULT_TYPE): - """Start the conversation and ask user for input.""" - await update.message.reply_text( - "Hi! My name is Doctor Botter. I will hold a more complex conversation with you. " - "Why don't you tell me something about yourself?" 
- ) - - def handler(self): - self.application.add_handler(CommandHandler("find", self.findAll)) - self.application.add_handler(CommandHandler("galssdoor", self.find_glassdoor)) - self.application.add_handler(CommandHandler("linkedin", self.findAll)) - self.application.add_handler(CommandHandler("indeed", self.findAll)) - self.application.add_handler(CommandHandler("goozali", self.findAll)) - # Run the bot until the user presses Ctrl-C - self.application.run_polling(allowed_updates=Update.ALL_TYPES) \ No newline at end of file diff --git a/src/jobspy/main.py b/src/main.py similarity index 53% rename from src/jobspy/main.py rename to src/main.py index a01d3a1..cd28e90 100644 --- a/src/jobspy/main.py +++ b/src/main.py @@ -1,26 +1,28 @@ -import asyncio import os import re +from telegram import Update from telegram.ext import Application, CommandHandler from src.jobspy import Site, scrape_jobs from src.jobspy.db.job_repository import JobRepository from src.jobspy.jobs import JobPost from src.jobspy.scrapers.utils import create_logger -from src.jobspy.telegram_bot import TelegramBot -from src.jobspy.telegram_handler import TelegramHandler +from src.telegram_bot import TelegramBot +from src.telegram_handler import TelegramAllHandler logger = create_logger("Main") -filter_by_title: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", "Front End", "DevOps", "Physical", "Staff", - "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", "Data Engineer", "DevSecOps"] +title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", + "Front End", "DevOps", "Physical", "Staff", + "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", + "Data Engineer", "DevSecOps"] def filter_jobs_by_title_name(job: JobPost): - for filter_title in filter_by_title: + for filter_title in title_filters: if re.search(filter_title, 
job.title, re.IGNORECASE): logger.info(f"job filtered out by title: {job.id} , { - job.title} , found {filter_title}") + job.title} , found {filter_title}") return False return True @@ -29,7 +31,6 @@ def filter_jobs_by_title_name(job: JobPost): async def main(): telegramBot = TelegramBot() jobRepository = JobRepository() - tg_handler = TelegramHandler() # sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI] sites_to_scrap = [Site.GOOZALI] # sites_to_scrap = [Site.GOOZALI] @@ -48,14 +49,21 @@ async def main(): for newJob in newJobs: await telegramBot.sendJob(newJob) + # Run the async main function if __name__ == "__main__": # asyncio.run(main()) + logger.info("Starting initialize ") _api_token = os.getenv("TELEGRAM_API_TOKEN") + tg_handler_all = TelegramAllHandler(sites=[Site.GOOZALI], + locations=["Tel Aviv, Israel", "Ramat Gan, Israel", + "Central, Israel", "Rehovot ,Israel"], + title_filters=title_filters) application = Application.builder().token(_api_token).build() - application.add_handler(CommandHandler("find", findAll)) - application.add_handler(CommandHandler("galssdoor", find_glassdoor)) - application.add_handler(CommandHandler("linkedin", find_linkedin)) - application.add_handler(CommandHandler("indeed", find_indeed)) - application.add_handler(CommandHandler("goozali", find_goozali)) - tg_handler = TelegramHandler().handler() + application.add_handler(CommandHandler("findAll", tg_handler_all.handle)) + # application.add_handler(CommandHandler("galssdoor", find_glassdoor)) + # application.add_handler(CommandHandler("linkedin", find_linkedin)) + # application.add_handler(CommandHandler("indeed", find_indeed)) + # application.add_handler(CommandHandler("goozali", find_goozali)) + logger.info("Run polling from telegram") + application.run_polling(allowed_updates=Update.ALL_TYPES) diff --git a/src/jobspy/telegram_bot.py b/src/telegram_bot.py similarity index 74% rename from src/jobspy/telegram_bot.py rename to src/telegram_bot.py index 
0ab1b8a..d34084b 100644 --- a/src/jobspy/telegram_bot.py +++ b/src/telegram_bot.py @@ -9,8 +9,8 @@ from telegram.ext import ( MessageHandler, filters, ) -from .jobs import JobPost -from .scrapers.utils import create_logger +from src.jobspy.jobs import JobPost +from src.jobspy.scrapers.utils import create_logger load_dotenv() @@ -23,10 +23,6 @@ class TelegramBot: self._api_token = os.getenv("TELEGRAM_API_TOKEN") self.chatId = os.getenv("TELEGRAM_CHAT_ID") self.bot = Bot(token=self._api_token) - # Create the Application and pass it your bot's token. - self.application = Application.builder().token(self._api_token).build() - # Run the bot until the user presses Ctrl-C - self.application.run_polling(allowed_updates=Update.ALL_TYPES) async def sendJob(self, job: JobPost): """ diff --git a/src/telegram_handler.py b/src/telegram_handler.py new file mode 100644 index 0000000..2aeb970 --- /dev/null +++ b/src/telegram_handler.py @@ -0,0 +1,37 @@ +from telegram import Update +from telegram.ext import ( + ContextTypes, +) + +from src.jobspy import Site, scrape_jobs +from src.jobspy.db.job_repository import JobRepository +from src.jobspy.scrapers.utils import create_logger +from src.telegram_bot import TelegramBot + +logger = create_logger("TelegramAllHandler") + + +class TelegramAllHandler: + def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str]): + self.sites_to_scrap = sites + self.locations = locations + self.title_filters = title_filters + self.telegramBot = TelegramBot() + self.jobRepository = JobRepository() + + async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE): + logger.info("start handling") + jobs = scrape_jobs( + site_name=self.sites_to_scrap, + search_term="software engineer", + locations=self.locations, + results_wanted=200, + hours_old=48, + country_indeed='israel', + filter_by_title=self.title_filters + ) + logger.info(f"Found {len(jobs)} jobs") + new_jobs = self.jobRepository.insertManyIfNotFound(jobs) + 
for newJob in new_jobs: + await self.telegramBot.sendJob(newJob) + logger.info("finished handling") From 458728bb715be6b0697406940b8b9a119d76e21e Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 26 Dec 2024 17:19:56 +0200 Subject: [PATCH 3/7] moved to new folder --- src/__init__.py | 0 src/telegram_handler/__init__.py | 1 + .../telegram_all_handler.py} | 0 3 files changed, 1 insertion(+) create mode 100644 src/__init__.py create mode 100644 src/telegram_handler/__init__.py rename src/{telegram_handler.py => telegram_handler/telegram_all_handler.py} (100%) diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/telegram_handler/__init__.py b/src/telegram_handler/__init__.py new file mode 100644 index 0000000..3be473e --- /dev/null +++ b/src/telegram_handler/__init__.py @@ -0,0 +1 @@ +from .telegram_all_handler import TelegramAllHandler diff --git a/src/telegram_handler.py b/src/telegram_handler/telegram_all_handler.py similarity index 100% rename from src/telegram_handler.py rename to src/telegram_handler/telegram_all_handler.py From 4549b2ffc83194ddd67cde8b6594ad6628eb65e5 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 26 Dec 2024 17:23:12 +0200 Subject: [PATCH 4/7] new abstract class to identify telegram handler --- src/main.py | 2 +- src/telegram_handler/telegram_all_handler.py | 3 ++- src/telegram_handler/telegram_handler.py | 12 ++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 src/telegram_handler/telegram_handler.py diff --git a/src/main.py b/src/main.py index cd28e90..3448f51 100644 --- a/src/main.py +++ b/src/main.py @@ -55,11 +55,11 @@ if __name__ == "__main__": # asyncio.run(main()) logger.info("Starting initialize ") _api_token = os.getenv("TELEGRAM_API_TOKEN") + application = Application.builder().token(_api_token).build() tg_handler_all = TelegramAllHandler(sites=[Site.GOOZALI], locations=["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", 
"Rehovot ,Israel"], title_filters=title_filters) - application = Application.builder().token(_api_token).build() application.add_handler(CommandHandler("findAll", tg_handler_all.handle)) # application.add_handler(CommandHandler("galssdoor", find_glassdoor)) # application.add_handler(CommandHandler("linkedin", find_linkedin)) diff --git a/src/telegram_handler/telegram_all_handler.py b/src/telegram_handler/telegram_all_handler.py index 2aeb970..2eaecec 100644 --- a/src/telegram_handler/telegram_all_handler.py +++ b/src/telegram_handler/telegram_all_handler.py @@ -7,11 +7,12 @@ from src.jobspy import Site, scrape_jobs from src.jobspy.db.job_repository import JobRepository from src.jobspy.scrapers.utils import create_logger from src.telegram_bot import TelegramBot +from src.telegram_handler.telegram_handler import TelegramHandler logger = create_logger("TelegramAllHandler") -class TelegramAllHandler: +class TelegramAllHandler(TelegramHandler): def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str]): self.sites_to_scrap = sites self.locations = locations diff --git a/src/telegram_handler/telegram_handler.py b/src/telegram_handler/telegram_handler.py new file mode 100644 index 0000000..14c71a3 --- /dev/null +++ b/src/telegram_handler/telegram_handler.py @@ -0,0 +1,12 @@ +from abc import ABC, abstractmethod + +from telegram import Update +from telegram.ext import ContextTypes + + +# Define an abstract class +class TelegramHandler(ABC): + + @abstractmethod + def handle(self, update: Update, context: ContextTypes): + pass # This is an abstract method, no implementation here. 
\ No newline at end of file From f1b127ae449a93e077617946d579a65e41ad9d85 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 26 Dec 2024 17:25:47 +0200 Subject: [PATCH 5/7] added search_term to init --- src/main.py | 3 ++- src/telegram_handler/telegram_all_handler.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index 3448f51..3560c0e 100644 --- a/src/main.py +++ b/src/main.py @@ -59,7 +59,8 @@ if __name__ == "__main__": tg_handler_all = TelegramAllHandler(sites=[Site.GOOZALI], locations=["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", "Rehovot ,Israel"], - title_filters=title_filters) + title_filters=title_filters, + search_term="software engineer") application.add_handler(CommandHandler("findAll", tg_handler_all.handle)) # application.add_handler(CommandHandler("galssdoor", find_glassdoor)) # application.add_handler(CommandHandler("linkedin", find_linkedin)) diff --git a/src/telegram_handler/telegram_all_handler.py b/src/telegram_handler/telegram_all_handler.py index 2eaecec..7b3a824 100644 --- a/src/telegram_handler/telegram_all_handler.py +++ b/src/telegram_handler/telegram_all_handler.py @@ -13,9 +13,10 @@ logger = create_logger("TelegramAllHandler") class TelegramAllHandler(TelegramHandler): - def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str]): + def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str],search_term:str): self.sites_to_scrap = sites self.locations = locations + self.search_term = search_term self.title_filters = title_filters self.telegramBot = TelegramBot() self.jobRepository = JobRepository() @@ -24,7 +25,7 @@ class TelegramAllHandler(TelegramHandler): logger.info("start handling") jobs = scrape_jobs( site_name=self.sites_to_scrap, - search_term="software engineer", + search_term=self.search_term, locations=self.locations, results_wanted=200, hours_old=48, From 34e85b92615aa1c86e7c1e8bc53abe429a9568f1 Mon Sep 17 
00:00:00 2001 From: Yariv Menachem Date: Thu, 26 Dec 2024 17:31:55 +0200 Subject: [PATCH 6/7] new handler for goozali --- src/main.py | 36 +++++++---------- src/telegram_handler/__init__.py | 1 + .../telegram_goozali_handler.py | 39 +++++++++++++++++++ 3 files changed, 53 insertions(+), 23 deletions(-) create mode 100644 src/telegram_handler/telegram_goozali_handler.py diff --git a/src/main.py b/src/main.py index 3560c0e..16cdb94 100644 --- a/src/main.py +++ b/src/main.py @@ -1,31 +1,19 @@ import os -import re from telegram import Update from telegram.ext import Application, CommandHandler from src.jobspy import Site, scrape_jobs from src.jobspy.db.job_repository import JobRepository -from src.jobspy.jobs import JobPost from src.jobspy.scrapers.utils import create_logger from src.telegram_bot import TelegramBot -from src.telegram_handler import TelegramAllHandler +from src.telegram_handler import TelegramAllHandler,TelegramGoozaliHandler logger = create_logger("Main") title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", - "Front End", "DevOps", "Physical", "Staff", - "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", - "Data Engineer", "DevSecOps"] - - -def filter_jobs_by_title_name(job: JobPost): - for filter_title in title_filters: - if re.search(filter_title, job.title, re.IGNORECASE): - logger.info(f"job filtered out by title: {job.id} , { - job.title} , found {filter_title}") - return False - - return True + "Front End", "DevOps", "Physical", "Staff", + "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", + "Data Engineer", "DevSecOps"] async def main(): @@ -44,7 +32,6 @@ async def main(): country_indeed='israel' ) logger.info(f"Found {len(jobs)} jobs") - jobs = list(filter(filter_jobs_by_title_name, jobs)) newJobs = jobRepository.insertManyIfNotFound(jobs) for newJob in newJobs: await telegramBot.sendJob(newJob) @@ -52,19 
+39,22 @@ async def main(): # Run the async main function if __name__ == "__main__": - # asyncio.run(main()) logger.info("Starting initialize ") _api_token = os.getenv("TELEGRAM_API_TOKEN") + search_term = "software engineer" + locations = ["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", "Rehovot ,Israel"] application = Application.builder().token(_api_token).build() - tg_handler_all = TelegramAllHandler(sites=[Site.GOOZALI], - locations=["Tel Aviv, Israel", "Ramat Gan, Israel", - "Central, Israel", "Rehovot ,Israel"], + tg_handler_all = TelegramAllHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI], + locations=locations, title_filters=title_filters, - search_term="software engineer") + search_term=search_term) application.add_handler(CommandHandler("findAll", tg_handler_all.handle)) + tg_handler_goozali = TelegramGoozaliHandler(locations=locations, + title_filters=title_filters, + search_term=search_term) + application.add_handler(CommandHandler("goozali", tg_handler_goozali.handle)) # application.add_handler(CommandHandler("galssdoor", find_glassdoor)) # application.add_handler(CommandHandler("linkedin", find_linkedin)) # application.add_handler(CommandHandler("indeed", find_indeed)) - # application.add_handler(CommandHandler("goozali", find_goozali)) logger.info("Run polling from telegram") application.run_polling(allowed_updates=Update.ALL_TYPES) diff --git a/src/telegram_handler/__init__.py b/src/telegram_handler/__init__.py index 3be473e..27859fe 100644 --- a/src/telegram_handler/__init__.py +++ b/src/telegram_handler/__init__.py @@ -1 +1,2 @@ from .telegram_all_handler import TelegramAllHandler +from .telegram_goozali_handler import TelegramGoozaliHandler diff --git a/src/telegram_handler/telegram_goozali_handler.py b/src/telegram_handler/telegram_goozali_handler.py new file mode 100644 index 0000000..775d8b9 --- /dev/null +++ b/src/telegram_handler/telegram_goozali_handler.py @@ -0,0 +1,39 @@ +from telegram import Update 
+from telegram.ext import ( + ContextTypes, +) + +from src.jobspy import Site, scrape_jobs +from src.jobspy.db.job_repository import JobRepository +from src.jobspy.scrapers.utils import create_logger +from src.telegram_bot import TelegramBot +from src.telegram_handler.telegram_handler import TelegramHandler + +logger = create_logger("TelegramAllHandler") + + +class TelegramGoozaliHandler(TelegramHandler): + def __init__(self, locations: list[str], title_filters: list[str], search_term: str): + self.sites_to_scrap = [Site.GOOZALI] + self.locations = locations + self.search_term = search_term + self.title_filters = title_filters + self.telegramBot = TelegramBot() + self.jobRepository = JobRepository() + + async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE): + logger.info("start handling") + jobs = scrape_jobs( + site_name=self.sites_to_scrap, + search_term=self.search_term, + locations=self.locations, + results_wanted=200, + hours_old=48, + country_indeed='israel', + filter_by_title=self.title_filters + ) + logger.info(f"Found {len(jobs)} jobs") + new_jobs = self.jobRepository.insertManyIfNotFound(jobs) + for newJob in new_jobs: + await self.telegramBot.sendJob(newJob) + logger.info("finished handling") From 3fcc87e9b7152b9c83b2b52e3a42b637b7458047 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 26 Dec 2024 17:53:31 +0200 Subject: [PATCH 7/7] created default handler and another for indeed. 
on bot i can do findAll and find one by one with the command of the site name --- src/main.py | 67 ++++++++----------- src/telegram_handler/__init__.py | 4 +- ...handler.py => telegram_default_handler.py} | 17 ++--- ..._handler.py => telegram_indeed_handler.py} | 13 ++-- 4 files changed, 46 insertions(+), 55 deletions(-) rename src/telegram_handler/{telegram_all_handler.py => telegram_default_handler.py} (72%) rename src/telegram_handler/{telegram_goozali_handler.py => telegram_indeed_handler.py} (78%) diff --git a/src/main.py b/src/main.py index 16cdb94..48d88a8 100644 --- a/src/main.py +++ b/src/main.py @@ -3,11 +3,9 @@ import os from telegram import Update from telegram.ext import Application, CommandHandler -from src.jobspy import Site, scrape_jobs -from src.jobspy.db.job_repository import JobRepository +from src.jobspy import Site from src.jobspy.scrapers.utils import create_logger -from src.telegram_bot import TelegramBot -from src.telegram_handler import TelegramAllHandler,TelegramGoozaliHandler +from src.telegram_handler import TelegramIndeedHandler, TelegramDefaultHandler logger = create_logger("Main") title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", @@ -15,46 +13,39 @@ title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "F "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", "Data Engineer", "DevSecOps"] - -async def main(): - telegramBot = TelegramBot() - jobRepository = JobRepository() - # sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI] - sites_to_scrap = [Site.GOOZALI] - # sites_to_scrap = [Site.GOOZALI] - jobs = scrape_jobs( - site_name=sites_to_scrap, - search_term="software engineer", - locations=["Tel Aviv, Israel", "Ramat Gan, Israel", - "Central, Israel", "Rehovot ,Israel"], - results_wanted=200, - hours_old=48, - country_indeed='israel' - ) - logger.info(f"Found {len(jobs)} jobs") - newJobs = 
jobRepository.insertManyIfNotFound(jobs) - for newJob in newJobs: - await telegramBot.sendJob(newJob) - - -# Run the async main function if __name__ == "__main__": logger.info("Starting initialize ") _api_token = os.getenv("TELEGRAM_API_TOKEN") search_term = "software engineer" locations = ["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", "Rehovot ,Israel"] application = Application.builder().token(_api_token).build() - tg_handler_all = TelegramAllHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI], - locations=locations, - title_filters=title_filters, - search_term=search_term) + tg_handler_all = TelegramDefaultHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI], + locations=locations, + title_filters=title_filters, + search_term=search_term) application.add_handler(CommandHandler("findAll", tg_handler_all.handle)) - tg_handler_goozali = TelegramGoozaliHandler(locations=locations, - title_filters=title_filters, - search_term=search_term) - application.add_handler(CommandHandler("goozali", tg_handler_goozali.handle)) - # application.add_handler(CommandHandler("galssdoor", find_glassdoor)) - # application.add_handler(CommandHandler("linkedin", find_linkedin)) - # application.add_handler(CommandHandler("indeed", find_indeed)) + # Goozali + tg_handler_goozali = TelegramDefaultHandler(sites=[Site.GOOZALI], + locations=locations, + title_filters=title_filters, + search_term=search_term) + application.add_handler(CommandHandler(Site.GOOZALI.value, tg_handler_goozali.handle)) + # GlassDoor + tg_handler_glassdoor = TelegramDefaultHandler(sites=[Site.GLASSDOOR], + locations=locations, + title_filters=title_filters, + search_term=search_term) + application.add_handler(CommandHandler(Site.GLASSDOOR.value, tg_handler_glassdoor.handle)) + # LinkeDin + tg_handler_linkedin = TelegramDefaultHandler(sites=[Site.LINKEDIN], + locations=locations, + title_filters=title_filters, + search_term=search_term) + 
application.add_handler(CommandHandler(Site.LINKEDIN.value, tg_handler_linkedin.handle)) + # Indeed + tg_handler_indeed = TelegramIndeedHandler(locations=locations, + title_filters=title_filters, + search_term=search_term) + application.add_handler(CommandHandler(Site.INDEED.value, tg_handler_indeed.handle)) logger.info("Run polling from telegram") application.run_polling(allowed_updates=Update.ALL_TYPES) diff --git a/src/telegram_handler/__init__.py b/src/telegram_handler/__init__.py index 27859fe..0316d08 100644 --- a/src/telegram_handler/__init__.py +++ b/src/telegram_handler/__init__.py @@ -1,2 +1,2 @@ -from .telegram_all_handler import TelegramAllHandler -from .telegram_goozali_handler import TelegramGoozaliHandler +from .telegram_default_handler import TelegramDefaultHandler +from .telegram_indeed_handler import TelegramIndeedHandler diff --git a/src/telegram_handler/telegram_all_handler.py b/src/telegram_handler/telegram_default_handler.py similarity index 72% rename from src/telegram_handler/telegram_all_handler.py rename to src/telegram_handler/telegram_default_handler.py index 7b3a824..49b71dd 100644 --- a/src/telegram_handler/telegram_all_handler.py +++ b/src/telegram_handler/telegram_default_handler.py @@ -9,31 +9,32 @@ from src.jobspy.scrapers.utils import create_logger from src.telegram_bot import TelegramBot from src.telegram_handler.telegram_handler import TelegramHandler -logger = create_logger("TelegramAllHandler") - -class TelegramAllHandler(TelegramHandler): - def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str],search_term:str): +class TelegramDefaultHandler(TelegramHandler): + def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str], search_term: str): self.sites_to_scrap = sites self.locations = locations self.search_term = search_term self.title_filters = title_filters self.telegramBot = TelegramBot() self.jobRepository = JobRepository() + if len(sites) == 1: + self.logger = 
create_logger(f"Telegram{sites[0].name.title()}Handler") + else: + self.logger = create_logger("TelegramAllHandler") async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE): - logger.info("start handling") + self.logger.info("start handling") jobs = scrape_jobs( site_name=self.sites_to_scrap, search_term=self.search_term, locations=self.locations, results_wanted=200, hours_old=48, - country_indeed='israel', filter_by_title=self.title_filters ) - logger.info(f"Found {len(jobs)} jobs") + self.logger.info(f"Found {len(jobs)} jobs") new_jobs = self.jobRepository.insertManyIfNotFound(jobs) for newJob in new_jobs: await self.telegramBot.sendJob(newJob) - logger.info("finished handling") + self.logger.info("finished handling") diff --git a/src/telegram_handler/telegram_goozali_handler.py b/src/telegram_handler/telegram_indeed_handler.py similarity index 78% rename from src/telegram_handler/telegram_goozali_handler.py rename to src/telegram_handler/telegram_indeed_handler.py index 775d8b9..21dd975 100644 --- a/src/telegram_handler/telegram_goozali_handler.py +++ b/src/telegram_handler/telegram_indeed_handler.py @@ -9,20 +9,19 @@ from src.jobspy.scrapers.utils import create_logger from src.telegram_bot import TelegramBot from src.telegram_handler.telegram_handler import TelegramHandler -logger = create_logger("TelegramAllHandler") - -class TelegramGoozaliHandler(TelegramHandler): +class TelegramIndeedHandler(TelegramHandler): def __init__(self, locations: list[str], title_filters: list[str], search_term: str): - self.sites_to_scrap = [Site.GOOZALI] + self.sites_to_scrap = [Site.INDEED] self.locations = locations self.search_term = search_term self.title_filters = title_filters self.telegramBot = TelegramBot() self.jobRepository = JobRepository() + self.logger = create_logger(f"Telegram{self.sites_to_scrap[0].name.title()}Handler") async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE): - logger.info("start handling") + 
self.logger.info("start handling") jobs = scrape_jobs( site_name=self.sites_to_scrap, search_term=self.search_term, @@ -32,8 +31,8 @@ class TelegramGoozaliHandler(TelegramHandler): country_indeed='israel', filter_by_title=self.title_filters ) - logger.info(f"Found {len(jobs)} jobs") + self.logger.info(f"Found {len(jobs)} jobs") new_jobs = self.jobRepository.insertManyIfNotFound(jobs) for newJob in new_jobs: await self.telegramBot.sendJob(newJob) - logger.info("finished handling") + self.logger.info("finished handling")