From 257f24ef1918618aac4d28919bb47260089bd237 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 26 Dec 2024 17:14:50 +0200 Subject: [PATCH] findall command works --- src/jobspy/__init__.py | 16 +++++++++-- src/jobspy/telegram_handler.py | 48 -------------------------------- src/{jobspy => }/main.py | 36 ++++++++++++++---------- src/{jobspy => }/telegram_bot.py | 8 ++---- src/telegram_handler.py | 37 ++++++++++++++++++++++++ 5 files changed, 75 insertions(+), 70 deletions(-) delete mode 100644 src/jobspy/telegram_handler.py rename src/{jobspy => }/main.py (53%) rename src/{jobspy => }/telegram_bot.py (74%) create mode 100644 src/telegram_handler.py diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py index ddf9068..22ebf31 100644 --- a/src/jobspy/__init__.py +++ b/src/jobspy/__init__.py @@ -1,4 +1,6 @@ from __future__ import annotations + +import re from threading import Lock import pandas as pd @@ -48,8 +50,9 @@ def scrape_jobs( hours_old: int = None, enforce_annual_salary: bool = False, verbose: int = 2, + filter_by_title:list[str] = None, ** kwargs, -) -> pd.DataFrame: +) -> list[JobPost]: """ Simultaneously scrapes job data from multiple job sites. :return: pandas dataframe containing job data @@ -148,4 +151,13 @@ def scrape_jobs( except Exception as e: logger.error(f"Future Error occurred: {e}") - return merged_jobs + def filter_jobs_by_title_name(job: JobPost): + for filter_title in filter_by_title: + if re.search(filter_title, job.title, re.IGNORECASE): + logger.info(f"job filtered out by title: {job.id} , { + job.title} , found {filter_title}") + return False + + return True + + return list(filter(filter_jobs_by_title_name, merged_jobs)) diff --git a/src/jobspy/telegram_handler.py b/src/jobspy/telegram_handler.py deleted file mode 100644 index 348446a..0000000 --- a/src/jobspy/telegram_handler.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -from dotenv import load_dotenv -from telegram import Bot, Update, ReplyKeyboardMarkup -from telegram.ext import ( - Application, - CommandHandler, - ContextTypes, - ConversationHandler, - MessageHandler, - filters, -) -from .scrapers.utils import create_logger - -load_dotenv() - -logger = create_logger("TelegramBot") - - -class TelegramHandler: - def __init__(self): - self._api_token = os.getenv("TELEGRAM_API_TOKEN") - self.chatId = os.getenv("TELEGRAM_CHAT_ID") - self.bot = Bot(token=self._api_token) - # Create the Application and pass it your bot's token. - self.application = Application.builder().token(self._api_token).build() - - async def findAll(self, update: Update, context: ContextTypes.DEFAULT_TYPE): - """Start the conversation and ask user for input.""" - await update.message.reply_text( - "Hi! My name is Doctor Botter. I will hold a more complex conversation with you. " - "Why don't you tell me something about yourself?" - ) - - async def find_glassdoor(self, update: Update, context: ContextTypes.DEFAULT_TYPE): - """Start the conversation and ask user for input.""" - await update.message.reply_text( - "Hi! My name is Doctor Botter. I will hold a more complex conversation with you. " - "Why don't you tell me something about yourself?" - ) - - def handler(self): - self.application.add_handler(CommandHandler("find", self.findAll)) - self.application.add_handler(CommandHandler("galssdoor", self.find_glassdoor)) - self.application.add_handler(CommandHandler("linkedin", self.findAll)) - self.application.add_handler(CommandHandler("indeed", self.findAll)) - self.application.add_handler(CommandHandler("goozali", self.findAll)) - # Run the bot until the user presses Ctrl-C - self.application.run_polling(allowed_updates=Update.ALL_TYPES) \ No newline at end of file diff --git a/src/jobspy/main.py b/src/main.py similarity index 53% rename from src/jobspy/main.py rename to src/main.py index a01d3a1..cd28e90 100644 --- a/src/jobspy/main.py +++ b/src/main.py @@ -1,26 +1,28 @@ -import asyncio import os import re +from telegram import Update from telegram.ext import Application, CommandHandler from src.jobspy import Site, scrape_jobs from src.jobspy.db.job_repository import JobRepository from src.jobspy.jobs import JobPost from src.jobspy.scrapers.utils import create_logger -from src.jobspy.telegram_bot import TelegramBot -from src.jobspy.telegram_handler import TelegramHandler +from src.telegram_bot import TelegramBot +from src.telegram_handler import TelegramAllHandler logger = create_logger("Main") -filter_by_title: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", "Front End", "DevOps", "Physical", "Staff", - "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", "Data Engineer", "DevSecOps"] +title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", + "Front End", "DevOps", "Physical", "Staff", + "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", + "Data Engineer", "DevSecOps"] def filter_jobs_by_title_name(job: JobPost): - for filter_title in filter_by_title: + for filter_title in title_filters: if re.search(filter_title, job.title, re.IGNORECASE): logger.info(f"job filtered out by title: {job.id} , { - job.title} , found {filter_title}") + job.title} , found {filter_title}") return False return True @@ -29,7 +31,6 @@ def filter_jobs_by_title_name(job: JobPost): async def main(): telegramBot = TelegramBot() jobRepository = JobRepository() - tg_handler = TelegramHandler() # sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI] sites_to_scrap = [Site.GOOZALI] # sites_to_scrap = [Site.GOOZALI] @@ -48,14 +49,21 @@ async def main(): for newJob in newJobs: await telegramBot.sendJob(newJob) + # Run the async main function if __name__ == "__main__": # asyncio.run(main()) + logger.info("Starting initialize ") _api_token = os.getenv("TELEGRAM_API_TOKEN") + tg_handler_all = TelegramAllHandler(sites=[Site.GOOZALI], + locations=["Tel Aviv, Israel", "Ramat Gan, Israel", + "Central, Israel", "Rehovot ,Israel"], + title_filters=title_filters) application = Application.builder().token(_api_token).build() - application.add_handler(CommandHandler("find", findAll)) - application.add_handler(CommandHandler("galssdoor", find_glassdoor)) - application.add_handler(CommandHandler("linkedin", find_linkedin)) - application.add_handler(CommandHandler("indeed", find_indeed)) - application.add_handler(CommandHandler("goozali", find_goozali)) - tg_handler = TelegramHandler().handler() + application.add_handler(CommandHandler("findAll", tg_handler_all.handle)) + # application.add_handler(CommandHandler("galssdoor", find_glassdoor)) + # application.add_handler(CommandHandler("linkedin", find_linkedin)) + # application.add_handler(CommandHandler("indeed", find_indeed)) + # application.add_handler(CommandHandler("goozali", find_goozali)) + logger.info("Run polling from telegram") + application.run_polling(allowed_updates=Update.ALL_TYPES) diff --git a/src/jobspy/telegram_bot.py b/src/telegram_bot.py similarity index 74% rename from src/jobspy/telegram_bot.py rename to src/telegram_bot.py index 0ab1b8a..d34084b 100644 --- a/src/jobspy/telegram_bot.py +++ b/src/telegram_bot.py @@ -9,8 +9,8 @@ from telegram.ext import ( MessageHandler, filters, ) -from .jobs import JobPost -from .scrapers.utils import create_logger +from src.jobspy.jobs import JobPost +from src.jobspy.scrapers.utils import create_logger load_dotenv() @@ -23,10 +23,6 @@ class TelegramBot: self._api_token = os.getenv("TELEGRAM_API_TOKEN") self.chatId = os.getenv("TELEGRAM_CHAT_ID") self.bot = Bot(token=self._api_token) - # Create the Application and pass it your bot's token. - self.application = Application.builder().token(self._api_token).build() - # Run the bot until the user presses Ctrl-C - self.application.run_polling(allowed_updates=Update.ALL_TYPES) async def sendJob(self, job: JobPost): """ diff --git a/src/telegram_handler.py b/src/telegram_handler.py new file mode 100644 index 0000000..2aeb970 --- /dev/null +++ b/src/telegram_handler.py @@ -0,0 +1,37 @@ +from telegram import Update +from telegram.ext import ( + ContextTypes, +) + +from src.jobspy import Site, scrape_jobs +from src.jobspy.db.job_repository import JobRepository +from src.jobspy.scrapers.utils import create_logger +from src.telegram_bot import TelegramBot + +logger = create_logger("TelegramAllHandler") + + +class TelegramAllHandler: + def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str]): + self.sites_to_scrap = sites + self.locations = locations + self.title_filters = title_filters + self.telegramBot = TelegramBot() + self.jobRepository = JobRepository() + + async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE): + logger.info("start handling") + jobs = scrape_jobs( + site_name=self.sites_to_scrap, + search_term="software engineer", + locations=self.locations, + results_wanted=200, + hours_old=48, + country_indeed='israel', + filter_by_title=self.title_filters + ) + logger.info(f"Found {len(jobs)} jobs") + new_jobs = self.jobRepository.insertManyIfNotFound(jobs) + for newJob in new_jobs: + await self.telegramBot.sendJob(newJob) + logger.info("finished handling")