Restructure the Telegram bot: scrape_jobs() gains an optional title filter and
returns the filtered job list, src/jobspy/main.py is replaced by src/main.py,
and src/main.py registers one Telegram command handler per job site.

NOTE(review): whitespace was reconstructed from a flattened copy of this patch;
verify context-line indentation and blank context lines before applying.
NOTE(review): "index" lines keep the blob ids of the original commit; hunks
edited during review no longer hash to them.

diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py
index ddf9068..22ebf31 100644
--- a/src/jobspy/__init__.py
+++ b/src/jobspy/__init__.py
@@ -1,4 +1,6 @@
 from __future__ import annotations
+
+import re
 from threading import Lock
 
 import pandas as pd
@@ -48,8 +50,9 @@ def scrape_jobs(
     hours_old: int = None,
     enforce_annual_salary: bool = False,
     verbose: int = 2,
+    filter_by_title: list[str] | None = None,
     ** kwargs,
-) -> pd.DataFrame:
+) -> list[JobPost]:
     """
     Simultaneously scrapes job data from multiple job sites.
-    :return: pandas dataframe containing job data
+    :return: list of JobPost whose titles match none of the filter_by_title patterns
@@ -148,4 +151,15 @@ def scrape_jobs(
             except Exception as e:
                 logger.error(f"Future Error occurred: {e}")
 
-    return merged_jobs
+    def filter_jobs_by_title_name(job: JobPost) -> bool:
+        """Return False when the job title matches any filter_by_title pattern."""
+        # filter_by_title defaults to None; treat that as "no filters".
+        for filter_title in filter_by_title or []:
+            if re.search(filter_title, job.title, re.IGNORECASE):
+                logger.info(
+                    f"job filtered out by title: {job.id} , {job.title} , found {filter_title}")
+                return False
+
+        return True
+
+    return list(filter(filter_jobs_by_title_name, merged_jobs))
diff --git a/src/jobspy/main.py b/src/jobspy/main.py
deleted file mode 100644
index 88a097c..0000000
--- a/src/jobspy/main.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import asyncio
-import re
-from src.jobspy import Site, scrape_jobs
-from src.jobspy.db.job_repository import JobRepository
-from src.jobspy.jobs import JobPost
-from src.jobspy.scrapers.utils import create_logger
-from src.jobspy.telegram_bot import TelegramBot
-
-logger = create_logger("Main")
-filter_by_title: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", "Front End", "DevOps", "Physical", "Staff",
-                              "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", "Data Engineer", "DevSecOps"]
-
-
-def filter_jobs_by_title_name(job: JobPost):
-    for filter_title in filter_by_title:
-        if re.search(filter_title, job.title, re.IGNORECASE):
-            logger.info(f"job filtered out by title: {job.id} , {
-                job.title} , found {filter_title}")
-            return False
-
-    return True
-
-
-async def main():
-    telegramBot = TelegramBot()
-    jobRepository = JobRepository()
-    # sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI]
-    sites_to_scrap = [Site.GOOZALI]
-    # sites_to_scrap = [Site.GOOZALI]
-    jobs = scrape_jobs(
-        site_name=sites_to_scrap,
-        search_term="software engineer",
-        locations=["Tel Aviv, Israel", "Ramat Gan, Israel",
-                   "Central, Israel", "Rehovot ,Israel"],
-        results_wanted=200,
-        hours_old=48,
-        country_indeed='israel'
-    )
-    logger.info(f"Found {len(jobs)} jobs")
-    jobs = list(filter(filter_jobs_by_title_name, jobs))
-    newJobs = jobRepository.insertManyIfNotFound(jobs)
-    for newJob in newJobs:
-        await telegramBot.sendJob(newJob)
-
-# Run the async main function
-if __name__ == "__main__":
-    asyncio.run(main())
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..48d88a8
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,51 @@
+import os
+
+from telegram import Update
+from telegram.ext import Application, CommandHandler
+
+from src.jobspy import Site
+from src.jobspy.scrapers.utils import create_logger
+from src.telegram_handler import TelegramIndeedHandler, TelegramDefaultHandler
+
+logger = create_logger("Main")
+title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end",
+                            "Front End", "DevOps", "Physical", "Staff",
+                            "automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student",
+                            "Data Engineer", "DevSecOps"]
+
+if __name__ == "__main__":
+    logger.info("Starting initialize ")
+    _api_token = os.getenv("TELEGRAM_API_TOKEN")
+    search_term = "software engineer"
+    locations = ["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", "Rehovot ,Israel"]
+    application = Application.builder().token(_api_token).build()
+    tg_handler_all = TelegramDefaultHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI],
+                                            locations=locations,
+                                            title_filters=title_filters,
+                                            search_term=search_term)
+    application.add_handler(CommandHandler("findAll", tg_handler_all.handle))
+    # Goozali
+    tg_handler_goozali = TelegramDefaultHandler(sites=[Site.GOOZALI],
+                                                locations=locations,
+                                                title_filters=title_filters,
+                                                search_term=search_term)
+    application.add_handler(CommandHandler(Site.GOOZALI.value, tg_handler_goozali.handle))
+    # GlassDoor
+    tg_handler_glassdoor = TelegramDefaultHandler(sites=[Site.GLASSDOOR],
+                                                  locations=locations,
+                                                  title_filters=title_filters,
+                                                  search_term=search_term)
+    application.add_handler(CommandHandler(Site.GLASSDOOR.value, tg_handler_glassdoor.handle))
+    # LinkeDin
+    tg_handler_linkedin = TelegramDefaultHandler(sites=[Site.LINKEDIN],
+                                                 locations=locations,
+                                                 title_filters=title_filters,
+                                                 search_term=search_term)
+    application.add_handler(CommandHandler(Site.LINKEDIN.value, tg_handler_linkedin.handle))
+    # Indeed
+    tg_handler_indeed = TelegramIndeedHandler(locations=locations,
+                                              title_filters=title_filters,
+                                              search_term=search_term)
+    application.add_handler(CommandHandler(Site.INDEED.value, tg_handler_indeed.handle))
+    logger.info("Run polling from telegram")
+    application.run_polling(allowed_updates=Update.ALL_TYPES)
diff --git a/src/jobspy/telegram_bot.py b/src/telegram_bot.py
similarity index 71%
rename from src/jobspy/telegram_bot.py
rename to src/telegram_bot.py
index d71f511..d34084b 100644
--- a/src/jobspy/telegram_bot.py
+++ b/src/telegram_bot.py
@@ -1,9 +1,16 @@
 import os
 from dotenv import load_dotenv
-from telegram import Bot
-
-from .jobs import JobPost
-from .scrapers.utils import create_logger
+from telegram import Bot, Update
+from telegram.ext import (
+    Application,
+    CommandHandler,
+    ContextTypes,
+    ConversationHandler,
+    MessageHandler,
+    filters,
+)
+from src.jobspy.jobs import JobPost
+from src.jobspy.scrapers.utils import create_logger
 
 load_dotenv()
 
@@ -21,8 +28,7 @@ class TelegramBot:
         """
         Send JobPost details to Telegram chat.
         """
-        message = f"New Job Posted:\n\n" \
-                  f"Job ID: {job.id}\n" \
+        message = f"Job ID: {job.id}\n" \
                   f"Job Title: {job.title}\n" \
                   f"Company: {job.company_name}\n" \
                   f"Location: {job.location.display_location()}\n" \
@@ -32,4 +38,4 @@ class TelegramBot:
             logger.info(f"Sent job to Telegram: {job.id}")
         except Exception as e:
             logger.error(f"Failed to send job to Telegram: {job.id}")
-            logger.error(f"Error: {e}")
+            logger.error(f"Error: {e}")
\ No newline at end of file
diff --git a/src/telegram_handler/__init__.py b/src/telegram_handler/__init__.py
new file mode 100644
index 0000000..0316d08
--- /dev/null
+++ b/src/telegram_handler/__init__.py
@@ -0,0 +1,2 @@
+from .telegram_default_handler import TelegramDefaultHandler
+from .telegram_indeed_handler import TelegramIndeedHandler
diff --git a/src/telegram_handler/telegram_default_handler.py b/src/telegram_handler/telegram_default_handler.py
new file mode 100644
index 0000000..49b71dd
--- /dev/null
+++ b/src/telegram_handler/telegram_default_handler.py
@@ -0,0 +1,43 @@
+from telegram import Update
+from telegram.ext import (
+    ContextTypes,
+)
+
+from src.jobspy import Site, scrape_jobs
+from src.jobspy.db.job_repository import JobRepository
+from src.jobspy.scrapers.utils import create_logger
+from src.telegram_bot import TelegramBot
+from src.telegram_handler.telegram_handler import TelegramHandler
+
+
+class TelegramDefaultHandler(TelegramHandler):
+    """Scrape the given sites and send the jobs returned by the repository to Telegram."""
+
+    def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str], search_term: str):
+        self.sites_to_scrap = sites
+        self.locations = locations
+        self.search_term = search_term
+        self.title_filters = title_filters
+        self.telegramBot = TelegramBot()
+        self.jobRepository = JobRepository()
+        if len(sites) == 1:
+            self.logger = create_logger(f"Telegram{sites[0].name.title()}Handler")
+        else:
+            self.logger = create_logger("TelegramAllHandler")
+
+    async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
+        """Scrape, pass the result to insertManyIfNotFound, and send each returned job."""
+        self.logger.info("start handling")
+        jobs = scrape_jobs(
+            site_name=self.sites_to_scrap,
+            search_term=self.search_term,
+            locations=self.locations,
+            results_wanted=200,
+            hours_old=48,
+            filter_by_title=self.title_filters
+        )
+        self.logger.info(f"Found {len(jobs)} jobs")
+        new_jobs = self.jobRepository.insertManyIfNotFound(jobs)
+        for newJob in new_jobs:
+            await self.telegramBot.sendJob(newJob)
+        self.logger.info("finished handling")
diff --git a/src/telegram_handler/telegram_handler.py b/src/telegram_handler/telegram_handler.py
new file mode 100644
index 0000000..14c71a3
--- /dev/null
+++ b/src/telegram_handler/telegram_handler.py
@@ -0,0 +1,12 @@
+from abc import ABC, abstractmethod
+
+from telegram import Update
+from telegram.ext import ContextTypes
+
+
+class TelegramHandler(ABC):
+    """Interface for objects whose handle() is registered as a Telegram command callback."""
+
+    @abstractmethod
+    async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
+        """Handle one command update; implementations are coroutines."""
diff --git a/src/telegram_handler/telegram_indeed_handler.py b/src/telegram_handler/telegram_indeed_handler.py
new file mode 100644
index 0000000..21dd975
--- /dev/null
+++ b/src/telegram_handler/telegram_indeed_handler.py
@@ -0,0 +1,41 @@
+from telegram import Update
+from telegram.ext import (
+    ContextTypes,
+)
+
+from src.jobspy import Site, scrape_jobs
+from src.jobspy.db.job_repository import JobRepository
+from src.jobspy.scrapers.utils import create_logger
+from src.telegram_bot import TelegramBot
+from src.telegram_handler.telegram_handler import TelegramHandler
+
+
+class TelegramIndeedHandler(TelegramHandler):
+    """Scrape Indeed (country_indeed='israel') and send the jobs returned by the repository to Telegram."""
+
+    def __init__(self, locations: list[str], title_filters: list[str], search_term: str):
+        self.sites_to_scrap = [Site.INDEED]
+        self.locations = locations
+        self.search_term = search_term
+        self.title_filters = title_filters
+        self.telegramBot = TelegramBot()
+        self.jobRepository = JobRepository()
+        self.logger = create_logger(f"Telegram{self.sites_to_scrap[0].name.title()}Handler")
+
+    async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
+        """Scrape, pass the result to insertManyIfNotFound, and send each returned job."""
+        self.logger.info("start handling")
+        jobs = scrape_jobs(
+            site_name=self.sites_to_scrap,
+            search_term=self.search_term,
+            locations=self.locations,
+            results_wanted=200,
+            hours_old=48,
+            country_indeed='israel',
+            filter_by_title=self.title_filters
+        )
+        self.logger.info(f"Found {len(jobs)} jobs")
+        new_jobs = self.jobRepository.insertManyIfNotFound(jobs)
+        for newJob in new_jobs:
+            await self.telegramBot.sendJob(newJob)
+        self.logger.info("finished handling")