From 1f2b1b3892505c0e1002d2f0d7a7d540cb9035a0 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 2 Jan 2025 16:19:05 +0200 Subject: [PATCH 01/22] init position state works --- src/db/Position.py | 5 + src/db/User.py | 13 ++ src/db/job_repository.py | 3 +- src/db/monogo_db.py | 2 + src/db/position_repository.py | 106 +++++++++++ src/db/user_repository.py | 99 ++++++++++ src/main.py | 59 +----- .../telegram_start_handler.py | 174 ++++++++++++++++++ 8 files changed, 409 insertions(+), 52 deletions(-) create mode 100644 src/db/Position.py create mode 100644 src/db/User.py create mode 100644 src/db/position_repository.py create mode 100644 src/db/user_repository.py create mode 100644 src/telegram_handler/telegram_start_handler.py diff --git a/src/db/Position.py b/src/db/Position.py new file mode 100644 index 0000000..a7a98a7 --- /dev/null +++ b/src/db/Position.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class Position(BaseModel): + name: str diff --git a/src/db/User.py b/src/db/User.py new file mode 100644 index 0000000..490bb52 --- /dev/null +++ b/src/db/User.py @@ -0,0 +1,13 @@ +from datetime import datetime +from typing import Optional, Union + +from pydantic import BaseModel + +from db.Position import Position + + +class User(BaseModel): + full_name: str + username: str + chat_id: Union[int,str] + field: Optional[Position] = None diff --git a/src/db/job_repository.py b/src/db/job_repository.py index 290031f..53607fd 100644 --- a/src/db/job_repository.py +++ b/src/db/job_repository.py @@ -3,9 +3,9 @@ from typing import Optional from dotenv import load_dotenv from pymongo import UpdateOne -from .monogo_db import MongoDB from jobspy import create_logger from jobspy.jobs import JobPost +from .monogo_db import mongo_client load_dotenv() @@ -21,7 +21,6 @@ class JobRepository: self = super().__new__(cls) cls._instance = self self.logger = create_logger("JobRepository") - mongo_client = MongoDB() self.collection = mongo_client.db["jobs"] return 
cls._instance diff --git a/src/db/monogo_db.py b/src/db/monogo_db.py index bcac938..eea7d84 100644 --- a/src/db/monogo_db.py +++ b/src/db/monogo_db.py @@ -32,3 +32,5 @@ class MongoDB: self.db = client[database_name] logger.info("Succeed connect to MongoDB") return cls._instance + +mongo_client = MongoDB() diff --git a/src/db/position_repository.py b/src/db/position_repository.py new file mode 100644 index 0000000..af068b8 --- /dev/null +++ b/src/db/position_repository.py @@ -0,0 +1,106 @@ +from typing import Optional + +from dotenv import load_dotenv +from pymongo import UpdateOne + +from jobspy import create_logger +from .Position import Position +from .monogo_db import mongo_client + +load_dotenv() + + +class PositionRepository: + _instance = None + + def __new__(cls): + + if cls._instance is not None: + return cls._instance + + self = super().__new__(cls) + cls._instance = self + self.logger = create_logger("PositionRepository") + self.collection = mongo_client.db["field"] + return cls._instance + + def find_all(self) -> list[Position]: + positions = list(self.collection.find({})) + return [Position(**position) for position in positions] + + def find_by_id(self, position_id: str) -> Optional[Position]: + """ + Finds a position document in the collection by its ID. + + Args: + position_id: The ID of the position to find. + + Returns: + The position document if found, otherwise None. + """ + result = self.collection.find_one({"id": position_id}) + return Position(**result) + + def update(self, position: Position) -> bool: + """ + Updates a Position in the database. + + Args: + position: A dictionary representing the Position data. + + Returns: + True if the update was successful, False otherwise. + """ + result = self.collection.update_one({"id": position.id}, {"$set": position.model_dump()}) + return result.modified_count > 0 + + def insert_position(self, position: Position): + """ + Inserts a new position posting into the database collection. 
+ + Args: + position (Position): The Position object to be inserted. + + Raises: + Exception: If an error occurs during insertion. + """ + self.collection.insert_one(position.model_dump()) + self.logger.info(f"Inserted new position with name {position.name}.") + + def insert_many_if_not_found(self, positions: list[Position]) -> tuple[list[Position], list[Position]]: + """ + Perform bulk upserts for a list of Position objects into a MongoDB collection. + Only insert new positions and return the list of newly inserted positions. + """ + operations = [] + new_positions = [] # List to store the new positions inserted into MongoDB + old_positions = [] # List to store the new positions inserted into MongoDB + for position in positions: + position_dict = position.model_dump() + operations.append( + UpdateOne( + {"id": position.id}, # Match by `id` + # Only set positions if the position is being inserted (not updated) + {"$setOnInsert": position_dict}, + upsert=True # Insert if not found, but do not update if already exists + ) + ) + + if operations: + # Execute all operations in bulk + result = self.collection.bulk_write(operations) + self.logger.info(f"Matched: {result.matched_count}, Upserts: { + result.upserted_count}, Modified: {result.modified_count}") + + # Get the newly inserted positions (those that were upserted) + # The `upserted_count` corresponds to how many new documents were inserted + for i, position in enumerate(positions): + if result.upserted_count > 0 and i < result.upserted_count: + new_positions.append(position) + else: + old_positions.append(position) + + return old_positions, new_positions + + +position_repository = PositionRepository() diff --git a/src/db/user_repository.py b/src/db/user_repository.py new file mode 100644 index 0000000..70c592a --- /dev/null +++ b/src/db/user_repository.py @@ -0,0 +1,99 @@ +from typing import Optional + +from dotenv import load_dotenv +from pymongo import UpdateOne + +from jobspy import create_logger +from .User 
import User +from .monogo_db import mongo_client + +load_dotenv() + + +class UserRepository: + _instance = None + + def __new__(cls): + + if cls._instance is not None: + return cls._instance + + self = super().__new__(cls) + cls._instance = self + self.logger = create_logger("UserRepository") + self.collection = mongo_client.db["user"] + return cls._instance + + def find_by_id(self, user_id: str) -> Optional[User]: + """ + Finds a user document in the collection by its ID. + + Args: + user_id: The ID of the user to find. + + Returns: + The user document if found, otherwise None. + """ + result = self.collection.find_one({"id": user_id}) + return User(**result) + + def update(self, user: User) -> bool: + """ + Updates a User in the database. + + Args: + user: A dictionary representing the User data. + + Returns: + True if the update was successful, False otherwise. + """ + result = self.collection.update_one({"id": user.id}, {"$set": user.model_dump()}) + return result.modified_count > 0 + + def insert_user(self, user: User): + """ + Inserts a new user posting into the database collection. + + Args: + user (User): The User object to be inserted. + + Raises: + Exception: If an error occurs during insertion. + """ + self.collection.insert_one(user.model_dump()) + self.logger.info(f"Inserted new user with username {user.username}.") + + def insert_many_if_not_found(self, users: list[User]) -> tuple[list[User], list[User]]: + """ + Perform bulk upserts for a list of User objects into a MongoDB collection. + Only insert new users and return the list of newly inserted users. 
+ """ + operations = [] + new_users = [] # List to store the new users inserted into MongoDB + old_users = [] # List to store the new users inserted into MongoDB + for user in users: + user_dict = user.model_dump() + operations.append( + UpdateOne( + {"id": user.id}, # Match by `id` + # Only set fields if the user is being inserted (not updated) + {"$setOnInsert": user_dict}, + upsert=True # Insert if not found, but do not update if already exists + ) + ) + + if operations: + # Execute all operations in bulk + result = self.collection.bulk_write(operations) + self.logger.info(f"Matched: {result.matched_count}, Upserts: { + result.upserted_count}, Modified: {result.modified_count}") + + # Get the newly inserted users (those that were upserted) + # The `upserted_count` corresponds to how many new documents were inserted + for i, user in enumerate(users): + if result.upserted_count > 0 and i < result.upserted_count: + new_users.append(user) + else: + old_users.append(user) + + return old_users, new_users diff --git a/src/main.py b/src/main.py index 200c277..c52ef24 100644 --- a/src/main.py +++ b/src/main.py @@ -1,67 +1,26 @@ -import os +from telegram import Update, ReplyKeyboardMarkup, ReplyKeyboardRemove +from telegram.ext import Application, CommandHandler, ConversationHandler, \ + MessageHandler, filters, ContextTypes -from telegram import Update -from telegram.ext import Application, CommandHandler, CallbackQueryHandler, Updater - -from jobspy.scrapers.site import Site +from config.settings import settings from jobspy.scrapers.utils import create_logger -from telegram_handler import TelegramDefaultHandler -from telegram_handler.button_callback.telegram_callback_handler import TelegramCallHandler +from telegram_handler.telegram_start_handler import start_conv_handler logger = create_logger("Main") -_api_token = os.getenv("TELEGRAM_API_TOKEN") +_api_token = settings.telegram_api_token application = Application.builder().token(_api_token).build() title_filters: 
list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", "Front End", "DevOps", "Physical", "Staff", "automation", "BI ", "Principal", "Architect", "Android", "Machine Learning", "Student", "Data Engineer", "DevSecOps"] - -async def stop(update, context): - logger.info("Stop polling from telegram") - application.stop_running() - if __name__ == "__main__": logger.info("Starting initialize ") search_term = "software engineer" locations = ["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", "Rehovot ,Israel"] - tg_callback_handler = TelegramCallHandler() - tg_handler_all = TelegramDefaultHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI], - locations=locations, - title_filters=title_filters, - search_term=search_term) - application.add_handler(CommandHandler("find", tg_handler_all.handle)) - # Goozali - tg_handler_goozali = TelegramDefaultHandler(sites=[Site.GOOZALI], - locations=locations, - title_filters=title_filters, - search_term=search_term) - application.add_handler(CommandHandler( - Site.GOOZALI.value, tg_handler_goozali.handle)) - # GlassDoor - tg_handler_glassdoor = TelegramDefaultHandler(sites=[Site.GLASSDOOR], - locations=locations, - title_filters=title_filters, - search_term=search_term) - application.add_handler(CommandHandler( - Site.GLASSDOOR.value, tg_handler_glassdoor.handle)) - # LinkeDin - tg_handler_linkedin = TelegramDefaultHandler(sites=[Site.LINKEDIN], - locations=locations, - title_filters=title_filters, - search_term=search_term) - application.add_handler(CommandHandler( - Site.LINKEDIN.value, tg_handler_linkedin.handle)) - # Indeed - tg_handler_indeed = TelegramDefaultHandler(sites=[Site.INDEED], - locations=locations, - title_filters=title_filters, - search_term=search_term) - application.add_handler(CommandHandler( - Site.INDEED.value, tg_handler_indeed.handle)) - application.add_handler(CallbackQueryHandler( - tg_callback_handler.button_callback)) - 
application.add_handler(CommandHandler('stop', stop)) + application.add_handler(start_conv_handler) + + # application.add_handler(CommandHandler('start', start_handler.handle)) logger.info("Run polling from telegram") application.run_polling(allowed_updates=Update.ALL_TYPES) diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py new file mode 100644 index 0000000..67f3de6 --- /dev/null +++ b/src/telegram_handler/telegram_start_handler.py @@ -0,0 +1,174 @@ +from enum import Enum + +from telegram import Update, Chat, KeyboardButton, ReplyKeyboardMarkup, ReplyKeyboardRemove +from telegram.ext import ( + ContextTypes, ConversationHandler, CommandHandler, MessageHandler, filters, +) + +from db.User import User +from db.position_repository import position_repository +from db.user_repository import UserRepository +from jobspy.scrapers.utils import create_logger +from telegram_bot import TelegramBot +from telegram_handler.telegram_handler import TelegramHandler + + +class Flow(Enum): + POSITION = 0 + ADDRESS = 1 + FILTERS = 2 + EXPERIENCE = 3 + RETRY = 4 + + +class TelegramStartHandler(TelegramHandler): + + def __init__(self): + self.telegram_bot = TelegramBot() + self.user_repository = UserRepository() + self.logger = create_logger("TelegramStartHandler") + self.positions = position_repository.find_all() + self.temp_user = None + self.last_state = None + + async def start(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> Flow: + """Starts the conversation and asks the user about their position.""" + chat: Chat = update.message.chat + user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id) + self.user_repository.insert_user(user) + + buttons = [[KeyboardButton(position.name)] for position in self.positions] + reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True, + input_field_placeholder=Flow.POSITION.name) + await update.message.reply_text( + "Hi! 
My name is Professor Bot. I will hold a conversation with you. " + "Send /cancel to stop talking to me.\n\n" + "What Position are you looking for?", + reply_markup=reply_markup, + ) + + return Flow.POSITION + + async def position(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> Flow: + """Stores the selected position and asks for a photo.""" + self.last_state = Flow.POSITION + user = update.message.from_user + self.logger.info("Position of %s: %s", user.first_name, update.message.text) + position = next((p for p in self.positions if p.name == update.message.text), None) + if not position: + await update.message.reply_text("Position not found") + buttons2 = [[KeyboardButton(position.name)] for position in self.positions] + reply_markup = ReplyKeyboardMarkup(buttons2, one_time_keyboard=True, + input_field_placeholder=Flow.POSITION.name) + await update.message.reply_text( + "What Position are you looking for?", + reply_markup=reply_markup, + ) + return Flow.POSITION + + await update.message.reply_text( + "I see! Please send me a photo of yourself, " + "so I know what you look like, or send /skip if you don't want to.", + reply_markup=ReplyKeyboardRemove(), + ) + + return Flow.ADDRESS + + async def address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + """Stores the photo and asks for a location.""" + user = update.message.from_user + photo_file = await update.message.photo[-1].get_file() + await photo_file.download_to_drive("user_photo.jpg") + self.logger.info("Photo of %s: %s", user.first_name, "user_photo.jpg") + await update.message.reply_text( + "Gorgeous! Now, send me your location please, or send /skip if you don't want to." 
+ ) + + return Flow.FILTERS.value + + async def filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + """Stores the location and asks for some info about the user.""" + user = update.message.from_user + user_location = update.message.location + self.logger.info( + "Location of %s: %f / %f", user.first_name, user_location.latitude, user_location.longitude + ) + await update.message.reply_text( + "Maybe I can visit you sometime! At last, tell me something about yourself." + ) + + return Flow.EXPERIENCE.value + + async def skip_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + """Skips the location and asks for info about the user.""" + user = update.message.from_user + self.logger.info("User %s did not send a location.", user.first_name) + await update.message.reply_text( + "You seem a bit paranoid! At last, tell me something about yourself." + ) + + return Flow.EXPERIENCE.value + + async def experience(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + """Stores the info about the user and ends the conversation.""" + user = update.message.from_user + self.logger.info("Bio of %s: %s", user.first_name, update.message.text) + await update.message.reply_text("Thank you! I hope we can talk again some day.") + + return ConversationHandler.END + + async def cancel(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + """Cancels and ends the conversation.""" + user = update.message.from_user + self.logger.info("User %s canceled the conversation.", user.first_name) + await update.message.reply_text( + "Bye! 
I hope we can talk again some day.", reply_markup=ReplyKeyboardRemove() + ) + + return ConversationHandler.END + + async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE): + self.logger.info("start handling") + # chat: Chat = update.message.chat + # chat.id - 368620919 + # chat.username - 'Qw1zeR' + # chat.full_name - 'Qw1zeR' + # user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id) + # self.user_repository.insert_user(user) + # fields = field_repository.find_all() # Get all fields from the database + # buttons = [[KeyboardButton(field.name)] for field in fields] + # reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True) + # + # await update.message.reply_text("Please select your field:", reply_markup=reply_markup) + # await self.telegram_bot.set_message_reaction( + # update.message.message_id, ReactionEmoji.FIRE) + # site_names = [site.name for site in self.sites_to_scrap] + # site_names_print = ", ".join(site_names) + # await self.telegram_bot.send_text( + # f"Start scarping: {site_names_print}") + # self.logger.info(f"Found {len(jobs)} jobs") + # self.jobRepository.insert_many_if_not_found(filtered_out_jobs) + # old_jobs, new_jobs = self.jobRepository.insert_many_if_not_found(jobs) + # for newJob in new_jobs: + # await self.telegram_bot.send_job(newJob) + # self.logger.info(f"Found {len(old_jobs)} old jobs") + # await self.telegram_bot.send_text( + # f"Finished scarping: {site_names_print}") + self.logger.info("finished handling") + + +start_handler = TelegramStartHandler() +start_conv_handler = ConversationHandler( + entry_points=[CommandHandler("start", start_handler.start)], + states={ + Flow.POSITION: [MessageHandler(filters.TEXT, start_handler.position)] + # Flow.SAVE_POSITION: [MessageHandler(filters.TEXT, start_handler.position)] + # Flow.ADDRESS: [MessageHandler(filters.PHOTO, photo), CommandHandler("skip", skip_photo)], + # Flow.FILTERS: [ + # MessageHandler(filters.LOCATION, location), + # 
CommandHandler("skip", skip_location), + # ], + # Flow.EXPERIENCE: [MessageHandler(filters.TEXT & ~filters.COMMAND, bio)], + }, + fallbacks=[CommandHandler("cancel", start_handler.cancel)], +) From 97ece953f6b5f9ecf8c77d8dc83c98109de06729 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 2 Jan 2025 17:33:14 +0200 Subject: [PATCH 02/22] full flow works with no saving to db --- .../telegram_start_handler.py | 127 +++++++++++------- 1 file changed, 78 insertions(+), 49 deletions(-) diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index 67f3de6..8af02c3 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -18,20 +18,23 @@ class Flow(Enum): ADDRESS = 1 FILTERS = 2 EXPERIENCE = 3 - RETRY = 4 + VERIFY_ADDRESS = 4 + VERIFY_FILTERS = 5 + SKIP_FILTERS = 6 class TelegramStartHandler(TelegramHandler): def __init__(self): + self.filters = None self.telegram_bot = TelegramBot() self.user_repository = UserRepository() self.logger = create_logger("TelegramStartHandler") self.positions = position_repository.find_all() self.temp_user = None - self.last_state = None + self.cities = None - async def start(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> Flow: + async def start(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Starts the conversation and asks the user about their position.""" chat: Chat = update.message.chat user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id) @@ -47,64 +50,55 @@ class TelegramStartHandler(TelegramHandler): reply_markup=reply_markup, ) - return Flow.POSITION + return Flow.POSITION.value - async def position(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> Flow: + async def position(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Stores the selected position and asks for a photo.""" - self.last_state = Flow.POSITION user = 
update.message.from_user self.logger.info("Position of %s: %s", user.first_name, update.message.text) position = next((p for p in self.positions if p.name == update.message.text), None) if not position: await update.message.reply_text("Position not found") - buttons2 = [[KeyboardButton(position.name)] for position in self.positions] - reply_markup = ReplyKeyboardMarkup(buttons2, one_time_keyboard=True, + buttons = [[KeyboardButton(position.name)] for position in self.positions] + reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True, input_field_placeholder=Flow.POSITION.name) await update.message.reply_text( "What Position are you looking for?", reply_markup=reply_markup, ) - return Flow.POSITION + return Flow.POSITION.value await update.message.reply_text( - "I see! Please send me a photo of yourself, " - "so I know what you look like, or send /skip if you don't want to.", - reply_markup=ReplyKeyboardRemove(), + "Gorgeous! Now, send me cites you want to search for\n" + "Example: Rishon Lezion,Petah Tikva,..." 
) - return Flow.ADDRESS + return Flow.ADDRESS.value async def address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Stores the photo and asks for a location.""" user = update.message.from_user - photo_file = await update.message.photo[-1].get_file() - await photo_file.download_to_drive("user_photo.jpg") - self.logger.info("Photo of %s: %s", user.first_name, "user_photo.jpg") + self.cities = update.message.text.split(",") + reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, + input_field_placeholder=Flow.VERIFY_ADDRESS.name) + await update.message.reply_text(f"Did you choose: {self.cities} ?", reply_markup=reply_markup) + + return Flow.VERIFY_ADDRESS.value + + async def verify_address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + if update.message.text == "No": + await update.message.reply_text( + "Please send the cities\n" + "Example: Rishon Lezion,Petah Tikva,..." + ) + return Flow.ADDRESS.value + + reply_markup = ReplyKeyboardMarkup([["1", "2"]], one_time_keyboard=True, + input_field_placeholder=Flow.VERIFY_ADDRESS.name) await update.message.reply_text( - "Gorgeous! Now, send me your location please, or send /skip if you don't want to." - ) - - return Flow.FILTERS.value - - async def filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: - """Stores the location and asks for some info about the user.""" - user = update.message.from_user - user_location = update.message.location - self.logger.info( - "Location of %s: %f / %f", user.first_name, user_location.latitude, user_location.longitude - ) - await update.message.reply_text( - "Maybe I can visit you sometime! At last, tell me something about yourself." 
- ) - - return Flow.EXPERIENCE.value - - async def skip_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: - """Skips the location and asks for info about the user.""" - user = update.message.from_user - self.logger.info("User %s did not send a location.", user.first_name) - await update.message.reply_text( - "You seem a bit paranoid! At last, tell me something about yourself." + "Maybe I can visit you sometime!\n" + "Tell Your experience", + reply_markup=reply_markup ) return Flow.EXPERIENCE.value @@ -112,7 +106,44 @@ class TelegramStartHandler(TelegramHandler): async def experience(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Stores the info about the user and ends the conversation.""" user = update.message.from_user - self.logger.info("Bio of %s: %s", user.first_name, update.message.text) + self.logger.info("Experience of %s: %s", user.first_name, update.message.text) + + await update.message.reply_text( + "Gorgeous!\n" + "Now, send me keywords to filter out positions based on title\n" + "Example: Data,QA,..." + ) + return Flow.FILTERS.value + + async def filters_flow(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + """Stores the location and asks for some info about the user.""" + user = update.message.from_user + self.logger.info( + "Filters of %s: %f / %f", user.first_name, update.message.text + ) + self.filters = update.message.text.split(",") + reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, + input_field_placeholder=Flow.VERIFY_FILTERS.name) + await update.message.reply_text(f"Did you choose: {self.filters} ?", reply_markup=reply_markup) + + return Flow.VERIFY_FILTERS.value + + async def verify_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + if update.message.text == "No": + await update.message.reply_text( + "Please send the filters\n" + "Example: QA ,DATA,..." 
+ ) + return Flow.FILTERS.value + + await update.message.reply_text("Thank you! I hope we can talk again some day.") + + return ConversationHandler.END + + async def skip_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + """Skips the location and asks for info about the user.""" + user = update.message.from_user + self.logger.info("User %s did not send a filters.", user.first_name) await update.message.reply_text("Thank you! I hope we can talk again some day.") return ConversationHandler.END @@ -161,14 +192,12 @@ start_handler = TelegramStartHandler() start_conv_handler = ConversationHandler( entry_points=[CommandHandler("start", start_handler.start)], states={ - Flow.POSITION: [MessageHandler(filters.TEXT, start_handler.position)] - # Flow.SAVE_POSITION: [MessageHandler(filters.TEXT, start_handler.position)] - # Flow.ADDRESS: [MessageHandler(filters.PHOTO, photo), CommandHandler("skip", skip_photo)], - # Flow.FILTERS: [ - # MessageHandler(filters.LOCATION, location), - # CommandHandler("skip", skip_location), - # ], - # Flow.EXPERIENCE: [MessageHandler(filters.TEXT & ~filters.COMMAND, bio)], + Flow.POSITION.value: [MessageHandler(filters.TEXT, start_handler.position)], + Flow.ADDRESS.value: [MessageHandler(filters.TEXT, start_handler.address)], + Flow.VERIFY_ADDRESS.value: [MessageHandler(filters.TEXT, start_handler.verify_address)], + Flow.EXPERIENCE.value: [MessageHandler(filters.TEXT, start_handler.experience)], + Flow.FILTERS.value: [MessageHandler(filters.TEXT, start_handler.filters_flow)], + Flow.VERIFY_FILTERS.value: [MessageHandler(filters.TEXT, start_handler.verify_filter)], }, fallbacks=[CommandHandler("cancel", start_handler.cancel)], ) From f1fc22cb06a6dde29af758a048d55a59bf85f6a4 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Thu, 2 Jan 2025 17:35:07 +0200 Subject: [PATCH 03/22] small fix --- src/telegram_handler/telegram_start_handler.py | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index 8af02c3..8ab6b8c 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -117,10 +117,6 @@ class TelegramStartHandler(TelegramHandler): async def filters_flow(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Stores the location and asks for some info about the user.""" - user = update.message.from_user - self.logger.info( - "Filters of %s: %f / %f", user.first_name, update.message.text - ) self.filters = update.message.text.split(",") reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_FILTERS.name) From 90132ca1990b8d2c6f77e9c835e19b65ffb6b78e Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Fri, 3 Jan 2025 18:48:54 +0200 Subject: [PATCH 04/22] fixed env --- src/config/settings.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/config/settings.py b/src/config/settings.py index 8d1b38d..c17dbdd 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -1,25 +1,16 @@ import os -from pydantic_settings import BaseSettings, SettingsConfigDict +from pydantic import Field +from pydantic_settings import BaseSettings -if not os.getenv("ENV"): +if not os.environ.get("ENV"): raise ValueError("Invalid environment. Set the 'ENV' variable (e.g., export ENV=dev).") -dotenv_file = os.path.join(os.path.dirname(__file__), ".env." 
+ os.environ.get("ENV")) - -if not os.path.exists(dotenv_file): - raise FileNotFoundError(f"Environment file not found: {dotenv_file}") - class Settings(BaseSettings): - environment: str - telegram_api_token: str - mongo_uri: str - mongo_db_name: str - print(f"Loading environment from: {dotenv_file}") - model_config = SettingsConfigDict( - env_file=dotenv_file - ) + telegram_api_token: str = Field(alias="telegram_api_token") + mongo_uri: str = Field(alias="mongo_uri") + mongo_db_name: str = Field(alias="mongo_db_name") settings = Settings() From 1f8f49fca2b5d818c71cb286605c3bf83ac06de0 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Sun, 5 Jan 2025 12:56:46 +0200 Subject: [PATCH 05/22] updated the messages --- .../start_handler_constats.py | 29 +++++++++++ .../telegram_start_handler.py | 50 +++++++------------ 2 files changed, 47 insertions(+), 32 deletions(-) create mode 100644 src/telegram_handler/start_handler_constats.py diff --git a/src/telegram_handler/start_handler_constats.py b/src/telegram_handler/start_handler_constats.py new file mode 100644 index 0000000..a3d2496 --- /dev/null +++ b/src/telegram_handler/start_handler_constats.py @@ -0,0 +1,29 @@ +START_MESSAGE: str = "Hi there! I'm Professor Bot, your friendly job search assistant.😊\n" \ + "I'm here to help you find the perfect position." \ + "To stop chatting with me at any time, just send '/cancel'.\n\n" + +POSITION_MESSAGE: str = "What kind of position are you looking for? ✨\n" \ + "(e.g., Software Engineer, Data Scientist, Marketing Manager)" + +POSITION_NOT_FOUND: str = "I couldn't find any positions matching your request. 😕\n" \ + "Please try again" +LOCATION_MESSAGE: str = "Where are you hoping to find a position? 🌎\n" \ + "(e.g., Rishon Lezion, New York City, San Francisco)" \ + "You can enter multiple tags separated by commas. 🔍" +EXPERIENCE_MESSAGE: str = "How many years of professional experience do you have in this field? 
💼\n" +FILTER_TILE_MESSAGE: str = "To help me narrow down your search, tell me about any relevant tags or keywords.\n" \ + "For example: 'remote', 'entry-level', 'python', 'machine learning', 'QA'.\n" \ + "You can enter multiple tags separated by commas. 🔍" +THANK_YOU_MESSAGE: str = "Thank you for chatting with Professor Bot!\n\n" \ + "I can help you find jobs on LinkedIn, Glassdoor, and more." \ + "To search for jobs on a specific site, simply send the site name:\n" \ + "/linkedin\n" \ + "/glassdoor\n" \ + "/google\n" \ + "Or, use the command /find to search across all supported job boards for a broader search.\n" \ + "Let me know how I can assist you further! 😊" + +BYE_MESSAGE: str = "Have a great day!✨\n" \ + "I hope to assist you with your job search in the future.😊" + +VERIFY_MESSAGE:str = "Did you choose: %s ? 🧐" diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index 8ab6b8c..e543a9f 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -10,6 +10,8 @@ from db.position_repository import position_repository from db.user_repository import UserRepository from jobspy.scrapers.utils import create_logger from telegram_bot import TelegramBot +from telegram_handler.start_handler_constats import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \ + LOCATION_MESSAGE, EXPERIENCE_MESSAGE, FILTER_TILE_MESSAGE, THANK_YOU_MESSAGE, BYE_MESSAGE, VERIFY_MESSAGE from telegram_handler.telegram_handler import TelegramHandler @@ -44,9 +46,7 @@ class TelegramStartHandler(TelegramHandler): reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True, input_field_placeholder=Flow.POSITION.name) await update.message.reply_text( - "Hi! My name is Professor Bot. I will hold a conversation with you. 
" - "Send /cancel to stop talking to me.\n\n" - "What Position are you looking for?", + START_MESSAGE + POSITION_MESSAGE, reply_markup=reply_markup, ) @@ -58,48 +58,40 @@ class TelegramStartHandler(TelegramHandler): self.logger.info("Position of %s: %s", user.first_name, update.message.text) position = next((p for p in self.positions if p.name == update.message.text), None) if not position: - await update.message.reply_text("Position not found") + await update.message.reply_text(POSITION_NOT_FOUND) buttons = [[KeyboardButton(position.name)] for position in self.positions] reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True, input_field_placeholder=Flow.POSITION.name) await update.message.reply_text( - "What Position are you looking for?", + POSITION_MESSAGE, reply_markup=reply_markup, ) return Flow.POSITION.value - await update.message.reply_text( - "Gorgeous! Now, send me cites you want to search for\n" - "Example: Rishon Lezion,Petah Tikva,..." - ) + await update.message.reply_text(LOCATION_MESSAGE) return Flow.ADDRESS.value - async def address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + async def address(self, update: Update) -> int: """Stores the photo and asks for a location.""" user = update.message.from_user self.cities = update.message.text.split(",") reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_ADDRESS.name) - await update.message.reply_text(f"Did you choose: {self.cities} ?", reply_markup=reply_markup) + await update.message.reply_text(VERIFY_MESSAGE % self.filters, reply_markup=reply_markup) return Flow.VERIFY_ADDRESS.value async def verify_address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: if update.message.text == "No": - await update.message.reply_text( - "Please send the cities\n" - "Example: Rishon Lezion,Petah Tikva,..." 
- ) + await update.message.reply_text(LOCATION_MESSAGE) return Flow.ADDRESS.value reply_markup = ReplyKeyboardMarkup([["1", "2"]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_ADDRESS.name) - await update.message.reply_text( - "Maybe I can visit you sometime!\n" - "Tell Your experience", - reply_markup=reply_markup - ) + await update.message.reply_text(EXPERIENCE_MESSAGE, + reply_markup=reply_markup + ) return Flow.EXPERIENCE.value @@ -109,10 +101,7 @@ class TelegramStartHandler(TelegramHandler): self.logger.info("Experience of %s: %s", user.first_name, update.message.text) await update.message.reply_text( - "Gorgeous!\n" - "Now, send me keywords to filter out positions based on title\n" - "Example: Data,QA,..." - ) + FILTER_TILE_MESSAGE) return Flow.FILTERS.value async def filters_flow(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: @@ -120,19 +109,16 @@ class TelegramStartHandler(TelegramHandler): self.filters = update.message.text.split(",") reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_FILTERS.name) - await update.message.reply_text(f"Did you choose: {self.filters} ?", reply_markup=reply_markup) + await update.message.reply_text(VERIFY_MESSAGE % self.filters, reply_markup=reply_markup) return Flow.VERIFY_FILTERS.value async def verify_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: if update.message.text == "No": - await update.message.reply_text( - "Please send the filters\n" - "Example: QA ,DATA,..." - ) + await update.message.reply_text(FILTER_TILE_MESSAGE) return Flow.FILTERS.value - await update.message.reply_text("Thank you! 
I hope we can talk again some day.") + await update.message.reply_text(THANK_YOU_MESSAGE) return ConversationHandler.END @@ -140,7 +126,7 @@ class TelegramStartHandler(TelegramHandler): """Skips the location and asks for info about the user.""" user = update.message.from_user self.logger.info("User %s did not send a filters.", user.first_name) - await update.message.reply_text("Thank you! I hope we can talk again some day.") + await update.message.reply_text(THANK_YOU_MESSAGE) return ConversationHandler.END @@ -149,7 +135,7 @@ class TelegramStartHandler(TelegramHandler): user = update.message.from_user self.logger.info("User %s canceled the conversation.", user.first_name) await update.message.reply_text( - "Bye! I hope we can talk again some day.", reply_markup=ReplyKeyboardRemove() + BYE_MESSAGE, reply_markup=ReplyKeyboardRemove() ) return ConversationHandler.END From eb97fa136a25ae83654ffe7928e484e200b198ab Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Sun, 5 Jan 2025 15:23:11 +0200 Subject: [PATCH 06/22] removed position repo switched position to static enum add export to user repo --- src/db/Position.py | 33 +++++- src/db/User.py | 7 +- src/db/position_repository.py | 106 ------------------ src/db/user_repository.py | 3 + .../telegram_start_handler.py | 12 +- 5 files changed, 41 insertions(+), 120 deletions(-) delete mode 100644 src/db/position_repository.py diff --git a/src/db/Position.py b/src/db/Position.py index a7a98a7..c78cad1 100644 --- a/src/db/Position.py +++ b/src/db/Position.py @@ -1,5 +1,32 @@ -from pydantic import BaseModel +from enum import Enum -class Position(BaseModel): - name: str +class Position(Enum): + PRODUCT_MANAGEMENT = "Product Management" + DATA_ANALYST = "Data Analyst" + DATA_SCIENCE = "Data Science, ML & Algorithms" + SOFTWARE_ENGINEERING = "Software Engineering" + FULLSTACK_DEVELOPMENT = "Fullstack Development" + QA = "QA" + CYBERSECURITY = "Cybersecurity" + IT_AND_SYSTEM_ADMINISTRATION = "IT and System Administration" + 
FRONTEND_DEVELOPMENT = "Frontend Development" + DEV_OPS = "DevOps" + UI_UX = "UI/UX, Design & Content" + HR_RECRUITMENT = "HR & Recruitment" + MOBILE_DEVELOPMENT = "Mobile Development" + HARDWARE_ENGINEERING = "Hardware Engineering" + EMBEDDED_ENGINEERING = "Embedded, Low Level & Firmware Engineering", + CUSTOMER_SUCCESS = "Customer Success" + PROJECT_MANAGEMENT = "Project Management" + OPERATIONS = "Operations" + FINANCE = "Finance" + SYSTEMS_ENGINEERING = "Systems Engineering" + MARKETING = "Marketing" + SALES = "Sales" + LEGAL_POLICY = "Compliance, Legal & Policy" + C_LEVEL = "C-Level" + BUSINESS_DEVELOPMENT = "Business Development" + MECHANICAL_ENGINEERING = "Mechanical Engineering" + NATURAL_SCIENCE = "Natural Science" + OTHER = "Other" diff --git a/src/db/User.py b/src/db/User.py index 490bb52..daad0c0 100644 --- a/src/db/User.py +++ b/src/db/User.py @@ -1,7 +1,6 @@ -from datetime import datetime from typing import Optional, Union -from pydantic import BaseModel +from pydantic import BaseModel, Field from db.Position import Position @@ -9,5 +8,5 @@ from db.Position import Position class User(BaseModel): full_name: str username: str - chat_id: Union[int,str] - field: Optional[Position] = None + chat_id: Union[int, str] = None + field: Optional[Position] = Position.SOFTWARE_ENGINEERING diff --git a/src/db/position_repository.py b/src/db/position_repository.py deleted file mode 100644 index af068b8..0000000 --- a/src/db/position_repository.py +++ /dev/null @@ -1,106 +0,0 @@ -from typing import Optional - -from dotenv import load_dotenv -from pymongo import UpdateOne - -from jobspy import create_logger -from .Position import Position -from .monogo_db import mongo_client - -load_dotenv() - - -class PositionRepository: - _instance = None - - def __new__(cls): - - if cls._instance is not None: - return cls._instance - - self = super().__new__(cls) - cls._instance = self - self.logger = create_logger("PositionRepository") - self.collection = mongo_client.db["field"] 
- return cls._instance - - def find_all(self) -> list[Position]: - positions = list(self.collection.find({})) - return [Position(**position) for position in positions] - - def find_by_id(self, position_id: str) -> Optional[Position]: - """ - Finds a position document in the collection by its ID. - - Args: - position_id: The ID of the position to find. - - Returns: - The position document if found, otherwise None. - """ - result = self.collection.find_one({"id": position_id}) - return Position(**result) - - def update(self, position: Position) -> bool: - """ - Updates a Position in the database. - - Args: - position: A dictionary representing the Position data. - - Returns: - True if the update was successful, False otherwise. - """ - result = self.collection.update_one({"id": position.id}, {"$set": position.model_dump()}) - return result.modified_count > 0 - - def insert_position(self, position: Position): - """ - Inserts a new position posting into the database collection. - - Args: - position (Position): The Position object to be inserted. - - Raises: - Exception: If an error occurs during insertion. - """ - self.collection.insert_one(position.model_dump()) - self.logger.info(f"Inserted new position with name {position.name}.") - - def insert_many_if_not_found(self, positions: list[Position]) -> tuple[list[Position], list[Position]]: - """ - Perform bulk upserts for a list of Position objects into a MongoDB collection. - Only insert new positions and return the list of newly inserted positions. 
- """ - operations = [] - new_positions = [] # List to store the new positions inserted into MongoDB - old_positions = [] # List to store the new positions inserted into MongoDB - for position in positions: - position_dict = position.model_dump() - operations.append( - UpdateOne( - {"id": position.id}, # Match by `id` - # Only set positions if the position is being inserted (not updated) - {"$setOnInsert": position_dict}, - upsert=True # Insert if not found, but do not update if already exists - ) - ) - - if operations: - # Execute all operations in bulk - result = self.collection.bulk_write(operations) - self.logger.info(f"Matched: {result.matched_count}, Upserts: { - result.upserted_count}, Modified: {result.modified_count}") - - # Get the newly inserted positions (those that were upserted) - # The `upserted_count` corresponds to how many new documents were inserted - for i, position in enumerate(positions): - if result.upserted_count > 0 and i < result.upserted_count: - new_positions.append(position) - else: - old_positions.append(position) - - return old_positions, new_positions - - -position_repository = PositionRepository() diff --git a/src/db/user_repository.py b/src/db/user_repository.py index 70c592a..6ce4a9b 100644 --- a/src/db/user_repository.py +++ b/src/db/user_repository.py @@ -22,6 +22,7 @@ class UserRepository: cls._instance = self self.logger = create_logger("UserRepository") self.collection = mongo_client.db["user"] + self.collection.create_index('username', unique=True) return cls._instance def find_by_id(self, user_id: str) -> Optional[User]: @@ -97,3 +98,5 @@ class UserRepository: old_users.append(user) return old_users, new_users + +user_repository = UserRepository() \ No newline at end of file diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index e543a9f..6e24e9f 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -5,9 
+5,9 @@ from telegram.ext import ( ContextTypes, ConversationHandler, CommandHandler, MessageHandler, filters, ) +from db.Position import Position from db.User import User -from db.position_repository import position_repository -from db.user_repository import UserRepository +from db.user_repository import UserRepository, user_repository from jobspy.scrapers.utils import create_logger from telegram_bot import TelegramBot from telegram_handler.start_handler_constats import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \ @@ -30,9 +30,7 @@ class TelegramStartHandler(TelegramHandler): def __init__(self): self.filters = None self.telegram_bot = TelegramBot() - self.user_repository = UserRepository() self.logger = create_logger("TelegramStartHandler") - self.positions = position_repository.find_all() self.temp_user = None self.cities = None @@ -40,9 +38,9 @@ class TelegramStartHandler(TelegramHandler): """Starts the conversation and asks the user about their position.""" chat: Chat = update.message.chat user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id) - self.user_repository.insert_user(user) + user_repository.insert_user(user) - buttons = [[KeyboardButton(position.name)] for position in self.positions] + buttons = [[KeyboardButton(position.value)] for position in Position] reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True, input_field_placeholder=Flow.POSITION.name) await update.message.reply_text( @@ -147,7 +145,7 @@ class TelegramStartHandler(TelegramHandler): # chat.username - 'Qw1zeR' # chat.full_name - 'Qw1zeR' # user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id) - # self.user_repository.insert_user(user) + # user_repository.insert_user(user) # fields = field_repository.find_all() # Get all fields from the database # buttons = [[KeyboardButton(field.name)] for field in fields] # reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True) From 
c1dabdb88a6e040984810f4c8052affb60f369bf Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Sun, 5 Jan 2025 16:15:50 +0200 Subject: [PATCH 07/22] changed db folder name added codac to position enum --- src/{db => model}/User.py | 2 +- src/{db => model}/__init__.py | 0 src/model/codec/__init__.py | 0 src/model/codec/position_codec.py | 17 +++++++++++++ src/{db => model}/job_repository.py | 2 +- src/{db => model}/user_repository.py | 9 +++++-- .../button_callback_context.py | 2 +- .../button_callback/button_fire_strategy.py | 2 +- .../telegram_default_handler.py | 2 +- .../telegram_start_handler.py | 25 +++++++++++-------- tests/test_db.py | 2 +- 11 files changed, 44 insertions(+), 19 deletions(-) rename src/{db => model}/User.py (86%) rename src/{db => model}/__init__.py (100%) create mode 100644 src/model/codec/__init__.py create mode 100644 src/model/codec/position_codec.py rename src/{db => model}/job_repository.py (98%) rename src/{db => model}/user_repository.py (90%) diff --git a/src/db/User.py b/src/model/User.py similarity index 86% rename from src/db/User.py rename to src/model/User.py index daad0c0..a437dbe 100644 --- a/src/db/User.py +++ b/src/model/User.py @@ -2,7 +2,7 @@ from typing import Optional, Union from pydantic import BaseModel, Field -from db.Position import Position +from model.Position import Position class User(BaseModel): diff --git a/src/db/__init__.py b/src/model/__init__.py similarity index 100% rename from src/db/__init__.py rename to src/model/__init__.py diff --git a/src/model/codec/__init__.py b/src/model/codec/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/model/codec/position_codec.py b/src/model/codec/position_codec.py new file mode 100644 index 0000000..e23360c --- /dev/null +++ b/src/model/codec/position_codec.py @@ -0,0 +1,17 @@ +from bson.codec_options import TypeCodec + +from model.Position import Position + + +class PositionCodec(TypeCodec): + python_type = Position + bson_type = str + + def 
transform_python(self, value): + return value.value + + def transform_bson(self, value): + return Position(value) + + +position_codec = PositionCodec() diff --git a/src/db/job_repository.py b/src/model/job_repository.py similarity index 98% rename from src/db/job_repository.py rename to src/model/job_repository.py index 53607fd..85ff0ca 100644 --- a/src/db/job_repository.py +++ b/src/model/job_repository.py @@ -21,7 +21,7 @@ class JobRepository: self = super().__new__(cls) cls._instance = self self.logger = create_logger("JobRepository") - self.collection = mongo_client.db["jobs"] + self.collection = mongo_client._db["jobs"] return cls._instance def find_by_id(self, job_id: str) -> Optional[JobPost]: diff --git a/src/db/user_repository.py b/src/model/user_repository.py similarity index 90% rename from src/db/user_repository.py rename to src/model/user_repository.py index 6ce4a9b..6a5d060 100644 --- a/src/db/user_repository.py +++ b/src/model/user_repository.py @@ -1,10 +1,12 @@ from typing import Optional +from bson.codec_options import TypeRegistry, CodecOptions from dotenv import load_dotenv from pymongo import UpdateOne from jobspy import create_logger from .User import User +from .codec.position_codec import position_codec from .monogo_db import mongo_client load_dotenv() @@ -21,7 +23,9 @@ class UserRepository: self = super().__new__(cls) cls._instance = self self.logger = create_logger("UserRepository") - self.collection = mongo_client.db["user"] + type_registry = TypeRegistry([position_codec]) + codec_options = CodecOptions(type_registry=type_registry) + self.collection = mongo_client.get_collection('user', codec_options=codec_options) self.collection.create_index('username', unique=True) return cls._instance @@ -99,4 +103,5 @@ class UserRepository: return old_users, new_users -user_repository = UserRepository() \ No newline at end of file + +user_repository = UserRepository() diff --git a/src/telegram_handler/button_callback/button_callback_context.py 
b/src/telegram_handler/button_callback/button_callback_context.py index 111c88d..0a8f09a 100644 --- a/src/telegram_handler/button_callback/button_callback_context.py +++ b/src/telegram_handler/button_callback/button_callback_context.py @@ -3,7 +3,7 @@ from __future__ import annotations from telegram import MaybeInaccessibleMessage from telegram.constants import ReactionEmoji -from db.job_repository import JobRepository +from model.job_repository import JobRepository from jobspy import create_logger from telegram_handler.button_callback.button_fire_strategy import FireStrategy from telegram_handler.button_callback.button_job_title_strategy import JobTitleStrategy diff --git a/src/telegram_handler/button_callback/button_fire_strategy.py b/src/telegram_handler/button_callback/button_fire_strategy.py index 70a4f76..7930549 100644 --- a/src/telegram_handler/button_callback/button_fire_strategy.py +++ b/src/telegram_handler/button_callback/button_fire_strategy.py @@ -1,7 +1,7 @@ from telegram import MaybeInaccessibleMessage from telegram.constants import ReactionEmoji -from db.job_repository import JobRepository +from model.job_repository import JobRepository from jobspy import create_logger from telegram_bot import TelegramBot from telegram_handler.button_callback.button_strategy import ButtonStrategy diff --git a/src/telegram_handler/telegram_default_handler.py b/src/telegram_handler/telegram_default_handler.py index c0d5208..5b1487e 100644 --- a/src/telegram_handler/telegram_default_handler.py +++ b/src/telegram_handler/telegram_default_handler.py @@ -4,7 +4,7 @@ from telegram.ext import ( ContextTypes, ) -from db.job_repository import JobRepository +from model.job_repository import JobRepository from jobspy import Site, scrape_jobs, JobPost from jobspy.scrapers.utils import create_logger from telegram_bot import TelegramBot diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index 6e24e9f..8da8bd3 100644 --- 
a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -5,9 +5,9 @@ from telegram.ext import ( ContextTypes, ConversationHandler, CommandHandler, MessageHandler, filters, ) -from db.Position import Position -from db.User import User -from db.user_repository import UserRepository, user_repository +from model.Position import Position +from model.User import User +from model.user_repository import UserRepository, user_repository from jobspy.scrapers.utils import create_logger from telegram_bot import TelegramBot from telegram_handler.start_handler_constats import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \ @@ -38,26 +38,27 @@ class TelegramStartHandler(TelegramHandler): """Starts the conversation and asks the user about their position.""" chat: Chat = update.message.chat user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id) - user_repository.insert_user(user) + # user_repository.insert_user(user) + await update.message.reply_text(START_MESSAGE) buttons = [[KeyboardButton(position.value)] for position in Position] reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True, input_field_placeholder=Flow.POSITION.name) await update.message.reply_text( - START_MESSAGE + POSITION_MESSAGE, + POSITION_MESSAGE, reply_markup=reply_markup, ) return Flow.POSITION.value async def position(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: - """Stores the selected position and asks for a photo.""" + """Stores the selected position and asks for a locations.""" user = update.message.from_user self.logger.info("Position of %s: %s", user.first_name, update.message.text) - position = next((p for p in self.positions if p.name == update.message.text), None) + position = next((p for p in Position if p.name == update.message.text), None) if not position: await update.message.reply_text(POSITION_NOT_FOUND) - buttons = [[KeyboardButton(position.name)] for position in self.positions] + 
buttons = [[KeyboardButton(position.name)] for position in Position] reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True, input_field_placeholder=Flow.POSITION.name) await update.message.reply_text( @@ -71,7 +72,7 @@ class TelegramStartHandler(TelegramHandler): return Flow.ADDRESS.value async def address(self, update: Update) -> int: - """Stores the photo and asks for a location.""" + """Asks for a location.""" user = update.message.from_user self.cities = update.message.text.split(",") reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, @@ -81,6 +82,7 @@ class TelegramStartHandler(TelegramHandler): return Flow.VERIFY_ADDRESS.value async def verify_address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + """Verify for a Address.""" if update.message.text == "No": await update.message.reply_text(LOCATION_MESSAGE) return Flow.ADDRESS.value @@ -94,7 +96,7 @@ class TelegramStartHandler(TelegramHandler): return Flow.EXPERIENCE.value async def experience(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: - """Stores the info about the user and ends the conversation.""" + """Asks for a experience.""" user = update.message.from_user self.logger.info("Experience of %s: %s", user.first_name, update.message.text) @@ -103,7 +105,7 @@ class TelegramStartHandler(TelegramHandler): return Flow.FILTERS.value async def filters_flow(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: - """Stores the location and asks for some info about the user.""" + """Asks for a filters_flow.""" self.filters = update.message.text.split(",") reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_FILTERS.name) @@ -112,6 +114,7 @@ class TelegramStartHandler(TelegramHandler): return Flow.VERIFY_FILTERS.value async def verify_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: 
+ """Verify for a filters_flow.""" if update.message.text == "No": await update.message.reply_text(FILTER_TILE_MESSAGE) return Flow.FILTERS.value diff --git a/tests/test_db.py b/tests/test_db.py index bbe0e02..e136074 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -1,6 +1,6 @@ from dotenv import load_dotenv -from db.job_repository import JobRepository +from model.job_repository import JobRepository from tests.test_util import createMockJob load_dotenv() From 95c1672f118c621e094d1aa8c7e4cffbdb60ad7e Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Sun, 5 Jan 2025 16:16:01 +0200 Subject: [PATCH 08/22] changed db folder name added codac to position enum --- src/{db => model}/Position.py | 2 +- src/{db => model}/monogo_db.py | 34 ++++++++++--------- .../start_handler_constats.py | 2 +- 3 files changed, 20 insertions(+), 18 deletions(-) rename src/{db => model}/Position.py (99%) rename src/{db => model}/monogo_db.py (50%) diff --git a/src/db/Position.py b/src/model/Position.py similarity index 99% rename from src/db/Position.py rename to src/model/Position.py index c78cad1..4b22f78 100644 --- a/src/db/Position.py +++ b/src/model/Position.py @@ -16,7 +16,7 @@ class Position(Enum): HR_RECRUITMENT = "HR & Recruitment" MOBILE_DEVELOPMENT = "Mobile Development" HARDWARE_ENGINEERING = "Hardware Engineering" - EMBEDDED_ENGINEERING = "Embedded, Low Level & Firmware Engineering", + EMBEDDED_ENGINEERING = "Embedded, Low Level & Firmware Engineering" CUSTOMER_SUCCESS = "Customer Success" PROJECT_MANAGEMENT = "Project Management" OPERATIONS = "Operations" diff --git a/src/db/monogo_db.py b/src/model/monogo_db.py similarity index 50% rename from src/db/monogo_db.py rename to src/model/monogo_db.py index eea7d84..f15c4c8 100644 --- a/src/db/monogo_db.py +++ b/src/model/monogo_db.py @@ -1,5 +1,3 @@ -import os - from pymongo import MongoClient from pymongo.synchronous.database import Database @@ -8,29 +6,33 @@ from jobspy import create_logger class MongoDB: - _instance = 
None - db:Database = None - def __new__(cls): - - if cls._instance is not None: - return cls._instance - - self = super().__new__(cls) - cls._instance = self + def __init__(self): logger = create_logger("Mongo Client") - mongoUri = settings.mongo_uri - if not mongoUri: + mongo_uri = settings.mongo_uri + if not mongo_uri: logger.error("MONGO_URI environment variable is not set") raise ValueError("MONGO_URI environment variable is not set") - client = MongoClient(mongoUri) + client = MongoClient(mongo_uri) database_name = settings.mongo_db_name if not database_name: logger.error("MONGO_DB_NAME environment variable is not set") raise ValueError( "MONGO_DB_NAME environment variable is not set") - self.db = client[database_name] + self._db: Database = client[database_name] logger.info("Succeed connect to MongoDB") - return cls._instance + + def get_collection(self, + name: str, + codec_options=None, + read_preference=None, + write_concern=None, + read_concern=None): + return self._db.get_collection(name, + codec_options, + read_preference, + write_concern, + read_concern) + mongo_client = MongoDB() diff --git a/src/telegram_handler/start_handler_constats.py b/src/telegram_handler/start_handler_constats.py index a3d2496..5e095d4 100644 --- a/src/telegram_handler/start_handler_constats.py +++ b/src/telegram_handler/start_handler_constats.py @@ -1,5 +1,5 @@ START_MESSAGE: str = "Hi there! I'm Professor Bot, your friendly job search assistant.😊\n" \ - "I'm here to help you find the perfect position." \ + "I'm here to help you find the perfect position.\n\n" \ "To stop chatting with me at any time, just send '/cancel'.\n\n" POSITION_MESSAGE: str = "What kind of position are you looking for? 
✨\n" \ From 7aa8632aa164183adff5a0fb7be09ba708e97008 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Sun, 5 Jan 2025 16:17:21 +0200 Subject: [PATCH 09/22] text fixes --- src/telegram_handler/start_handler_constats.py | 2 +- src/telegram_handler/telegram_start_handler.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/telegram_handler/start_handler_constats.py b/src/telegram_handler/start_handler_constats.py index 5e095d4..ff3f607 100644 --- a/src/telegram_handler/start_handler_constats.py +++ b/src/telegram_handler/start_handler_constats.py @@ -8,7 +8,7 @@ POSITION_MESSAGE: str = "What kind of position are you looking for? ✨\n" \ POSITION_NOT_FOUND: str = "I couldn't find any positions matching your request. 😕\n" \ "Please try again" LOCATION_MESSAGE: str = "Where are you hoping to find a position? 🌎\n" \ - "(e.g., Rishon Lezion, New York City, San Francisco)" \ + "(e.g., Rishon Lezion, New York City, San Francisco)\n" \ "You can enter multiple tags separated by commas. 🔍" EXPERIENCE_MESSAGE: str = "How many years of professional experience do you have in this field? 
💼\n" FILTER_TILE_MESSAGE: str = "To help me narrow down your search, tell me about any relevant tags or keywords.\n" \ diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index 8da8bd3..c2daced 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -5,10 +5,9 @@ from telegram.ext import ( ContextTypes, ConversationHandler, CommandHandler, MessageHandler, filters, ) +from jobspy.scrapers.utils import create_logger from model.Position import Position from model.User import User -from model.user_repository import UserRepository, user_repository -from jobspy.scrapers.utils import create_logger from telegram_bot import TelegramBot from telegram_handler.start_handler_constats import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \ LOCATION_MESSAGE, EXPERIENCE_MESSAGE, FILTER_TILE_MESSAGE, THANK_YOU_MESSAGE, BYE_MESSAGE, VERIFY_MESSAGE From a2f02d7efd17739931a06b2a1c4ea5cc8ae75509 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Sun, 5 Jan 2025 17:12:25 +0200 Subject: [PATCH 10/22] fixes --- src/model/Position.py | 2 +- src/model/User.py | 2 +- src/model/codec/position_codec.py | 4 +- src/model/user_repository.py | 70 ++++++++++++------- .../start_handler_constats.py | 28 ++++---- .../telegram_start_handler.py | 21 ++++-- 6 files changed, 80 insertions(+), 47 deletions(-) diff --git a/src/model/Position.py b/src/model/Position.py index 4b22f78..0464607 100644 --- a/src/model/Position.py +++ b/src/model/Position.py @@ -1,7 +1,7 @@ from enum import Enum -class Position(Enum): +class Position(str, Enum): PRODUCT_MANAGEMENT = "Product Management" DATA_ANALYST = "Data Analyst" DATA_SCIENCE = "Data Science, ML & Algorithms" diff --git a/src/model/User.py b/src/model/User.py index a437dbe..72ebfad 100644 --- a/src/model/User.py +++ b/src/model/User.py @@ -9,4 +9,4 @@ class User(BaseModel): full_name: str username: str chat_id: Union[int, str] = None - 
field: Optional[Position] = Position.SOFTWARE_ENGINEERING + field: Optional[Position] = None diff --git a/src/model/codec/position_codec.py b/src/model/codec/position_codec.py index e23360c..95bbddc 100644 --- a/src/model/codec/position_codec.py +++ b/src/model/codec/position_codec.py @@ -8,10 +8,10 @@ class PositionCodec(TypeCodec): bson_type = str def transform_python(self, value): - return value.value + return value.name def transform_bson(self, value): return Position(value) -position_codec = PositionCodec() +# position_codec = PositionCodec() diff --git a/src/model/user_repository.py b/src/model/user_repository.py index 6a5d060..6003825 100644 --- a/src/model/user_repository.py +++ b/src/model/user_repository.py @@ -1,33 +1,22 @@ from typing import Optional -from bson.codec_options import TypeRegistry, CodecOptions +from cachebox import LRUCache from dotenv import load_dotenv from pymongo import UpdateOne from jobspy import create_logger from .User import User -from .codec.position_codec import position_codec from .monogo_db import mongo_client load_dotenv() class UserRepository: - _instance = None - - def __new__(cls): - - if cls._instance is not None: - return cls._instance - - self = super().__new__(cls) - cls._instance = self - self.logger = create_logger("UserRepository") - type_registry = TypeRegistry([position_codec]) - codec_options = CodecOptions(type_registry=type_registry) - self.collection = mongo_client.get_collection('user', codec_options=codec_options) - self.collection.create_index('username', unique=True) - return cls._instance + def __init__(self): + self._cache = LRUCache(50) + self._logger = create_logger("UserRepository") + self._collection = mongo_client.get_collection('user') + self._collection.create_index('username', unique=True) def find_by_id(self, user_id: str) -> Optional[User]: """ @@ -39,8 +28,41 @@ class UserRepository: Returns: The user document if found, otherwise None. 
""" - result = self.collection.find_one({"id": user_id}) - return User(**result) + user = None + cached_user = self._cache[user_id] + if cached_user: + return User(**cached_user) + + result = self._collection.find_one({"id": user_id}) + + if result: + user = User(**result) + self._cache[user_id] = user + + return user + + def find_by_username(self, username: str) -> Optional[User]: + """ + Finds a user document in the collection by its username. + + Args: + username: The username of the user to find. + + Returns: + The user document if found, otherwise None. + """ + user = None + cached_user = self._cache.get(username) + if cached_user: + return cached_user + + self._logger.info("Find user by username") + result = self._collection.find_one({"username": username}) + if result: + user = User(**result) + self._cache[username] = user + + return user def update(self, user: User) -> bool: """ @@ -52,7 +74,7 @@ class UserRepository: Returns: True if the update was successful, False otherwise. """ - result = self.collection.update_one({"id": user.id}, {"$set": user.model_dump()}) + result = self._collection.update_one({"id": user.id}, {"$set": user.model_dump()}) return result.modified_count > 0 def insert_user(self, user: User): @@ -65,8 +87,8 @@ class UserRepository: Raises: Exception: If an error occurs during insertion. 
""" - self.collection.insert_one(user.model_dump()) - self.logger.info(f"Inserted new user with username {user.username}.") + self._collection.insert_one(user.model_dump()) + self._logger.info(f"Inserted new user with username {user.username}.") def insert_many_if_not_found(self, users: list[User]) -> tuple[list[User], list[User]]: """ @@ -89,8 +111,8 @@ class UserRepository: if operations: # Execute all operations in bulk - result = self.collection.bulk_write(operations) - self.logger.info(f"Matched: {result.matched_count}, Upserts: { + result = self._collection.bulk_write(operations) + self._logger.info(f"Matched: {result.matched_count}, Upserts: { result.upserted_count}, Modified: {result.modified_count}") # Get the newly inserted users (those that were upserted) diff --git a/src/telegram_handler/start_handler_constats.py b/src/telegram_handler/start_handler_constats.py index ff3f607..ee70ea1 100644 --- a/src/telegram_handler/start_handler_constats.py +++ b/src/telegram_handler/start_handler_constats.py @@ -7,23 +7,27 @@ POSITION_MESSAGE: str = "What kind of position are you looking for? ✨\n" \ POSITION_NOT_FOUND: str = "I couldn't find any positions matching your request. 😕\n" \ "Please try again" +multi_value_message: str = "📌 You can enter multiple tags separated by commas." + LOCATION_MESSAGE: str = "Where are you hoping to find a position? 🌎\n" \ - "(e.g., Rishon Lezion, New York City, San Francisco)\n" \ - "You can enter multiple tags separated by commas. 🔍" + "(e.g., Rishon Lezion, New York City, San Francisco)\n\n" + multi_value_message + EXPERIENCE_MESSAGE: str = "How many years of professional experience do you have in this field? 💼\n" + FILTER_TILE_MESSAGE: str = "To help me narrow down your search, tell me about any relevant tags or keywords.\n" \ - "For example: 'remote', 'entry-level', 'python', 'machine learning', 'QA'.\n" \ - "You can enter multiple tags separated by commas. 
🔍" + "For example: 'remote', 'entry-level', 'python', 'machine learning', 'QA'.\n\n" + multi_value_message + THANK_YOU_MESSAGE: str = "Thank you for chatting with Professor Bot!\n\n" \ - "I can help you find jobs on LinkedIn, Glassdoor, and more." \ - "To search for jobs on a specific site, simply send the site name:\n" \ - "/linkedin\n" \ - "/glassdoor\n" \ - "/google\n" \ - "Or, use the command /find to search across all supported job boards for a broader search.\n" \ - "Let me know how I can assist you further! 😊" + "I can help you find jobs on LinkedIn, Glassdoor, and more." + +SEARCH_MESSAGE: str = "To search for jobs on a specific site, simply send the site name:\n" \ + "/linkedin\n" \ + "/glassdoor\n" \ + "/google\n\n" \ + "Or, use the command /find to search across all supported job boards for a broader search.\n\n" \ + "Let me know how I can assist you further! 😊" BYE_MESSAGE: str = "Have a great day!✨\n" \ "I hope to assist you with your job search in the future.😊" -VERIFY_MESSAGE:str = "Did you choose: %s ? 🧐" +VERIFY_MESSAGE: str = "Did you choose: %s ? 
🧐" diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index c2daced..37f76ce 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -8,9 +8,11 @@ from telegram.ext import ( from jobspy.scrapers.utils import create_logger from model.Position import Position from model.User import User +from model.user_repository import user_repository from telegram_bot import TelegramBot from telegram_handler.start_handler_constats import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \ - LOCATION_MESSAGE, EXPERIENCE_MESSAGE, FILTER_TILE_MESSAGE, THANK_YOU_MESSAGE, BYE_MESSAGE, VERIFY_MESSAGE + LOCATION_MESSAGE, EXPERIENCE_MESSAGE, FILTER_TILE_MESSAGE, THANK_YOU_MESSAGE, BYE_MESSAGE, VERIFY_MESSAGE, \ + SEARCH_MESSAGE from telegram_handler.telegram_handler import TelegramHandler @@ -36,8 +38,11 @@ class TelegramStartHandler(TelegramHandler): async def start(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Starts the conversation and asks the user about their position.""" chat: Chat = update.message.chat - user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id) - # user_repository.insert_user(user) + user = user_repository.find_by_username(chat.username) + if not user: + user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id) + user_repository.insert_user(user) + await update.message.reply_text(START_MESSAGE) buttons = [[KeyboardButton(position.value)] for position in Position] @@ -54,10 +59,10 @@ class TelegramStartHandler(TelegramHandler): """Stores the selected position and asks for a locations.""" user = update.message.from_user self.logger.info("Position of %s: %s", user.first_name, update.message.text) - position = next((p for p in Position if p.name == update.message.text), None) + position = next((p for p in Position if p.value == update.message.text), None) if not position: await 
update.message.reply_text(POSITION_NOT_FOUND) - buttons = [[KeyboardButton(position.name)] for position in Position] + buttons = [[KeyboardButton(position.value)] for position in Position] reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True, input_field_placeholder=Flow.POSITION.name) await update.message.reply_text( @@ -70,13 +75,13 @@ class TelegramStartHandler(TelegramHandler): return Flow.ADDRESS.value - async def address(self, update: Update) -> int: + async def address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Asks for a location.""" user = update.message.from_user self.cities = update.message.text.split(",") reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_ADDRESS.name) - await update.message.reply_text(VERIFY_MESSAGE % self.filters, reply_markup=reply_markup) + await update.message.reply_text(VERIFY_MESSAGE % self.cities, reply_markup=reply_markup) return Flow.VERIFY_ADDRESS.value @@ -119,6 +124,7 @@ class TelegramStartHandler(TelegramHandler): return Flow.FILTERS.value await update.message.reply_text(THANK_YOU_MESSAGE) + await update.message.reply_text(SEARCH_MESSAGE) return ConversationHandler.END @@ -127,6 +133,7 @@ class TelegramStartHandler(TelegramHandler): user = update.message.from_user self.logger.info("User %s did not send a filters.", user.first_name) await update.message.reply_text(THANK_YOU_MESSAGE) + await update.message.reply_text(SEARCH_MESSAGE) return ConversationHandler.END From 4873082147ab36d1fe8a803883c8dbde4b1ce71c Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Sun, 5 Jan 2025 17:41:48 +0200 Subject: [PATCH 11/22] all data saved to db at the last step, cache works ! 
--- requirements.txt | Bin 1142 -> 1176 bytes src/config/cache_manager.py | 17 +++++ src/model/User.py | 3 + src/model/user_repository.py | 17 ++--- .../telegram_start_handler.py | 67 +++++++----------- 5 files changed, 53 insertions(+), 51 deletions(-) create mode 100644 src/config/cache_manager.py diff --git a/requirements.txt b/requirements.txt index 5b69741ce820ca9fa1d7da4ab42d1907119a36e5..723ff53c3c492b5d08d647a72972ad63d25e8bc1 100644 GIT binary patch delta 42 scmeyyF@tkM8H*Ax0~bRwLn07oFr+dhG2}B;Fw`;FGMF&vfw3V20MC5~@Bjb+ delta 7 OcmbQi`Hf>k84CamQv%8W diff --git a/src/config/cache_manager.py b/src/config/cache_manager.py new file mode 100644 index 0000000..e65c67f --- /dev/null +++ b/src/config/cache_manager.py @@ -0,0 +1,17 @@ +from cachebox import LRUCache + + +class CacheboxCacheManager: + def __init__(self): + self._cache = LRUCache(50) + + def find(self, cache_id: str): + """Finding cached data by id, else None""" + return self._cache.get(cache_id) + + def save(self, cache_id: str, data): + """Finding cached data by id, else None""" + self._cache.insert(cache_id, data) + + +cache_manager = CacheboxCacheManager() diff --git a/src/model/User.py b/src/model/User.py index 72ebfad..cf5cee7 100644 --- a/src/model/User.py +++ b/src/model/User.py @@ -9,4 +9,7 @@ class User(BaseModel): full_name: str username: str chat_id: Union[int, str] = None + experience: Union[int, str] = None field: Optional[Position] = None + cities: Optional[list[str]] = None + title_filters: Optional[list[str]] = None diff --git a/src/model/user_repository.py b/src/model/user_repository.py index 6003825..f7ca798 100644 --- a/src/model/user_repository.py +++ b/src/model/user_repository.py @@ -4,6 +4,7 @@ from cachebox import LRUCache from dotenv import load_dotenv from pymongo import UpdateOne +from config.cache_manager import cache_manager from jobspy import create_logger from .User import User from .monogo_db import mongo_client @@ -13,7 +14,6 @@ load_dotenv() class UserRepository: def 
__init__(self): - self._cache = LRUCache(50) self._logger = create_logger("UserRepository") self._collection = mongo_client.get_collection('user') self._collection.create_index('username', unique=True) @@ -29,15 +29,15 @@ class UserRepository: The user document if found, otherwise None. """ user = None - cached_user = self._cache[user_id] + cached_user = cache_manager.find(user_id) if cached_user: - return User(**cached_user) + return cached_user result = self._collection.find_one({"id": user_id}) if result: user = User(**result) - self._cache[user_id] = user + cache_manager.save(user_id, user) return user @@ -52,15 +52,15 @@ class UserRepository: The user document if found, otherwise None. """ user = None - cached_user = self._cache.get(username) + cached_user = cache_manager.find(username) if cached_user: return cached_user - self._logger.info("Find user by username") result = self._collection.find_one({"username": username}) + self._logger.info("find user by usernameeeeeeee") if result: user = User(**result) - self._cache[username] = user + cache_manager.save(username, user) return user @@ -74,7 +74,7 @@ class UserRepository: Returns: True if the update was successful, False otherwise. """ - result = self._collection.update_one({"id": user.id}, {"$set": user.model_dump()}) + result = self._collection.update_one({"username": user.username}, {"$set": user.model_dump()}) return result.modified_count > 0 def insert_user(self, user: User): @@ -88,6 +88,7 @@ class UserRepository: Exception: If an error occurs during insertion. 
""" self._collection.insert_one(user.model_dump()) + cache_manager.save(user.username, user) self._logger.info(f"Inserted new user with username {user.username}.") def insert_many_if_not_found(self, users: list[User]) -> tuple[list[User], list[User]]: diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index 37f76ce..7d60fc9 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -5,6 +5,7 @@ from telegram.ext import ( ContextTypes, ConversationHandler, CommandHandler, MessageHandler, filters, ) +from config.cache_manager import cache_manager from jobspy.scrapers.utils import create_logger from model.Position import Position from model.User import User @@ -13,7 +14,6 @@ from telegram_bot import TelegramBot from telegram_handler.start_handler_constats import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \ LOCATION_MESSAGE, EXPERIENCE_MESSAGE, FILTER_TILE_MESSAGE, THANK_YOU_MESSAGE, BYE_MESSAGE, VERIFY_MESSAGE, \ SEARCH_MESSAGE -from telegram_handler.telegram_handler import TelegramHandler class Flow(Enum): @@ -26,14 +26,11 @@ class Flow(Enum): SKIP_FILTERS = 6 -class TelegramStartHandler(TelegramHandler): +class TelegramStartHandler: def __init__(self): - self.filters = None self.telegram_bot = TelegramBot() self.logger = create_logger("TelegramStartHandler") - self.temp_user = None - self.cities = None async def start(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Starts the conversation and asks the user about their position.""" @@ -70,18 +67,23 @@ class TelegramStartHandler(TelegramHandler): reply_markup=reply_markup, ) return Flow.POSITION.value - + cached_user: User = cache_manager.find(user.username) + cached_user.field = position + cache_manager.save(cached_user.username, cached_user) await update.message.reply_text(LOCATION_MESSAGE) return Flow.ADDRESS.value async def address(self, update: Update, context: 
ContextTypes.DEFAULT_TYPE) -> int: """Asks for a location.""" - user = update.message.from_user - self.cities = update.message.text.split(",") + cities = update.message.text.split(",") reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_ADDRESS.name) - await update.message.reply_text(VERIFY_MESSAGE % self.cities, reply_markup=reply_markup) + await update.message.reply_text(VERIFY_MESSAGE % cities, reply_markup=reply_markup) + + cached_user: User = cache_manager.find(update.message.from_user.username) + cached_user.cities = cities + cache_manager.save(cached_user.username, cached_user) return Flow.VERIFY_ADDRESS.value @@ -103,17 +105,23 @@ class TelegramStartHandler(TelegramHandler): """Asks for a experience.""" user = update.message.from_user self.logger.info("Experience of %s: %s", user.first_name, update.message.text) - + cached_user: User = cache_manager.find(update.message.from_user.username) + cached_user.experience = update.message.text + cache_manager.save(cached_user.username, cached_user) await update.message.reply_text( FILTER_TILE_MESSAGE) return Flow.FILTERS.value async def filters_flow(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Asks for a filters_flow.""" - self.filters = update.message.text.split(",") + title_filters = update.message.text.split(",") reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_FILTERS.name) - await update.message.reply_text(VERIFY_MESSAGE % self.filters, reply_markup=reply_markup) + await update.message.reply_text(VERIFY_MESSAGE % title_filters, reply_markup=reply_markup) + + cached_user: User = cache_manager.find(update.message.from_user.username) + cached_user.title_filters = title_filters + cache_manager.save(cached_user.username, cached_user) return Flow.VERIFY_FILTERS.value @@ -125,7 +133,8 @@ class 
TelegramStartHandler(TelegramHandler): await update.message.reply_text(THANK_YOU_MESSAGE) await update.message.reply_text(SEARCH_MESSAGE) - + cached_user: User = cache_manager.find(update.message.from_user.username) + user_repository.update(cached_user) return ConversationHandler.END async def skip_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: @@ -144,38 +153,10 @@ class TelegramStartHandler(TelegramHandler): await update.message.reply_text( BYE_MESSAGE, reply_markup=ReplyKeyboardRemove() ) - + cached_user: User = cache_manager.find(user.username) + user_repository.update(cached_user.username, cached_user) return ConversationHandler.END - async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE): - self.logger.info("start handling") - # chat: Chat = update.message.chat - # chat.id - 368620919 - # chat.username - 'Qw1zeR' - # chat.full_name - 'Qw1zeR' - # user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id) - # user_repository.insert_user(user) - # fields = field_repository.find_all() # Get all fields from the database - # buttons = [[KeyboardButton(field.name)] for field in fields] - # reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True) - # - # await update.message.reply_text("Please select your field:", reply_markup=reply_markup) - # await self.telegram_bot.set_message_reaction( - # update.message.message_id, ReactionEmoji.FIRE) - # site_names = [site.name for site in self.sites_to_scrap] - # site_names_print = ", ".join(site_names) - # await self.telegram_bot.send_text( - # f"Start scarping: {site_names_print}") - # self.logger.info(f"Found {len(jobs)} jobs") - # self.jobRepository.insert_many_if_not_found(filtered_out_jobs) - # old_jobs, new_jobs = self.jobRepository.insert_many_if_not_found(jobs) - # for newJob in new_jobs: - # await self.telegram_bot.send_job(newJob) - # self.logger.info(f"Found {len(old_jobs)} old jobs") - # await self.telegram_bot.send_text( - # f"Finished 
scarping: {site_names_print}") - self.logger.info("finished handling") - start_handler = TelegramStartHandler() start_conv_handler = ConversationHandler( From 9ffbdd5e2afc2f0fcdbcfc854cc290e6ff7bb47f Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Sun, 5 Jan 2025 19:08:10 +0200 Subject: [PATCH 12/22] new handler to see my info --- src/jobspy/__init__.py | 4 ++ src/main.py | 31 ++++++++++-- src/model/Position.py | 50 ++++++++----------- src/model/User.py | 18 ++++++- src/model/job_repository.py | 29 ++++------- .../button_callback_context.py | 7 ++- .../button_callback/button_fire_strategy.py | 7 ++- .../telegram_default_handler.py | 17 ++++--- .../telegram_myinfo_handler.py | 29 +++++++++++ .../telegram_start_handler.py | 2 +- 10 files changed, 127 insertions(+), 67 deletions(-) create mode 100644 src/telegram_handler/telegram_myinfo_handler.py diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py index 60980db..f176955 100644 --- a/src/jobspy/__init__.py +++ b/src/jobspy/__init__.py @@ -166,6 +166,10 @@ def scrape_jobs( """ filtered_jobs = [] remaining_jobs = [] + + if not filter_by_title: + return filtered_jobs, remaining_jobs + for job in jobs: for filter_title in filter_by_title: if re.search(filter_title, job.title, re.IGNORECASE): diff --git a/src/main.py b/src/main.py index c52ef24..4f5c782 100644 --- a/src/main.py +++ b/src/main.py @@ -1,9 +1,13 @@ from telegram import Update, ReplyKeyboardMarkup, ReplyKeyboardRemove from telegram.ext import Application, CommandHandler, ConversationHandler, \ - MessageHandler, filters, ContextTypes + MessageHandler, filters, ContextTypes, CallbackQueryHandler from config.settings import settings +from jobspy import Site from jobspy.scrapers.utils import create_logger +from telegram_handler import TelegramDefaultHandler +from telegram_handler.button_callback.telegram_callback_handler import TelegramCallHandler +from telegram_handler.telegram_myinfo_handler import my_info_handler from 
telegram_handler.telegram_start_handler import start_conv_handler logger = create_logger("Main") @@ -20,7 +24,28 @@ if __name__ == "__main__": locations = ["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", "Rehovot ,Israel"] application.add_handler(start_conv_handler) - - # application.add_handler(CommandHandler('start', start_handler.handle)) + tg_callback_handler = TelegramCallHandler() + tg_handler_all = TelegramDefaultHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI]) + application.add_handler(CommandHandler("find", tg_handler_all.handle)) + # Goozali + tg_handler_goozali = TelegramDefaultHandler(sites=[Site.GOOZALI]) + application.add_handler(CommandHandler( + Site.GOOZALI.value, tg_handler_goozali.handle)) + # GlassDoor + tg_handler_glassdoor = TelegramDefaultHandler(sites=[Site.GLASSDOOR]) + application.add_handler(CommandHandler( + Site.GLASSDOOR.value, tg_handler_glassdoor.handle)) + # LinkeDin + tg_handler_linkedin = TelegramDefaultHandler(sites=[Site.LINKEDIN]) + application.add_handler(CommandHandler( + Site.LINKEDIN.value, tg_handler_linkedin.handle)) + # Indeed + tg_handler_indeed = TelegramDefaultHandler(sites=[Site.INDEED]) + application.add_handler(CommandHandler( + Site.INDEED.value, tg_handler_indeed.handle)) + application.add_handler(CommandHandler( + "myInfo", my_info_handler.handle)) + application.add_handler(CallbackQueryHandler( + tg_callback_handler.button_callback)) logger.info("Run polling from telegram") application.run_polling(allowed_updates=Update.ALL_TYPES) diff --git a/src/model/Position.py b/src/model/Position.py index 0464607..9bdf360 100644 --- a/src/model/Position.py +++ b/src/model/Position.py @@ -2,31 +2,25 @@ from enum import Enum class Position(str, Enum): - PRODUCT_MANAGEMENT = "Product Management" - DATA_ANALYST = "Data Analyst" - DATA_SCIENCE = "Data Science, ML & Algorithms" - SOFTWARE_ENGINEERING = "Software Engineering" - FULLSTACK_DEVELOPMENT = "Fullstack Development" - QA = "QA" - 
CYBERSECURITY = "Cybersecurity" - IT_AND_SYSTEM_ADMINISTRATION = "IT and System Administration" - FRONTEND_DEVELOPMENT = "Frontend Development" - DEV_OPS = "DevOps" - UI_UX = "UI/UX, Design & Content" - HR_RECRUITMENT = "HR & Recruitment" - MOBILE_DEVELOPMENT = "Mobile Development" - HARDWARE_ENGINEERING = "Hardware Engineering" - EMBEDDED_ENGINEERING = "Embedded, Low Level & Firmware Engineering" - CUSTOMER_SUCCESS = "Customer Success" - PROJECT_MANAGEMENT = "Project Management" - OPERATIONS = "Operations" - FINANCE = "Finance" - SYSTEMS_ENGINEERING = "Systems Engineering" - MARKETING = "Marketing" - SALES = "Sales" - LEGAL_POLICY = "Compliance, Legal & Policy" - C_LEVEL = "C-Level" - BUSINESS_DEVELOPMENT = "Business Development" - MECHANICAL_ENGINEERING = "Mechanical Engineering" - NATURAL_SCIENCE = "Natural Science" - OTHER = "Other" + BACKEND_DEVELOPER = "Backend Developer" + FULLSTACK_DEVELOPER = "Fullstack Developer" + FRONTEND_DEVELOPER = "Frontend Developer" + DATA_SCIENTIST="Data Scientist" + DATA_ANALYST="Data Analyst" + PROJECT_MANAGER="Project Manager" + CLOUD_ENGINEER="Cloud Engineer" + CLOUD_ARCHITECT="Cloud Architect" + UX_UI_DESIGNER="UX/UI Designer" + PRODUCT_MANAGER="Product Manager" + DEV_OPS_ENGINEER="DevOps Engineer" + BUSINESS_ANALYST="Business Analyst" + CYBERSECURITY_ENGINEER="Cybersecurity Engineer" + MACHINE_LEARNING_ENGINEER="Machine Learning Engineer" + ARTIFICIAL_INTELLIGENCE_ENGINEER="Artificial Intelligence Engineer" + DATABASE_ADMINISTRATOR="Database Administrator" + SYSTEMS_ADMINISTRATOR="Systems Administrator" + NETWORK_ENGINEER="Network Engineer" + TECHNICAL_SUPPORT_SPECIALIST="Technical Support Specialist" + SALES_ENGINEER="Sales Engineer" + SCRUM_MASTER="Scrum Master" + IT_MANAGER="IT Manager" diff --git a/src/model/User.py b/src/model/User.py index cf5cee7..d2ba67f 100644 --- a/src/model/User.py +++ b/src/model/User.py @@ -10,6 +10,22 @@ class User(BaseModel): username: str chat_id: Union[int, str] = None experience: Union[int, 
str] = None - field: Optional[Position] = None + position: Optional[Position] = None cities: Optional[list[str]] = None title_filters: Optional[list[str]] = None + + def get_myinfo_message(self): + message = "Here's your profile:\n\n" + message += f"Full Name: {self.full_name}\n" + message += f"Username: @{self.username}\n" + if self.chat_id: + message += f"Chat ID: {self.chat_id}\n" + if self.experience: + message += f"Experience: {self.experience}\n" + if self.position: + message += f"Position Level: {self.position.value}\n" + if self.cities: + message += f"Preferred Cities: {', '.join(self.cities)}\n" + if self.title_filters: + message += f"Job Title Filters: {', '.join(self.title_filters)}\n" + return message \ No newline at end of file diff --git a/src/model/job_repository.py b/src/model/job_repository.py index 85ff0ca..6a8adfa 100644 --- a/src/model/job_repository.py +++ b/src/model/job_repository.py @@ -11,18 +11,9 @@ load_dotenv() class JobRepository: - _instance = None - - def __new__(cls): - - if cls._instance is not None: - return cls._instance - - self = super().__new__(cls) - cls._instance = self - self.logger = create_logger("JobRepository") - self.collection = mongo_client._db["jobs"] - return cls._instance + def __init__(self): + self._logger = create_logger("JobRepository") + self._collection = mongo_client.get_collection('jobs') def find_by_id(self, job_id: str) -> Optional[JobPost]: """ @@ -34,7 +25,7 @@ class JobRepository: Returns: The job document if found, otherwise None. """ - result = self.collection.find_one({"id": job_id}) + result = self._collection.find_one({"id": job_id}) return JobPost(**result) def update(self, job: JobPost) -> bool: @@ -47,7 +38,7 @@ class JobRepository: Returns: True if the update was successful, False otherwise. 
""" - result = self.collection.update_one({"id": job.id}, {"$set": job.model_dump(exclude={"date_posted"})}) + result = self._collection.update_one({"id": job.id}, {"$set": job.model_dump(exclude={"date_posted"})}) return result.modified_count > 0 def insert_job(self, job: JobPost): @@ -61,8 +52,8 @@ class JobRepository: Exception: If an error occurs during insertion. """ job_dict = job.model_dump(exclude={"date_posted"}) - self.collection.insert_one(job_dict) - self.logger.info(f"Inserted new job with title {job.title}.") + self._collection.insert_one(job_dict) + self._logger.info(f"Inserted new job with title {job.title}.") def insert_many_if_not_found(self, jobs: list[JobPost]) -> tuple[list[JobPost], list[JobPost]]: """ @@ -85,8 +76,8 @@ class JobRepository: if operations: # Execute all operations in bulk - result = self.collection.bulk_write(operations) - self.logger.info(f"Matched: {result.matched_count}, Upserts: { + result = self._collection.bulk_write(operations) + self._logger.info(f"Matched: {result.matched_count}, Upserts: { result.upserted_count}, Modified: {result.modified_count}") # Get the newly inserted jobs (those that were upserted) @@ -98,3 +89,5 @@ class JobRepository: old_jobs.append(job) return old_jobs, new_jobs + +job_repository = JobRepository() \ No newline at end of file diff --git a/src/telegram_handler/button_callback/button_callback_context.py b/src/telegram_handler/button_callback/button_callback_context.py index 0a8f09a..5d06367 100644 --- a/src/telegram_handler/button_callback/button_callback_context.py +++ b/src/telegram_handler/button_callback/button_callback_context.py @@ -3,8 +3,8 @@ from __future__ import annotations from telegram import MaybeInaccessibleMessage from telegram.constants import ReactionEmoji -from model.job_repository import JobRepository from jobspy import create_logger +from model.job_repository import job_repository from telegram_handler.button_callback.button_fire_strategy import FireStrategy from 
telegram_handler.button_callback.button_job_title_strategy import JobTitleStrategy from telegram_handler.button_callback.button_poo_strategy import PooStrategy @@ -22,7 +22,6 @@ class ButtonCallBackContext: self._data = data self._job_id = job_id self._strategy = None - self._job_repository = JobRepository() @property def strategy(self) -> ButtonStrategy: @@ -49,10 +48,10 @@ class ButtonCallBackContext: elif ReactionEmoji.PILE_OF_POO.name == self._data: self._strategy = PooStrategy(self._message) elif self._data: - job = self._job_repository.find_by_id(self._data) + job = job_repository.find_by_id(self._data) if job: chat_id = self._message.chat.id - self._strategy = JobTitleStrategy(chat_id,job) + self._strategy = JobTitleStrategy(chat_id, job) else: self._logger.error("Invalid enum value") return diff --git a/src/telegram_handler/button_callback/button_fire_strategy.py b/src/telegram_handler/button_callback/button_fire_strategy.py index 7930549..90f6050 100644 --- a/src/telegram_handler/button_callback/button_fire_strategy.py +++ b/src/telegram_handler/button_callback/button_fire_strategy.py @@ -1,8 +1,8 @@ from telegram import MaybeInaccessibleMessage from telegram.constants import ReactionEmoji -from model.job_repository import JobRepository from jobspy import create_logger +from model.job_repository import job_repository from telegram_bot import TelegramBot from telegram_handler.button_callback.button_strategy import ButtonStrategy @@ -16,16 +16,15 @@ class FireStrategy(ButtonStrategy): self._message = message self._emoji = ReactionEmoji.FIRE self._telegram_bot = TelegramBot() - self._job_repository = JobRepository() self._job_id = job_id self._logger = create_logger("FireStrategy") async def execute(self): - job = self._job_repository.find_by_id(self._job_id) + job = job_repository.find_by_id(self._job_id) if not job: self._logger.error(f"Job with ID {self._job_id} not found.") return job.applied = True - self._job_repository.update(job) + 
job_repository.update(job) chat_id = self._message.chat.id await self._telegram_bot.set_message_reaction(chat_id, self._message.message_id, self._emoji) diff --git a/src/telegram_handler/telegram_default_handler.py b/src/telegram_handler/telegram_default_handler.py index 5b1487e..6bb80cc 100644 --- a/src/telegram_handler/telegram_default_handler.py +++ b/src/telegram_handler/telegram_default_handler.py @@ -4,9 +4,10 @@ from telegram.ext import ( ContextTypes, ) -from model.job_repository import JobRepository from jobspy import Site, scrape_jobs, JobPost from jobspy.scrapers.utils import create_logger +from model.job_repository import JobRepository +from model.user_repository import user_repository from telegram_bot import TelegramBot from telegram_handler.telegram_handler import TelegramHandler @@ -33,11 +34,8 @@ def map_jobs_to_keyboard(jobs: list[JobPost]) -> InlineKeyboardMarkup: class TelegramDefaultHandler(TelegramHandler): - def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str], search_term: str): + def __init__(self, sites: list[Site]): self.sites_to_scrap = sites - self.locations = locations - self.search_term = search_term - self.title_filters = title_filters self.telegram_bot = TelegramBot() self.jobRepository = JobRepository() if len(sites) == 1: @@ -51,17 +49,20 @@ class TelegramDefaultHandler(TelegramHandler): chat_id = update.message.chat.id await self.telegram_bot.set_message_reaction(chat_id, update.message.message_id, ReactionEmoji.FIRE) + user = user_repository.find_by_username(update.message.from_user.username) + site_names = [site.name for site in self.sites_to_scrap] site_names_print = ", ".join(site_names) + locations = [location + ", Israel" for location in user.cities] await self.telegram_bot.send_text(chat_id, f"Start scarping: {site_names_print}") filtered_out_jobs, jobs = scrape_jobs( site_name=self.sites_to_scrap, - search_term=self.search_term, - locations=self.locations, + search_term=user.position.value, 
+ locations=locations, results_wanted=200, hours_old=48, - filter_by_title=self.title_filters, + filter_by_title=user.title_filters, country_indeed='israel' ) self.logger.info(f"Found {len(jobs)} jobs") diff --git a/src/telegram_handler/telegram_myinfo_handler.py b/src/telegram_handler/telegram_myinfo_handler.py new file mode 100644 index 0000000..853a707 --- /dev/null +++ b/src/telegram_handler/telegram_myinfo_handler.py @@ -0,0 +1,29 @@ +from telegram import Update +from telegram.constants import ReactionEmoji +from telegram.ext import ( + ContextTypes, +) + +from jobspy.scrapers.utils import create_logger +from model.user_repository import user_repository +from telegram_bot import TelegramBot +from telegram_handler.telegram_handler import TelegramHandler + + +class MyInfoTelegramHandler(TelegramHandler): + def __init__(self): + self.telegram_bot = TelegramBot() + self._logger = create_logger("MyInfoTelegramHandler") + + async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE): + self._logger.info("start handling") + chat_id = update.message.chat.id + await self.telegram_bot.set_message_reaction(chat_id, + update.message.message_id, ReactionEmoji.FIRE) + user = user_repository.find_by_username(update.message.from_user.username) + await self.telegram_bot.send_text(chat_id, user.get_myinfo_message()) + + self._logger.info("finished handling") + + +my_info_handler = MyInfoTelegramHandler() diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index 7d60fc9..dcf8328 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -68,7 +68,7 @@ class TelegramStartHandler: ) return Flow.POSITION.value cached_user: User = cache_manager.find(user.username) - cached_user.field = position + cached_user.position = position cache_manager.save(cached_user.username, cached_user) await update.message.reply_text(LOCATION_MESSAGE) From 
c570f53e5b8de3bcf3e5a93c2ac2e497c4a00de5 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Sun, 5 Jan 2025 19:22:45 +0200 Subject: [PATCH 13/22] added reaction to each message --- .../start_handler_constats.py | 3 +++ .../telegram_start_handler.py | 24 ++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/telegram_handler/start_handler_constats.py b/src/telegram_handler/start_handler_constats.py index ee70ea1..2cb8267 100644 --- a/src/telegram_handler/start_handler_constats.py +++ b/src/telegram_handler/start_handler_constats.py @@ -14,6 +14,9 @@ LOCATION_MESSAGE: str = "Where are you hoping to find a position? 🌎\n" \ EXPERIENCE_MESSAGE: str = "How many years of professional experience do you have in this field? 💼\n" +EXPERIENCE_INVALID: str = "Experience must be a number. 😕\n" \ + "Please try again" + FILTER_TILE_MESSAGE: str = "To help me narrow down your search, tell me about any relevant tags or keywords.\n" \ "For example: 'remote', 'entry-level', 'python', 'machine learning', 'QA'.\n\n" + multi_value_message diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index dcf8328..68058c9 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -1,6 +1,7 @@ from enum import Enum from telegram import Update, Chat, KeyboardButton, ReplyKeyboardMarkup, ReplyKeyboardRemove +from telegram.constants import ReactionEmoji from telegram.ext import ( ContextTypes, ConversationHandler, CommandHandler, MessageHandler, filters, ) @@ -13,7 +14,7 @@ from model.user_repository import user_repository from telegram_bot import TelegramBot from telegram_handler.start_handler_constats import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \ LOCATION_MESSAGE, EXPERIENCE_MESSAGE, FILTER_TILE_MESSAGE, THANK_YOU_MESSAGE, BYE_MESSAGE, VERIFY_MESSAGE, \ - SEARCH_MESSAGE + SEARCH_MESSAGE, EXPERIENCE_INVALID class Flow(Enum): @@ -56,6 +57,7 
@@ class TelegramStartHandler: """Stores the selected position and asks for a locations.""" user = update.message.from_user self.logger.info("Position of %s: %s", user.first_name, update.message.text) + await update.message.set_reaction(ReactionEmoji.FIRE) position = next((p for p in Position if p.value == update.message.text), None) if not position: await update.message.reply_text(POSITION_NOT_FOUND) @@ -77,6 +79,7 @@ class TelegramStartHandler: async def address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Asks for a location.""" cities = update.message.text.split(",") + await update.message.set_reaction(ReactionEmoji.FIRE) reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_ADDRESS.name) await update.message.reply_text(VERIFY_MESSAGE % cities, reply_markup=reply_markup) @@ -89,22 +92,27 @@ class TelegramStartHandler: async def verify_address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Verify for a Address.""" + await update.message.set_reaction(ReactionEmoji.FIRE) if update.message.text == "No": await update.message.reply_text(LOCATION_MESSAGE) return Flow.ADDRESS.value - reply_markup = ReplyKeyboardMarkup([["1", "2"]], one_time_keyboard=True, - input_field_placeholder=Flow.VERIFY_ADDRESS.name) - await update.message.reply_text(EXPERIENCE_MESSAGE, - reply_markup=reply_markup - ) + await update.message.reply_text(EXPERIENCE_MESSAGE) return Flow.EXPERIENCE.value async def experience(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Asks for a experience.""" + await update.message.set_reaction(ReactionEmoji.FIRE) user = update.message.from_user self.logger.info("Experience of %s: %s", user.first_name, update.message.text) + + if not update.message.text.isnumeric(): + await update.message.reply_text(EXPERIENCE_INVALID) + await update.message.reply_text(EXPERIENCE_MESSAGE) + + return Flow.EXPERIENCE.value + 
cached_user: User = cache_manager.find(update.message.from_user.username) cached_user.experience = update.message.text cache_manager.save(cached_user.username, cached_user) @@ -114,6 +122,7 @@ class TelegramStartHandler: async def filters_flow(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Asks for a filters_flow.""" + await update.message.set_reaction(ReactionEmoji.FIRE) title_filters = update.message.text.split(",") reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_FILTERS.name) @@ -127,6 +136,7 @@ class TelegramStartHandler: async def verify_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Verify for a filters_flow.""" + await update.message.set_reaction(ReactionEmoji.FIRE) if update.message.text == "No": await update.message.reply_text(FILTER_TILE_MESSAGE) return Flow.FILTERS.value @@ -139,6 +149,7 @@ class TelegramStartHandler: async def skip_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Skips the location and asks for info about the user.""" + await update.message.set_reaction(ReactionEmoji.FIRE) user = update.message.from_user self.logger.info("User %s did not send a filters.", user.first_name) await update.message.reply_text(THANK_YOU_MESSAGE) @@ -148,6 +159,7 @@ class TelegramStartHandler: async def cancel(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Cancels and ends the conversation.""" + await update.message.set_reaction(ReactionEmoji.FIRE) user = update.message.from_user self.logger.info("User %s canceled the conversation.", user.first_name) await update.message.reply_text( From fced92f8713d136ad8b73ae58fbd53ad956b95e1 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Mon, 6 Jan 2025 14:24:51 +0200 Subject: [PATCH 14/22] created mapper for position to goozali field type next to fix circular depand --- .../goozali/GoozaliScrapperComponent.py | 5 +- 
src/jobspy/scrapers/goozali/__init__.py | 12 ++-- src/jobspy/scrapers/goozali/constants.py | 66 ++++++++++++++++++- src/model/Position.py | 38 +++++------ src/model/user_repository.py | 1 - 5 files changed, 95 insertions(+), 27 deletions(-) diff --git a/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py b/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py index bcab0d3..06e9a5c 100644 --- a/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py +++ b/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py @@ -1,5 +1,6 @@ from datetime import datetime, timedelta +from . import GoozaliFieldChoice from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice from ..utils import create_logger @@ -39,13 +40,13 @@ class GoozaliScrapperComponent: if (column.name == column_name): return column - def find_choice_from_column(self, column: GoozaliColumn, choice_name: str) -> GoozaliColumnChoice: + def find_choice_from_column(self, column: GoozaliColumn, choice_name: GoozaliFieldChoice) -> GoozaliColumnChoice: if not column.typeOptions.choices: logger.exception(f"Choices for column {column.name} doesn't exist") raise Exception(f"Choices for column {column.name} doesn't exist") for key, choice in column.typeOptions.choices.items(): - if (choice.name == choice_name): + if choice.name == choice_name.value: return choice logger.exception(f"Can't find {choice_name} for column {column.name}") diff --git a/src/jobspy/scrapers/goozali/__init__.py b/src/jobspy/scrapers/goozali/__init__.py index 90f18eb..59e334e 100644 --- a/src/jobspy/scrapers/goozali/__init__.py +++ b/src/jobspy/scrapers/goozali/__init__.py @@ -7,11 +7,12 @@ This module contains routines to scrape Goozali. from __future__ import annotations - +from model.User import User +from model.user_repository import user_repository from .. 
import Scraper, ScraperInput from .GoozaliMapper import GoozaliMapper from .GoozaliScrapperComponent import GoozaliScrapperComponent -from .constants import extract_goozali_column_name, job_post_column_to_goozali_column +from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest from ..site import Site @@ -20,6 +21,7 @@ from ...jobs import ( JobPost, JobResponse, ) + logger = create_logger("Goozali") @@ -67,7 +69,7 @@ class GoozaliScraper(Scraper): logger.info(f"response: {str(response)}") if (response.status_code != 200): logger.error(f"Status code: {response.status_code}, Error: { - str(response.text)}") + str(response.text)}") return JobResponse(jobs=job_list) except Exception as e: logger.error(f"Exception: {str(e)}") @@ -79,8 +81,10 @@ class GoozaliScraper(Scraper): # filter result by Field column = self.component.find_column( goozali_response.data.columns, job_post_column_to_goozali_column["field"]) + user: User = user_repository.find_by_username() + user_goozali_field = position_to_goozali_field_map[user.position] column_choice = self.component.find_choice_from_column( - column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value) + column, user_goozali_field) filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice( goozali_response.data.rows, column, column_choice) filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours( diff --git a/src/jobspy/scrapers/goozali/constants.py b/src/jobspy/scrapers/goozali/constants.py index 458320f..2f719c5 100644 --- a/src/jobspy/scrapers/goozali/constants.py +++ b/src/jobspy/scrapers/goozali/constants.py @@ -1,6 +1,7 @@ +from model.Position import Position +from . 
import GoozaliFieldChoice from .model import GoozaliColumn - job_post_column_to_goozali_column = { "date_posted": "Discovered", "field": "Field", @@ -23,6 +24,69 @@ job_post_column_names = ["id", "location", "company_industry"] +fields = ["Product Management", + "Data Analyst", + "Data Science, ML & Algorithms", + "Software Engineering", + "QA", + "Cybersecurity", + "IT and System Administration", + "Frontend Development", + "DevOps", + "UI/UX, Design & Content", + "HR & Recruitment", + "Mobile Development", + "Hardware Engineering", + "Embedded, Low Level & Firmware Engineering", + "Customer Success", + "Project Management", + "Operations", + "Finance", + "Systems Engineering", + "Marketing", + "Sales", + "Compliance, Legal & Policy", + "C-Level", + "Business Development", + "Mechanical Engineering", + "Natural Science", + "Other"] + +def create_position_to_goozali_field_map(): + """ + Creates a map with Position as keys and a list of relevant GoozaliFieldChoice as values. + + Returns: + dict: A dictionary mapping Position to a list of GoozaliFieldChoice. 
+ """ + position_to_goozali_map = { + Position.BACKEND_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING], + Position.FULLSTACK_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING], + Position.FRONTEND_DEVELOPER: [GoozaliFieldChoice.FRONTEND_DEVELOPMENT, GoozaliFieldChoice.SOFTWARE_ENGINEERING], + Position.DATA_SCIENTIST: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS], + Position.DATA_ANALYST: [GoozaliFieldChoice.DATA_ANALYST], + Position.PROJECT_MANAGER: [GoozaliFieldChoice.PROJECT_MANAGEMENT], + Position.CLOUD_ENGINEER: [GoozaliFieldChoice.DEVOPS, GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.CLOUD_ARCHITECT: [GoozaliFieldChoice.DEVOPS, GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.UX_UI_DESIGNER: [GoozaliFieldChoice.UI_UX_DESIGN_CONTENT], + Position.PRODUCT_MANAGER: [GoozaliFieldChoice.PRODUCT_MANAGEMENT], + Position.DEV_OPS_ENGINEER: [GoozaliFieldChoice.DEVOPS], + Position.BUSINESS_ANALYST: [GoozaliFieldChoice.BUSINESS_DEVELOPMENT], + Position.CYBERSECURITY_ENGINEER: [GoozaliFieldChoice.CYBERSECURITY], + Position.MACHINE_LEARNING_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS], + Position.ARTIFICIAL_INTELLIGENCE_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS], + Position.DATABASE_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.SYSTEMS_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.NETWORK_ENGINEER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.TECHNICAL_SUPPORT_SPECIALIST: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.SALES_ENGINEER: [GoozaliFieldChoice.SALES], + Position.SCRUM_MASTER: [GoozaliFieldChoice.PROJECT_MANAGEMENT], + Position.IT_MANAGER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + } + return position_to_goozali_map + +# Get the map +position_to_goozali_field_map = create_position_to_goozali_field_map() # Key mapper: Extract 'name' as the key def extract_goozali_column_name(column): return column.name 
if isinstance( diff --git a/src/model/Position.py b/src/model/Position.py index 9bdf360..7717206 100644 --- a/src/model/Position.py +++ b/src/model/Position.py @@ -5,22 +5,22 @@ class Position(str, Enum): BACKEND_DEVELOPER = "Backend Developer" FULLSTACK_DEVELOPER = "Fullstack Developer" FRONTEND_DEVELOPER = "Frontend Developer" - DATA_SCIENTIST="Data Scientist" - DATA_ANALYST="Data Analyst" - PROJECT_MANAGER="Project Manager" - CLOUD_ENGINEER="Cloud Engineer" - CLOUD_ARCHITECT="Cloud Architect" - UX_UI_DESIGNER="UX/UI Designer" - PRODUCT_MANAGER="Product Manager" - DEV_OPS_ENGINEER="DevOps Engineer" - BUSINESS_ANALYST="Business Analyst" - CYBERSECURITY_ENGINEER="Cybersecurity Engineer" - MACHINE_LEARNING_ENGINEER="Machine Learning Engineer" - ARTIFICIAL_INTELLIGENCE_ENGINEER="Artificial Intelligence Engineer" - DATABASE_ADMINISTRATOR="Database Administrator" - SYSTEMS_ADMINISTRATOR="Systems Administrator" - NETWORK_ENGINEER="Network Engineer" - TECHNICAL_SUPPORT_SPECIALIST="Technical Support Specialist" - SALES_ENGINEER="Sales Engineer" - SCRUM_MASTER="Scrum Master" - IT_MANAGER="IT Manager" + DATA_SCIENTIST = "Data Scientist" + DATA_ANALYST = "Data Analyst" + PROJECT_MANAGER = "Project Manager" + CLOUD_ENGINEER = "Cloud Engineer" + CLOUD_ARCHITECT = "Cloud Architect" + UX_UI_DESIGNER = "UX/UI Designer" + PRODUCT_MANAGER = "Product Manager" + DEV_OPS_ENGINEER = "DevOps Engineer" + BUSINESS_ANALYST = "Business Analyst" + CYBERSECURITY_ENGINEER = "Cybersecurity Engineer" + MACHINE_LEARNING_ENGINEER = "Machine Learning Engineer" + ARTIFICIAL_INTELLIGENCE_ENGINEER = "Artificial Intelligence Engineer" + DATABASE_ADMINISTRATOR = "Database Administrator" + SYSTEMS_ADMINISTRATOR = "Systems Administrator" + NETWORK_ENGINEER = "Network Engineer" + TECHNICAL_SUPPORT_SPECIALIST = "Technical Support Specialist" + SALES_ENGINEER = "Sales Engineer" + SCRUM_MASTER = "Scrum Master" + IT_MANAGER = "IT Manager" diff --git a/src/model/user_repository.py b/src/model/user_repository.py 
index f7ca798..9df7edc 100644 --- a/src/model/user_repository.py +++ b/src/model/user_repository.py @@ -1,6 +1,5 @@ from typing import Optional -from cachebox import LRUCache from dotenv import load_dotenv from pymongo import UpdateOne From 2be3ebcb78a1c9c26fd75e34dafb2362092b54d7 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Mon, 6 Jan 2025 15:10:03 +0200 Subject: [PATCH 15/22] restructure project removed jobspy folder --- pyproject.toml | 8 +-- src/{jobspy => }/jobs/__init__.py | 0 src/jobspy/scrapers/__init__.py | 51 ------------------- src/main.py | 9 ++-- src/model/job_repository.py | 4 +- src/model/monogo_db.py | 2 +- src/model/user_repository.py | 2 +- src/{jobspy => scrapers}/__init__.py | 48 +++++++++-------- src/{jobspy => }/scrapers/exceptions.py | 2 +- .../scrapers/glassdoor/GlassDoorLocation.py | 0 .../scrapers/glassdoor/__init__.py | 9 ++-- .../scrapers/glassdoor/constants.py | 0 src/{jobspy => }/scrapers/google/__init__.py | 8 +-- src/{jobspy => }/scrapers/google/constants.py | 0 .../scrapers/goozali/GoozaliMapper.py | 2 +- src/{jobspy => }/scrapers/goozali/__init__.py | 7 +-- .../scrapers/goozali/model/GoozaliColumn.py | 0 .../goozali/model/GoozaliColumnChoice.py | 0 .../goozali/model/GoozaliColumnTypeOptions.py | 0 .../goozali/model/GoozaliFieldChoice.py | 0 .../goozali/model/GoozaliFullRequest.py | 0 .../goozali/model/GoozaliPartRequest.py | 0 .../scrapers/goozali/model/GoozaliRequest.py | 0 .../scrapers/goozali/model/GoozaliResponse.py | 0 .../goozali/model/GoozaliResponseData.py | 0 .../scrapers/goozali/model/GoozaliRow.py | 0 .../scrapers/goozali/model/__init__.py | 0 src/{jobspy => }/scrapers/indeed/__init__.py | 26 +++++----- src/{jobspy => }/scrapers/indeed/constants.py | 0 .../scrapers/linkedin/__init__.py | 10 ++-- .../scrapers/linkedin/constants.py | 0 src/scrapers/scraper.py | 17 +++++++ src/scrapers/scraper_input.py | 25 +++++++++ src/{jobspy => }/scrapers/site.py | 0 src/{jobspy => }/scrapers/utils.py | 6 +-- 
.../scrapers/ziprecruiter/__init__.py | 8 +-- .../scrapers/ziprecruiter/constants.py | 0 src/telegram_bot.py | 5 +- .../button_callback_context.py | 2 +- .../button_callback/button_fire_strategy.py | 2 +- .../button_job_title_strategy.py | 2 +- .../telegram_callback_handler.py | 2 +- .../telegram_default_handler.py | 4 +- .../telegram_myinfo_handler.py | 2 +- .../telegram_start_handler.py | 2 +- tests/test_all.py | 2 +- tests/test_glassdoor.py | 2 +- tests/test_google.py | 2 +- tests/test_goozali.py | 12 ++--- tests/test_indeed.py | 2 +- tests/test_linkedin.py | 2 +- tests/test_util.py | 2 +- tests/test_ziprecruiter.py | 2 +- 53 files changed, 144 insertions(+), 147 deletions(-) rename src/{jobspy => }/jobs/__init__.py (100%) delete mode 100644 src/jobspy/scrapers/__init__.py rename src/{jobspy => scrapers}/__init__.py (86%) rename src/{jobspy => }/scrapers/exceptions.py (96%) rename src/{jobspy => }/scrapers/glassdoor/GlassDoorLocation.py (100%) rename src/{jobspy => }/scrapers/glassdoor/__init__.py (99%) rename src/{jobspy => }/scrapers/glassdoor/constants.py (100%) rename src/{jobspy => }/scrapers/google/__init__.py (98%) rename src/{jobspy => }/scrapers/google/constants.py (100%) rename src/{jobspy => }/scrapers/goozali/GoozaliMapper.py (99%) rename src/{jobspy => }/scrapers/goozali/__init__.py (97%) rename src/{jobspy => }/scrapers/goozali/model/GoozaliColumn.py (100%) rename src/{jobspy => }/scrapers/goozali/model/GoozaliColumnChoice.py (100%) rename src/{jobspy => }/scrapers/goozali/model/GoozaliColumnTypeOptions.py (100%) rename src/{jobspy => }/scrapers/goozali/model/GoozaliFieldChoice.py (100%) rename src/{jobspy => }/scrapers/goozali/model/GoozaliFullRequest.py (100%) rename src/{jobspy => }/scrapers/goozali/model/GoozaliPartRequest.py (100%) rename src/{jobspy => }/scrapers/goozali/model/GoozaliRequest.py (100%) rename src/{jobspy => }/scrapers/goozali/model/GoozaliResponse.py (100%) rename src/{jobspy => }/scrapers/goozali/model/GoozaliResponseData.py 
(100%) rename src/{jobspy => }/scrapers/goozali/model/GoozaliRow.py (100%) rename src/{jobspy => }/scrapers/goozali/model/__init__.py (100%) rename src/{jobspy => }/scrapers/indeed/__init__.py (94%) rename src/{jobspy => }/scrapers/indeed/constants.py (100%) rename src/{jobspy => }/scrapers/linkedin/__init__.py (98%) rename src/{jobspy => }/scrapers/linkedin/constants.py (100%) create mode 100644 src/scrapers/scraper.py create mode 100644 src/scrapers/scraper_input.py rename src/{jobspy => }/scrapers/site.py (100%) rename src/{jobspy => }/scrapers/utils.py (98%) rename src/{jobspy => }/scrapers/ziprecruiter/__init__.py (98%) rename src/{jobspy => }/scrapers/ziprecruiter/constants.py (100%) diff --git a/pyproject.toml b/pyproject.toml index c4275a7..fdb9bec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,15 +3,15 @@ requires = [ "poetry-core",] build-backend = "poetry.core.masonry.api" [tool.poetry] -name = "python-jobspy" +name = "python-JobSeekerTG" version = "1.1.76" description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter" -authors = [ "Zachary Hampton ", "Cullen Watson ",] -homepage = "https://github.com/Bunsly/JobSpy" +authors = [ "YM "] +homepage = "https://github.com/yariv245/JobSeekerTG" readme = "README.md" keywords = [ "jobs-scraper", "linkedin", "indeed", "glassdoor", "ziprecruiter",] [[tool.poetry.packages]] -include = "jobspy" +include = "JobSeekerTG" from = "src" [tool.black] diff --git a/src/jobspy/jobs/__init__.py b/src/jobs/__init__.py similarity index 100% rename from src/jobspy/jobs/__init__.py rename to src/jobs/__init__.py diff --git a/src/jobspy/scrapers/__init__.py b/src/jobspy/scrapers/__init__.py deleted file mode 100644 index c3f2756..0000000 --- a/src/jobspy/scrapers/__init__.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod - -from .site import Site -from ..jobs import ( - Enum, - BaseModel, - JobType, - JobResponse, - Country, - DescriptionFormat, -) - - 
-class SalarySource(Enum): - DIRECT_DATA = "direct_data" - DESCRIPTION = "description" - - -class ScraperInput(BaseModel): - site_type: list[Site] - search_term: str | None = None - google_search_term: str | None = None - - location: str | None = None - locations: list[str] | None = None - country: Country | None = Country.USA - distance: int | None = None - is_remote: bool = False - job_type: JobType | None = None - easy_apply: bool | None = None - offset: int = 0 - linkedin_fetch_description: bool = False - linkedin_company_ids: list[int] | None = None - description_format: DescriptionFormat | None = DescriptionFormat.MARKDOWN - - results_wanted: int = 15 - hours_old: int | None = None - - -class Scraper(ABC): - def __init__( - self, site: Site, proxies: list[str] | None = None, ca_cert: str | None = None - ): - self.site = site - self.proxies = proxies - self.ca_cert = ca_cert - - @abstractmethod - def scrape(self, scraper_input: ScraperInput) -> JobResponse: ... diff --git a/src/main.py b/src/main.py index 4f5c782..99a87c1 100644 --- a/src/main.py +++ b/src/main.py @@ -1,10 +1,9 @@ -from telegram import Update, ReplyKeyboardMarkup, ReplyKeyboardRemove -from telegram.ext import Application, CommandHandler, ConversationHandler, \ - MessageHandler, filters, ContextTypes, CallbackQueryHandler +from telegram import Update +from telegram.ext import Application, CommandHandler, CallbackQueryHandler from config.settings import settings -from jobspy import Site -from jobspy.scrapers.utils import create_logger +from scrapers import Site +from scrapers.utils import create_logger from telegram_handler import TelegramDefaultHandler from telegram_handler.button_callback.telegram_callback_handler import TelegramCallHandler from telegram_handler.telegram_myinfo_handler import my_info_handler diff --git a/src/model/job_repository.py b/src/model/job_repository.py index 6a8adfa..25b2afe 100644 --- a/src/model/job_repository.py +++ b/src/model/job_repository.py @@ -3,8 +3,8 @@ 
from typing import Optional from dotenv import load_dotenv from pymongo import UpdateOne -from jobspy import create_logger -from jobspy.jobs import JobPost +from scrapers import create_logger +from jobs import JobPost from .monogo_db import mongo_client load_dotenv() diff --git a/src/model/monogo_db.py b/src/model/monogo_db.py index f15c4c8..c8ab81c 100644 --- a/src/model/monogo_db.py +++ b/src/model/monogo_db.py @@ -2,7 +2,7 @@ from pymongo import MongoClient from pymongo.synchronous.database import Database from config.settings import settings -from jobspy import create_logger +from scrapers.utils import create_logger class MongoDB: diff --git a/src/model/user_repository.py b/src/model/user_repository.py index 9df7edc..608bf32 100644 --- a/src/model/user_repository.py +++ b/src/model/user_repository.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv from pymongo import UpdateOne from config.cache_manager import cache_manager -from jobspy import create_logger +from scrapers.utils import create_logger from .User import User from .monogo_db import mongo_client diff --git a/src/jobspy/__init__.py b/src/scrapers/__init__.py similarity index 86% rename from src/jobspy/__init__.py rename to src/scrapers/__init__.py index f176955..c38c8db 100644 --- a/src/jobspy/__init__.py +++ b/src/scrapers/__init__.py @@ -1,31 +1,29 @@ from __future__ import annotations import re -from threading import Lock +from asyncio import Lock, as_completed +from concurrent.futures import ThreadPoolExecutor -import pandas as pd -from typing import Tuple -from concurrent.futures import ThreadPoolExecutor, as_completed - -from .scrapers.site import Site - -from .scrapers.goozali import GoozaliScraper - -from .jobs import JobPost, JobType, Location -from .scrapers.utils import set_logger_level, extract_salary, create_logger -from .scrapers.indeed import IndeedScraper -from .scrapers.ziprecruiter import ZipRecruiterScraper -from .scrapers.glassdoor import GlassdoorScraper -from .scrapers.google 
import GoogleJobsScraper -from .scrapers.linkedin import LinkedInScraper -from .scrapers import SalarySource, ScraperInput, JobResponse, Country -from .scrapers.exceptions import ( - LinkedInException, - IndeedException, - ZipRecruiterException, - GlassdoorException, - GoogleJobsException, +from jobs import ( + Enum, + JobType, + JobResponse, + Country, + JobPost, ) +from .glassdoor import GlassdoorScraper +from .google import GoogleJobsScraper +from .goozali import GoozaliScraper +from .indeed import IndeedScraper +from .linkedin import LinkedInScraper +from .site import Site +from .utils import set_logger_level, create_logger +from .ziprecruiter import ZipRecruiterScraper + + +class SalarySource(Enum): + DIRECT_DATA = "direct_data" + DESCRIPTION = "description" def scrape_jobs( @@ -55,7 +53,7 @@ def scrape_jobs( ) -> (list[JobPost], list[JobPost]): """ Simultaneously scrapes job data from multiple job sites. - :return: pandas dataframe containing job data + :return: list of jobPost, list of new jobPost """ SCRAPER_MAPPING = { Site.LINKEDIN: LinkedInScraper, @@ -111,7 +109,7 @@ def scrape_jobs( hours_old=hours_old ) - def scrape_site(site: Site) -> Tuple[str, JobResponse]: + def scrape_site(site: Site) -> tuple[str, JobResponse]: scraper_class = SCRAPER_MAPPING[site] scraper = scraper_class(proxies=proxies, ca_cert=ca_cert) scraped_data: JobResponse = scraper.scrape(scraper_input) diff --git a/src/jobspy/scrapers/exceptions.py b/src/scrapers/exceptions.py similarity index 96% rename from src/jobspy/scrapers/exceptions.py rename to src/scrapers/exceptions.py index eba0479..dcfb3d2 100644 --- a/src/jobspy/scrapers/exceptions.py +++ b/src/scrapers/exceptions.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.exceptions +scrapers.exceptions ~~~~~~~~~~~~~~~~~~~ This module contains the set of Scrapers' exceptions. 
diff --git a/src/jobspy/scrapers/glassdoor/GlassDoorLocation.py b/src/scrapers/glassdoor/GlassDoorLocation.py similarity index 100% rename from src/jobspy/scrapers/glassdoor/GlassDoorLocation.py rename to src/scrapers/glassdoor/GlassDoorLocation.py diff --git a/src/jobspy/scrapers/glassdoor/__init__.py b/src/scrapers/glassdoor/__init__.py similarity index 99% rename from src/jobspy/scrapers/glassdoor/__init__.py rename to src/scrapers/glassdoor/__init__.py index 6266501..d0bf582 100644 --- a/src/jobspy/scrapers/glassdoor/__init__.py +++ b/src/scrapers/glassdoor/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.glassdoor +scrapers.glassdoor ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape Glassdoor. @@ -7,7 +7,6 @@ This module contains routines to scrape Glassdoor. from __future__ import annotations -from dataclasses import dataclass import re import json import requests @@ -18,14 +17,16 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from .GlassDoorLocation import GlassDoorLocationResponse, get_location_id, get_location_type from .constants import fallback_token, query_template, headers -from .. 
import Scraper, ScraperInput, Site +from ..scraper import Scraper +from ..scraper_input import ScraperInput +from ..site import Site from ..utils import extract_emails_from_text, create_logger from ..exceptions import GlassdoorException from ..utils import ( create_session, markdown_converter, ) -from ...jobs import ( +from jobs import ( JobPost, Compensation, CompensationInterval, diff --git a/src/jobspy/scrapers/glassdoor/constants.py b/src/scrapers/glassdoor/constants.py similarity index 100% rename from src/jobspy/scrapers/glassdoor/constants.py rename to src/scrapers/glassdoor/constants.py diff --git a/src/jobspy/scrapers/google/__init__.py b/src/scrapers/google/__init__.py similarity index 98% rename from src/jobspy/scrapers/google/__init__.py rename to src/scrapers/google/__init__.py index 523e6f5..05cab69 100644 --- a/src/jobspy/scrapers/google/__init__.py +++ b/src/scrapers/google/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.google +scrapers.google ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape Google. @@ -14,12 +14,14 @@ from typing import Tuple from datetime import datetime, timedelta from .constants import headers_jobs, headers_initial, async_param -from .. 
import Scraper, ScraperInput, Site +from ..scraper import Scraper +from ..scraper_input import ScraperInput +from ..site import Site from ..utils import extract_emails_from_text, create_logger, extract_job_type from ..utils import ( create_session, ) -from ...jobs import ( +from jobs import ( JobPost, JobResponse, Location, diff --git a/src/jobspy/scrapers/google/constants.py b/src/scrapers/google/constants.py similarity index 100% rename from src/jobspy/scrapers/google/constants.py rename to src/scrapers/google/constants.py diff --git a/src/jobspy/scrapers/goozali/GoozaliMapper.py b/src/scrapers/goozali/GoozaliMapper.py similarity index 99% rename from src/jobspy/scrapers/goozali/GoozaliMapper.py rename to src/scrapers/goozali/GoozaliMapper.py index a68e6ba..0b37e43 100644 --- a/src/jobspy/scrapers/goozali/GoozaliMapper.py +++ b/src/scrapers/goozali/GoozaliMapper.py @@ -1,7 +1,7 @@ from datetime import datetime import json -from jobspy.jobs import JobPost, Location +from jobs import JobPost, Location from .model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow, GoozaliColumn, GoozaliColumnChoice, GoozaliResponseData from .constants import job_post_column_to_goozali_column, job_post_column_names diff --git a/src/jobspy/scrapers/goozali/__init__.py b/src/scrapers/goozali/__init__.py similarity index 97% rename from src/jobspy/scrapers/goozali/__init__.py rename to src/scrapers/goozali/__init__.py index 59e334e..2a3f1b2 100644 --- a/src/jobspy/scrapers/goozali/__init__.py +++ b/src/scrapers/goozali/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.Goozali +scrapers.Goozali ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape Goozali. @@ -9,15 +9,16 @@ from __future__ import annotations from model.User import User from model.user_repository import user_repository -from .. 
import Scraper, ScraperInput from .GoozaliMapper import GoozaliMapper from .GoozaliScrapperComponent import GoozaliScrapperComponent from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest +from ..scraper import Scraper +from ..scraper_input import ScraperInput from ..site import Site from ..utils import create_dict_by_key_and_value, create_session, create_logger -from ...jobs import ( +from jobs import ( JobPost, JobResponse, ) diff --git a/src/jobspy/scrapers/goozali/model/GoozaliColumn.py b/src/scrapers/goozali/model/GoozaliColumn.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliColumn.py rename to src/scrapers/goozali/model/GoozaliColumn.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliColumnChoice.py b/src/scrapers/goozali/model/GoozaliColumnChoice.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliColumnChoice.py rename to src/scrapers/goozali/model/GoozaliColumnChoice.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliColumnTypeOptions.py b/src/scrapers/goozali/model/GoozaliColumnTypeOptions.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliColumnTypeOptions.py rename to src/scrapers/goozali/model/GoozaliColumnTypeOptions.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliFieldChoice.py b/src/scrapers/goozali/model/GoozaliFieldChoice.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliFieldChoice.py rename to src/scrapers/goozali/model/GoozaliFieldChoice.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliFullRequest.py b/src/scrapers/goozali/model/GoozaliFullRequest.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliFullRequest.py rename to src/scrapers/goozali/model/GoozaliFullRequest.py diff --git 
a/src/jobspy/scrapers/goozali/model/GoozaliPartRequest.py b/src/scrapers/goozali/model/GoozaliPartRequest.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliPartRequest.py rename to src/scrapers/goozali/model/GoozaliPartRequest.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliRequest.py b/src/scrapers/goozali/model/GoozaliRequest.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliRequest.py rename to src/scrapers/goozali/model/GoozaliRequest.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliResponse.py b/src/scrapers/goozali/model/GoozaliResponse.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliResponse.py rename to src/scrapers/goozali/model/GoozaliResponse.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliResponseData.py b/src/scrapers/goozali/model/GoozaliResponseData.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliResponseData.py rename to src/scrapers/goozali/model/GoozaliResponseData.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliRow.py b/src/scrapers/goozali/model/GoozaliRow.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliRow.py rename to src/scrapers/goozali/model/GoozaliRow.py diff --git a/src/jobspy/scrapers/goozali/model/__init__.py b/src/scrapers/goozali/model/__init__.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/__init__.py rename to src/scrapers/goozali/model/__init__.py diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/scrapers/indeed/__init__.py similarity index 94% rename from src/jobspy/scrapers/indeed/__init__.py rename to src/scrapers/indeed/__init__.py index 05ae16c..80ef94b 100644 --- a/src/jobspy/scrapers/indeed/__init__.py +++ b/src/scrapers/indeed/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.indeed +scrapers.indeed ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape Indeed. 
@@ -12,7 +12,9 @@ from typing import Tuple from datetime import datetime from .constants import job_search_query, api_headers -from .. import Scraper, ScraperInput, Site +from ..scraper import Scraper +from ..scraper_input import ScraperInput +from ..site import Site from ..utils import ( extract_emails_from_text, get_enum_from_job_type, @@ -20,7 +22,7 @@ from ..utils import ( create_session, create_logger, ) -from ...jobs import ( +from jobs import ( JobPost, Compensation, CompensationInterval, @@ -35,7 +37,7 @@ logger = create_logger("Indeed") class IndeedScraper(Scraper): def __init__( - self, proxies: list[str] | str | None = None, ca_cert: str | None = None + self, proxies: list[str] | str | None = None, ca_cert: str | None = None ): """ Initializes IndeedScraper with the Indeed API url @@ -74,7 +76,7 @@ class IndeedScraper(Scraper): while len(self.seen_urls) < scraper_input.results_wanted + scraper_input.offset: logger.info( f"search page: { - page} / {math.ceil(scraper_input.results_wanted / self.jobs_per_page)}" + page} / {math.ceil(scraper_input.results_wanted / self.jobs_per_page)}" ) jobs, cursor = self._scrape_page(cursor, location) if not jobs: @@ -85,9 +87,9 @@ class IndeedScraper(Scraper): return JobResponse( jobs=job_list[ - scraper_input.offset: scraper_input.offset - + scraper_input.results_wanted - ] + scraper_input.offset: scraper_input.offset + + scraper_input.results_wanted + ] ) def _scrape_page(self, cursor: str | None, location: str) -> Tuple[list[JobPost], str | None]: @@ -108,7 +110,7 @@ class IndeedScraper(Scraper): what=(f'what: "{search_term}"' if search_term else ""), location=( f'location: {{where: "{location}", radius: { - self.scraper_input.distance}, radiusUnit: MILES}}' + self.scraper_input.distance}, radiusUnit: MILES}}' if location else "" ), @@ -130,7 +132,7 @@ class IndeedScraper(Scraper): if not response.ok: logger.info( f"responded with status code: { - response.status_code} (submit GitHub issue if this appears to be a 
bug)" + response.status_code} (submit GitHub issue if this appears to be a bug)" ) return jobs, new_cursor data = response.json() @@ -232,7 +234,7 @@ class IndeedScraper(Scraper): company_name=job["employer"].get( "name") if job.get("employer") else None, company_url=(f"{self.base_url}{ - rel_url}" if job["employer"] else None), + rel_url}" if job["employer"] else None), company_url_direct=( employer["links"]["corporateWebsite"] if employer else None ), @@ -345,7 +347,7 @@ class IndeedScraper(Scraper): for keyword in remote_keywords ) return ( - is_remote_in_attributes or is_remote_in_description or is_remote_in_location + is_remote_in_attributes or is_remote_in_description or is_remote_in_location ) @staticmethod diff --git a/src/jobspy/scrapers/indeed/constants.py b/src/scrapers/indeed/constants.py similarity index 100% rename from src/jobspy/scrapers/indeed/constants.py rename to src/scrapers/indeed/constants.py diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/scrapers/linkedin/__init__.py similarity index 98% rename from src/jobspy/scrapers/linkedin/__init__.py rename to src/scrapers/linkedin/__init__.py index 4519610..8e04d3f 100644 --- a/src/jobspy/scrapers/linkedin/__init__.py +++ b/src/scrapers/linkedin/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.linkedin +scrapers.linkedin ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape LinkedIn. @@ -17,13 +17,15 @@ from datetime import datetime from bs4.element import Tag from bs4 import BeautifulSoup from urllib.parse import urlparse, urlunparse, unquote -from requests.exceptions import RetryError, RequestException +from requests.exceptions import RetryError from urllib3.exceptions import MaxRetryError from .constants import headers -from .. 
import Scraper, ScraperInput, Site +from ..scraper import Scraper +from ..scraper_input import ScraperInput +from ..site import Site from ..exceptions import LinkedInException from ..utils import create_session, remove_attributes, create_logger -from ...jobs import ( +from jobs import ( JobPost, Location, JobResponse, diff --git a/src/jobspy/scrapers/linkedin/constants.py b/src/scrapers/linkedin/constants.py similarity index 100% rename from src/jobspy/scrapers/linkedin/constants.py rename to src/scrapers/linkedin/constants.py diff --git a/src/scrapers/scraper.py b/src/scrapers/scraper.py new file mode 100644 index 0000000..c5ba529 --- /dev/null +++ b/src/scrapers/scraper.py @@ -0,0 +1,17 @@ +from abc import ABC, abstractmethod + +from jobs import JobResponse +from scrapers.site import Site +from scrapers.scraper_input import ScraperInput + + +class Scraper(ABC): + def __init__( + self, site: Site, proxies: list[str] | None = None, ca_cert: str | None = None + ): + self.site = site + self.proxies = proxies + self.ca_cert = ca_cert + + @abstractmethod + def scrape(self, scraper_input: ScraperInput) -> JobResponse: ... 
\ No newline at end of file diff --git a/src/scrapers/scraper_input.py b/src/scrapers/scraper_input.py new file mode 100644 index 0000000..9b3a183 --- /dev/null +++ b/src/scrapers/scraper_input.py @@ -0,0 +1,25 @@ +from pydantic import BaseModel + +from jobs import Country, JobType, DescriptionFormat +from scrapers.site import Site + + +class ScraperInput(BaseModel): + site_type: list[Site] + search_term: str | None = None + google_search_term: str | None = None + + location: str | None = None + locations: list[str] | None = None + country: Country | None = Country.USA + distance: int | None = None + is_remote: bool = False + job_type: JobType | None = None + easy_apply: bool | None = None + offset: int = 0 + linkedin_fetch_description: bool = False + linkedin_company_ids: list[int] | None = None + description_format: DescriptionFormat | None = DescriptionFormat.MARKDOWN + + results_wanted: int = 15 + hours_old: int | None = None \ No newline at end of file diff --git a/src/jobspy/scrapers/site.py b/src/scrapers/site.py similarity index 100% rename from src/jobspy/scrapers/site.py rename to src/scrapers/site.py diff --git a/src/jobspy/scrapers/utils.py b/src/scrapers/utils.py similarity index 98% rename from src/jobspy/scrapers/utils.py rename to src/scrapers/utils.py index 6947650..ac77352 100644 --- a/src/jobspy/scrapers/utils.py +++ b/src/scrapers/utils.py @@ -11,11 +11,11 @@ import numpy as np from markdownify import markdownify as md from requests.adapters import HTTPAdapter, Retry -from ..jobs import CompensationInterval, JobType +from jobs import CompensationInterval, JobType def create_logger(name: str): - logger = logging.getLogger(f"JobSpy:{name}") + logger = logging.getLogger(f"JobSeekerTG:{name}") logger.propagate = False if not logger.handlers: logger.setLevel(logging.INFO) @@ -143,7 +143,7 @@ def set_logger_level(verbose: int = 2): level = getattr(logging, level_name.upper(), None) if level is not None: for logger_name in 
logging.root.manager.loggerDict: - if logger_name.startswith("JobSpy:"): + if logger_name.startswith("JobSeekerTG:"): logging.getLogger(logger_name).setLevel(level) else: raise ValueError(f"Invalid log level: {level_name}") diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/scrapers/ziprecruiter/__init__.py similarity index 98% rename from src/jobspy/scrapers/ziprecruiter/__init__.py rename to src/scrapers/ziprecruiter/__init__.py index 294ca8c..90dab76 100644 --- a/src/jobspy/scrapers/ziprecruiter/__init__.py +++ b/src/scrapers/ziprecruiter/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.ziprecruiter +scrapers.ziprecruiter ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape ZipRecruiter. @@ -19,7 +19,9 @@ from concurrent.futures import ThreadPoolExecutor from bs4 import BeautifulSoup from .constants import headers -from .. import Scraper, ScraperInput, Site +from ..site import Site +from ..scraper import Scraper +from ..scraper_input import ScraperInput from ..utils import ( extract_emails_from_text, create_session, @@ -27,7 +29,7 @@ from ..utils import ( remove_attributes, create_logger, ) -from ...jobs import ( +from jobs import ( JobPost, Compensation, Location, diff --git a/src/jobspy/scrapers/ziprecruiter/constants.py b/src/scrapers/ziprecruiter/constants.py similarity index 100% rename from src/jobspy/scrapers/ziprecruiter/constants.py rename to src/scrapers/ziprecruiter/constants.py diff --git a/src/telegram_bot.py b/src/telegram_bot.py index 1bfaff9..6f7c1d7 100644 --- a/src/telegram_bot.py +++ b/src/telegram_bot.py @@ -1,4 +1,3 @@ -import os from typing import Union from dotenv import load_dotenv @@ -6,8 +5,8 @@ from telegram import Bot, InlineKeyboardButton, InlineKeyboardMarkup from telegram.constants import ReactionEmoji from config.settings import settings -from jobspy.jobs import JobPost -from jobspy.scrapers.utils import create_logger +from jobs import JobPost +from scrapers.utils import create_logger load_dotenv() diff --git 
a/src/telegram_handler/button_callback/button_callback_context.py b/src/telegram_handler/button_callback/button_callback_context.py index 5d06367..52c5fe9 100644 --- a/src/telegram_handler/button_callback/button_callback_context.py +++ b/src/telegram_handler/button_callback/button_callback_context.py @@ -3,7 +3,7 @@ from __future__ import annotations from telegram import MaybeInaccessibleMessage from telegram.constants import ReactionEmoji -from jobspy import create_logger +from scrapers import create_logger from model.job_repository import job_repository from telegram_handler.button_callback.button_fire_strategy import FireStrategy from telegram_handler.button_callback.button_job_title_strategy import JobTitleStrategy diff --git a/src/telegram_handler/button_callback/button_fire_strategy.py b/src/telegram_handler/button_callback/button_fire_strategy.py index 90f6050..44af48b 100644 --- a/src/telegram_handler/button_callback/button_fire_strategy.py +++ b/src/telegram_handler/button_callback/button_fire_strategy.py @@ -1,7 +1,7 @@ from telegram import MaybeInaccessibleMessage from telegram.constants import ReactionEmoji -from jobspy import create_logger +from scrapers import create_logger from model.job_repository import job_repository from telegram_bot import TelegramBot from telegram_handler.button_callback.button_strategy import ButtonStrategy diff --git a/src/telegram_handler/button_callback/button_job_title_strategy.py b/src/telegram_handler/button_callback/button_job_title_strategy.py index a96bbf7..bec2535 100644 --- a/src/telegram_handler/button_callback/button_job_title_strategy.py +++ b/src/telegram_handler/button_callback/button_job_title_strategy.py @@ -1,6 +1,6 @@ from typing import Union -from jobspy import JobPost +from scrapers import JobPost from telegram_bot import TelegramBot from telegram_handler.button_callback.button_strategy import ButtonStrategy diff --git a/src/telegram_handler/button_callback/telegram_callback_handler.py 
b/src/telegram_handler/button_callback/telegram_callback_handler.py index b43f4cc..051bd8b 100644 --- a/src/telegram_handler/button_callback/telegram_callback_handler.py +++ b/src/telegram_handler/button_callback/telegram_callback_handler.py @@ -3,7 +3,7 @@ from telegram.ext import ( ContextTypes, ) -from jobspy import create_logger +from scrapers import create_logger from telegram_bot import TelegramBot from telegram_handler.button_callback.button_callback_context import ButtonCallBackContext diff --git a/src/telegram_handler/telegram_default_handler.py b/src/telegram_handler/telegram_default_handler.py index 6bb80cc..0266d4f 100644 --- a/src/telegram_handler/telegram_default_handler.py +++ b/src/telegram_handler/telegram_default_handler.py @@ -4,8 +4,8 @@ from telegram.ext import ( ContextTypes, ) -from jobspy import Site, scrape_jobs, JobPost -from jobspy.scrapers.utils import create_logger +from scrapers import Site, scrape_jobs, JobPost +from scrapers.utils import create_logger from model.job_repository import JobRepository from model.user_repository import user_repository from telegram_bot import TelegramBot diff --git a/src/telegram_handler/telegram_myinfo_handler.py b/src/telegram_handler/telegram_myinfo_handler.py index 853a707..4097833 100644 --- a/src/telegram_handler/telegram_myinfo_handler.py +++ b/src/telegram_handler/telegram_myinfo_handler.py @@ -4,7 +4,7 @@ from telegram.ext import ( ContextTypes, ) -from jobspy.scrapers.utils import create_logger +from scrapers.utils import create_logger from model.user_repository import user_repository from telegram_bot import TelegramBot from telegram_handler.telegram_handler import TelegramHandler diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index 68058c9..d30c643 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -7,7 +7,7 @@ from telegram.ext import ( ) from config.cache_manager 
import cache_manager -from jobspy.scrapers.utils import create_logger +from scrapers.utils import create_logger from model.Position import Position from model.User import User from model.user_repository import user_repository diff --git a/tests/test_all.py b/tests/test_all.py index 3285611..6a6ff60 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -1,4 +1,4 @@ -from jobspy import scrape_jobs +from scrapers import scrape_jobs import pandas as pd diff --git a/tests/test_glassdoor.py b/tests/test_glassdoor.py index 267a3e6..16676ba 100644 --- a/tests/test_glassdoor.py +++ b/tests/test_glassdoor.py @@ -1,4 +1,4 @@ -from jobspy import scrape_jobs +from scrapers import scrape_jobs import pandas as pd diff --git a/tests/test_google.py b/tests/test_google.py index 9f30ffe..5fa10f3 100644 --- a/tests/test_google.py +++ b/tests/test_google.py @@ -1,4 +1,4 @@ -from jobspy import scrape_jobs +from scrapers import scrape_jobs import pandas as pd diff --git a/tests/test_goozali.py b/tests/test_goozali.py index 2f59956..9f68ab5 100644 --- a/tests/test_goozali.py +++ b/tests/test_goozali.py @@ -1,12 +1,12 @@ import json import os -from jobspy.jobs import JobPost -from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper -from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent -from jobspy.scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column -from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliResponseData -from jobspy.scrapers.utils import create_dict_by_key_and_value +from jobs import JobPost +from scrapers.goozali.GoozaliMapper import GoozaliMapper +from scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent +from scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column +from scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliResponseData +from scrapers.utils import 
create_dict_by_key_and_value # URL Example # https://airtable.com/v0.3/view/viwagEIbkfz2iMsLU/readSharedViewData?stringifiedObjectParams=%7B%22shouldUseNestedResponseFormat%22%3Atrue%7D&requestId=reqXyRSHWlXyiRgY9&accessPolicy=%7B%22allowedActions%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSignedAttachmentUrls%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows%20*%5BdisplayedInView%3DviwagEIbkfz2iMsLU%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%5D%2C%22shareId%22%3A%22shr97tl6luEk4Ca9R%22%2C%22applicationId%22%3A%22app5sYJyDgcRbJWYU%22%2C%22generationNumber%22%3A0%2C%22expires%22%3A%222025-01-02T00%3A00%3A00.000Z%22%2C%22signature%22%3A%223aa292ee44d15aa75d9506200329e413653471f89e000fa370ef9fa38393070a%22%7D diff --git a/tests/test_indeed.py b/tests/test_indeed.py index 714fc53..0468afb 100644 --- a/tests/test_indeed.py +++ b/tests/test_indeed.py @@ -1,4 +1,4 @@ -from jobspy import scrape_jobs +from scrapers import scrape_jobs import pandas as pd diff --git a/tests/test_linkedin.py b/tests/test_linkedin.py index 0cb5ec4..29d0bf8 100644 --- a/tests/test_linkedin.py +++ b/tests/test_linkedin.py @@ -1,4 +1,4 @@ -from jobspy import scrape_jobs +from scrapers import scrape_jobs import pandas as pd diff --git a/tests/test_util.py b/tests/test_util.py index 5ad8751..bfc3f8c 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,7 +1,7 @@ from datetime import datetime, date from typing import List -from jobspy import JobPost, Location, Country +from scrapers import JobPost, Location, Country # Creating some test job posts diff --git a/tests/test_ziprecruiter.py 
b/tests/test_ziprecruiter.py index 61de491..a023590 100644 --- a/tests/test_ziprecruiter.py +++ b/tests/test_ziprecruiter.py @@ -1,4 +1,4 @@ -from jobspy import scrape_jobs +from scrapers import scrape_jobs import pandas as pd From 0d01789313ed2a3313e46997bbced3c81543ba11 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Mon, 6 Jan 2025 15:14:37 +0200 Subject: [PATCH 16/22] fixed error --- src/scrapers/__init__.py | 6 ++++-- .../scrapers/goozali/GoozaliScrapperComponent.py | 3 +-- src/{jobspy => }/scrapers/goozali/constants.py | 3 +-- 3 files changed, 6 insertions(+), 6 deletions(-) rename src/{jobspy => }/scrapers/goozali/GoozaliScrapperComponent.py (98%) rename src/{jobspy => }/scrapers/goozali/constants.py (98%) diff --git a/src/scrapers/__init__.py b/src/scrapers/__init__.py index c38c8db..a28f5ae 100644 --- a/src/scrapers/__init__.py +++ b/src/scrapers/__init__.py @@ -1,8 +1,9 @@ from __future__ import annotations import re -from asyncio import Lock, as_completed -from concurrent.futures import ThreadPoolExecutor +from threading import Lock +from concurrent.futures import ThreadPoolExecutor, as_completed + from jobs import ( Enum, @@ -16,6 +17,7 @@ from .google import GoogleJobsScraper from .goozali import GoozaliScraper from .indeed import IndeedScraper from .linkedin import LinkedInScraper +from .scraper_input import ScraperInput from .site import Site from .utils import set_logger_level, create_logger from .ziprecruiter import ZipRecruiterScraper diff --git a/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py b/src/scrapers/goozali/GoozaliScrapperComponent.py similarity index 98% rename from src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py rename to src/scrapers/goozali/GoozaliScrapperComponent.py index 06e9a5c..b1b1cbd 100644 --- a/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py +++ b/src/scrapers/goozali/GoozaliScrapperComponent.py @@ -1,7 +1,6 @@ from datetime import datetime, timedelta -from . 
import GoozaliFieldChoice -from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice +from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice,GoozaliFieldChoice from ..utils import create_logger # Mapping function to convert parsed dictionary into GoozaliResponseData diff --git a/src/jobspy/scrapers/goozali/constants.py b/src/scrapers/goozali/constants.py similarity index 98% rename from src/jobspy/scrapers/goozali/constants.py rename to src/scrapers/goozali/constants.py index 2f719c5..a598b3d 100644 --- a/src/jobspy/scrapers/goozali/constants.py +++ b/src/scrapers/goozali/constants.py @@ -1,6 +1,5 @@ from model.Position import Position -from . import GoozaliFieldChoice -from .model import GoozaliColumn +from .model import GoozaliColumn, GoozaliFieldChoice job_post_column_to_goozali_column = { "date_posted": "Discovered", From 3db58a84a5f2e0db067dab4abdfbf1620caf63fe Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Mon, 6 Jan 2025 15:43:21 +0200 Subject: [PATCH 17/22] fixed goozali --- src/scrapers/__init__.py | 3 ++ .../goozali/GoozaliScrapperComponent.py | 32 +++++++++++++------ src/scrapers/goozali/__init__.py | 22 ++++++------- .../goozali/model/GoozaliFullRequest.py | 8 ++--- src/scrapers/scraper_input.py | 4 ++- .../telegram_default_handler.py | 1 + 6 files changed, 43 insertions(+), 27 deletions(-) diff --git a/src/scrapers/__init__.py b/src/scrapers/__init__.py index a28f5ae..65e4a54 100644 --- a/src/scrapers/__init__.py +++ b/src/scrapers/__init__.py @@ -12,6 +12,7 @@ from jobs import ( Country, JobPost, ) +from model.User import User from .glassdoor import GlassdoorScraper from .google import GoogleJobsScraper from .goozali import GoozaliScraper @@ -30,6 +31,7 @@ class SalarySource(Enum): def scrape_jobs( site_name: str | list[str] | Site | list[Site] | None = None, + user: User = None, search_term: str | None = None, google_search_term: str | None = None, location: str | None = None, @@ -93,6 +95,7 @@ def scrape_jobs( country_enum = 
Country.from_string(country_indeed) scraper_input = ScraperInput( + user=user, site_type=get_site_type(), country=country_enum, search_term=search_term, diff --git a/src/scrapers/goozali/GoozaliScrapperComponent.py b/src/scrapers/goozali/GoozaliScrapperComponent.py index b1b1cbd..5025713 100644 --- a/src/scrapers/goozali/GoozaliScrapperComponent.py +++ b/src/scrapers/goozali/GoozaliScrapperComponent.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta -from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice,GoozaliFieldChoice +from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice, GoozaliFieldChoice from ..utils import create_logger # Mapping function to convert parsed dictionary into GoozaliResponseData @@ -13,12 +13,20 @@ class GoozaliScrapperComponent: pass # Function to filter GoozaliRows based on hours old - def filter_rows_by_column_choice(self, rows: list[GoozaliRow], column: GoozaliColumn, column_choice: GoozaliColumnChoice) -> list[GoozaliRow]: + def filter_rows_by_column_choice(self, rows: list[GoozaliRow], column: GoozaliColumn, + column_choices: list[GoozaliColumnChoice]) -> list[GoozaliRow]: return [ - row for row in rows - if row.cellValuesByColumnId[column.id] == column_choice.id + row + for row in rows + if row.cellValuesByColumnId.get(column.id) + and any(choice.id == row.cellValuesByColumnId[column.id] for choice in column_choices) ] + # return [ + # row for row in rows + # if row.cellValuesByColumnId[column.id] == column_choice.id + # ] + def filter_rows_by_hours(self, rows: list[GoozaliRow], hours: int) -> list[GoozaliRow]: # Current time now = datetime.now() @@ -39,14 +47,20 @@ class GoozaliScrapperComponent: if (column.name == column_name): return column - def find_choice_from_column(self, column: GoozaliColumn, choice_name: GoozaliFieldChoice) -> GoozaliColumnChoice: + def find_choices_from_column(self, column: GoozaliColumn, choices: list[GoozaliFieldChoice]) -> list[ + GoozaliColumnChoice]: if not 
column.typeOptions.choices: logger.exception(f"Choices for column {column.name} doesn't exist") raise Exception(f"Choices for column {column.name} doesn't exist") + chosen_values = [c.value for c in choices] + goozali_column_choices = [] for key, choice in column.typeOptions.choices.items(): - if choice.name == choice_name.value: - return choice + if choice.name in chosen_values: + goozali_column_choices.append(choice) - logger.exception(f"Can't find {choice_name} for column {column.name}") - raise Exception(f"Can't find {choice_name} for column {column.name}") + if len(goozali_column_choices) == 0: + logger.exception(f"Can't find {choices} for column {column.name}") + raise Exception(f"Can't find {choices} for column {column.name}") + + return goozali_column_choices diff --git a/src/scrapers/goozali/__init__.py b/src/scrapers/goozali/__init__.py index 2a3f1b2..e2e2f34 100644 --- a/src/scrapers/goozali/__init__.py +++ b/src/scrapers/goozali/__init__.py @@ -7,8 +7,10 @@ This module contains routines to scrape Goozali. 
from __future__ import annotations -from model.User import User -from model.user_repository import user_repository +from jobs import ( + JobPost, + JobResponse, +) from .GoozaliMapper import GoozaliMapper from .GoozaliScrapperComponent import GoozaliScrapperComponent from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map @@ -16,14 +18,9 @@ from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, Goozal from ..scraper import Scraper from ..scraper_input import ScraperInput from ..site import Site - from ..utils import create_dict_by_key_and_value, create_session, create_logger -from jobs import ( - JobPost, - JobResponse, -) -logger = create_logger("Goozali") +logger = create_logger("GoozaliScraper") class GoozaliScraper(Scraper): @@ -82,12 +79,11 @@ class GoozaliScraper(Scraper): # filter result by Field column = self.component.find_column( goozali_response.data.columns, job_post_column_to_goozali_column["field"]) - user: User = user_repository.find_by_username() - user_goozali_field = position_to_goozali_field_map[user.position] - column_choice = self.component.find_choice_from_column( - column, user_goozali_field) + user_goozali_fields = position_to_goozali_field_map[scraper_input.user.position] + column_choices = self.component.find_choices_from_column( + column, user_goozali_fields) filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice( - goozali_response.data.rows, column, column_choice) + goozali_response.data.rows, column, column_choices) filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours( filtered_rows_by_column_choice, scraper_input.hours_old) dict_column_name_to_column: dict[str, GoozaliColumn] = create_dict_by_key_and_value( diff --git a/src/scrapers/goozali/model/GoozaliFullRequest.py b/src/scrapers/goozali/model/GoozaliFullRequest.py index 3387ed8..8c90193 100644 --- a/src/scrapers/goozali/model/GoozaliFullRequest.py +++ 
b/src/scrapers/goozali/model/GoozaliFullRequest.py @@ -6,13 +6,13 @@ class GoozaliFullRequest(): self.view_id: str = "viwIOzPYaUGxlA0Jd" self.url = base_url.format(view_id=self.view_id) self.application_id: str = "appwewqLk7iUY4azc" - self.air_table_page_load_id: str = "pglqAAzFDZEWCEC7s" + self.air_table_page_load_id: str = "pglke45UFwdvQgBNJ" self.stringifiedObjectParams = { "shouldUseNestedResponseFormat": "true"} self.cookies: dict[str, str] = {} - self.request_id: str = "req4q4tKw3woEEWxw&" + self.request_id: str = "reqGjlEjOQFyRssam" self.share_id: str = "shrQBuWjXd0YgPqV6" - self.signature: str = "be8bd40c133f051f929ebab311c416013f5af0d5acae4264575b88ccf051ee59" + self.signature: str = "7a1402a3f7f6f9a23c8db3849878812f2d3141da60f3b3d6e14dd4a910b91b74" self.headers = self._generate_headers() self.params = self._generate_params() self.cookies = {} @@ -66,7 +66,7 @@ class GoozaliFullRequest(): "shareId": self.share_id, "applicationId": self.application_id, "generationNumber": 0, - "expires": "2025-01-02T00:00:00.000Z", + "expires": "2025-01-30T00:00:00.000Z", "signature": self.signature } # Convert to a JSON string diff --git a/src/scrapers/scraper_input.py b/src/scrapers/scraper_input.py index 9b3a183..381eec2 100644 --- a/src/scrapers/scraper_input.py +++ b/src/scrapers/scraper_input.py @@ -1,11 +1,13 @@ from pydantic import BaseModel from jobs import Country, JobType, DescriptionFormat +from model.User import User from scrapers.site import Site class ScraperInput(BaseModel): site_type: list[Site] + user: User search_term: str | None = None google_search_term: str | None = None @@ -22,4 +24,4 @@ class ScraperInput(BaseModel): description_format: DescriptionFormat | None = DescriptionFormat.MARKDOWN results_wanted: int = 15 - hours_old: int | None = None \ No newline at end of file + hours_old: int | None = None diff --git a/src/telegram_handler/telegram_default_handler.py b/src/telegram_handler/telegram_default_handler.py index 0266d4f..02bcf35 100644 --- 
a/src/telegram_handler/telegram_default_handler.py +++ b/src/telegram_handler/telegram_default_handler.py @@ -58,6 +58,7 @@ class TelegramDefaultHandler(TelegramHandler): f"Start scarping: {site_names_print}") filtered_out_jobs, jobs = scrape_jobs( site_name=self.sites_to_scrap, + user=user, search_term=user.position.value, locations=locations, results_wanted=200, From ecac900d293d6b78b5aa372c68469cb33a313156 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Mon, 6 Jan 2025 15:47:54 +0200 Subject: [PATCH 18/22] removed comments --- src/scrapers/goozali/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/scrapers/goozali/__init__.py b/src/scrapers/goozali/__init__.py index e2e2f34..4396bed 100644 --- a/src/scrapers/goozali/__init__.py +++ b/src/scrapers/goozali/__init__.py @@ -72,11 +72,8 @@ class GoozaliScraper(Scraper): except Exception as e: logger.error(f"Exception: {str(e)}") return JobResponse(jobs=job_list) - # model the response with models goozali_response = self.mapper.map_response_to_goozali_response( response=response) - # suggestL create groupby field and then filter by hours - # filter result by Field column = self.component.find_column( goozali_response.data.columns, job_post_column_to_goozali_column["field"]) user_goozali_fields = position_to_goozali_field_map[scraper_input.user.position] @@ -88,7 +85,6 @@ class GoozaliScraper(Scraper): filtered_rows_by_column_choice, scraper_input.hours_old) dict_column_name_to_column: dict[str, GoozaliColumn] = create_dict_by_key_and_value( goozali_response.data.columns, extract_goozali_column_name) - # map to JobResponse Object for row in filtered_rows_by_age_and_column_choice: job_post = self.mapper.map_goozali_response_to_job_post( row, dict_column_name_to_column) From f400c6eee7dc510b29a0700870b2efe5f0823648 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Mon, 6 Jan 2025 15:51:17 +0200 Subject: [PATCH 19/22] add strip for cites --- src/telegram_handler/telegram_start_handler.py | 2 ++ 1 
file changed, 2 insertions(+) diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index d30c643..a6426f6 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -79,6 +79,8 @@ class TelegramStartHandler: async def address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Asks for a location.""" cities = update.message.text.split(",") + # Remove leading/trailing spaces from each city name + cities = [city.strip() for city in cities] await update.message.set_reaction(ReactionEmoji.FIRE) reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_ADDRESS.name) From 6e841ffc22fdb8ea4071035b635a4b517991a290 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Mon, 6 Jan 2025 16:09:16 +0200 Subject: [PATCH 20/22] added step and updated some messages --- .../start_handler_constats.py | 19 +++++++--- .../telegram_start_handler.py | 36 ++++++++++++++++--- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/telegram_handler/start_handler_constats.py b/src/telegram_handler/start_handler_constats.py index 2cb8267..0644d5f 100644 --- a/src/telegram_handler/start_handler_constats.py +++ b/src/telegram_handler/start_handler_constats.py @@ -1,4 +1,4 @@ -START_MESSAGE: str = "Hi there! I'm Professor Bot, your friendly job search assistant.😊\n" \ +START_MESSAGE: str = "Hi there! I'm JobSeeker Bot, your friendly job search assistant.😊\n" \ "I'm here to help you find the perfect position.\n\n" \ "To stop chatting with me at any time, just send '/cancel'.\n\n" @@ -7,20 +7,29 @@ POSITION_MESSAGE: str = "What kind of position are you looking for? ✨\n" \ POSITION_NOT_FOUND: str = "I couldn't find any positions matching your request. 😕\n" \ "Please try again" -multi_value_message: str = "📌 You can enter multiple tags separated by commas." 
+multi_value_message: str = "Enter multiple values separated by commas (e.g., value1, value2, value3) ✍️" LOCATION_MESSAGE: str = "Where are you hoping to find a position? 🌎\n" \ "(e.g., Rishon Lezion, New York City, San Francisco)\n\n" + multi_value_message EXPERIENCE_MESSAGE: str = "How many years of professional experience do you have in this field? 💼\n" -EXPERIENCE_INVALID: str = "Experience must be a number. 😕\n" \ - "Please try again" +EXPERIENCE_INVALID: str = "Oops! Please enter your experience in years as a number.😕" \ + "For example, 2, 5, or 10." + +JOB_AGE_MESSAGE: str = "How recent should the jobs be? ⏰\n" \ + "(Enter the number of hours, e.g., 24 for last 24 hours, 168 for last week)" + +# JOB_AGE_MESSAGE: str = "Within how many hours do you want to see jobs posted? ⏰\n" \ +# "(Enter a number, e.g., 48 for the last 48 hours)" + +JOB_AGE_INVALID: str = "Oops! Please enter a number for the number of hours. 😕\n" \ + "For example, 24, 48, or 168." FILTER_TILE_MESSAGE: str = "To help me narrow down your search, tell me about any relevant tags or keywords.\n" \ "For example: 'remote', 'entry-level', 'python', 'machine learning', 'QA'.\n\n" + multi_value_message -THANK_YOU_MESSAGE: str = "Thank you for chatting with Professor Bot!\n\n" \ +THANK_YOU_MESSAGE: str = "Thank you for chatting with JobSeeker Bot!\n\n" \ "I can help you find jobs on LinkedIn, Glassdoor, and more." 
SEARCH_MESSAGE: str = "To search for jobs on a specific site, simply send the site name:\n" \ diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index a6426f6..8a35435 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -25,6 +25,7 @@ class Flow(Enum): VERIFY_ADDRESS = 4 VERIFY_FILTERS = 5 SKIP_FILTERS = 6 + JOB_AGE = 7 class TelegramStartHandler: @@ -57,9 +58,9 @@ class TelegramStartHandler: """Stores the selected position and asks for a locations.""" user = update.message.from_user self.logger.info("Position of %s: %s", user.first_name, update.message.text) - await update.message.set_reaction(ReactionEmoji.FIRE) position = next((p for p in Position if p.value == update.message.text), None) if not position: + await update.message.set_reaction(ReactionEmoji.PILE_OF_POO) await update.message.reply_text(POSITION_NOT_FOUND) buttons = [[KeyboardButton(position.value)] for position in Position] reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True, @@ -69,6 +70,8 @@ class TelegramStartHandler: reply_markup=reply_markup, ) return Flow.POSITION.value + + await update.message.set_reaction(ReactionEmoji.FIRE) cached_user: User = cache_manager.find(user.username) cached_user.position = position cache_manager.save(cached_user.username, cached_user) @@ -94,32 +97,55 @@ class TelegramStartHandler: async def verify_address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Verify for a Address.""" - await update.message.set_reaction(ReactionEmoji.FIRE) if update.message.text == "No": + await update.message.set_reaction(ReactionEmoji.PILE_OF_POO) await update.message.reply_text(LOCATION_MESSAGE) return Flow.ADDRESS.value + await update.message.set_reaction(ReactionEmoji.FIRE) await update.message.reply_text(EXPERIENCE_MESSAGE) return Flow.EXPERIENCE.value async def experience(self, update: Update, context: 
ContextTypes.DEFAULT_TYPE) -> int: """Asks for a experience.""" - await update.message.set_reaction(ReactionEmoji.FIRE) user = update.message.from_user self.logger.info("Experience of %s: %s", user.first_name, update.message.text) if not update.message.text.isnumeric(): + await update.message.set_reaction(ReactionEmoji.PILE_OF_POO) await update.message.reply_text(EXPERIENCE_INVALID) await update.message.reply_text(EXPERIENCE_MESSAGE) return Flow.EXPERIENCE.value + await update.message.set_reaction(ReactionEmoji.FIRE) cached_user: User = cache_manager.find(update.message.from_user.username) cached_user.experience = update.message.text cache_manager.save(cached_user.username, cached_user) await update.message.reply_text( FILTER_TILE_MESSAGE) + return Flow.JOB_AGE.value + + async def job_age(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: + """Asks for a Job age in hours.""" + await update.message.set_reaction(ReactionEmoji.FIRE) + user = update.message.from_user + self.logger.info("Job age of %s: %s", user.first_name, update.message.text) + + if not update.message.text.isnumeric(): + await update.message.set_reaction(ReactionEmoji.PILE_OF_POO) + await update.message.reply_text(EXPERIENCE_INVALID) + await update.message.reply_text(EXPERIENCE_MESSAGE) + + return Flow.EXPERIENCE.value + await update.message.set_reaction(ReactionEmoji.FIRE) + cached_user: User = cache_manager.find(update.message.from_user.username) + cached_user.experience = update.message.text + cache_manager.save(cached_user.username, cached_user) + await update.message.reply_text( + FILTER_TILE_MESSAGE) + return Flow.FILTERS.value async def filters_flow(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: @@ -138,11 +164,12 @@ class TelegramStartHandler: async def verify_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: """Verify for a filters_flow.""" - await update.message.set_reaction(ReactionEmoji.FIRE) if update.message.text == "No": + await 
update.message.set_reaction(ReactionEmoji.PILE_OF_POO) await update.message.reply_text(FILTER_TILE_MESSAGE) return Flow.FILTERS.value + await update.message.set_reaction(ReactionEmoji.FIRE) await update.message.reply_text(THANK_YOU_MESSAGE) await update.message.reply_text(SEARCH_MESSAGE) cached_user: User = cache_manager.find(update.message.from_user.username) @@ -180,6 +207,7 @@ start_conv_handler = ConversationHandler( Flow.ADDRESS.value: [MessageHandler(filters.TEXT, start_handler.address)], Flow.VERIFY_ADDRESS.value: [MessageHandler(filters.TEXT, start_handler.verify_address)], Flow.EXPERIENCE.value: [MessageHandler(filters.TEXT, start_handler.experience)], + Flow.JOB_AGE.value: [MessageHandler(filters.TEXT, start_handler.job_age)], Flow.FILTERS.value: [MessageHandler(filters.TEXT, start_handler.filters_flow)], Flow.VERIFY_FILTERS.value: [MessageHandler(filters.TEXT, start_handler.verify_filter)], }, From 9032949c0c65845cacb372e410aabe192ab07131 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Mon, 6 Jan 2025 16:18:18 +0200 Subject: [PATCH 21/22] added step and updated some messages --- src/model/User.py | 5 ++++- src/telegram_handler/start_handler_constats.py | 7 ++++--- src/telegram_handler/telegram_start_handler.py | 17 +++++++++-------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/model/User.py b/src/model/User.py index d2ba67f..afa1019 100644 --- a/src/model/User.py +++ b/src/model/User.py @@ -10,6 +10,7 @@ class User(BaseModel): username: str chat_id: Union[int, str] = None experience: Union[int, str] = None + job_age: Union[int, str] = None position: Optional[Position] = None cities: Optional[list[str]] = None title_filters: Optional[list[str]] = None @@ -20,8 +21,10 @@ class User(BaseModel): message += f"Username: @{self.username}\n" if self.chat_id: message += f"Chat ID: {self.chat_id}\n" + if self.job_age: + message += f"Job Age (Hours): {self.job_age}\n" if self.experience: - message += f"Experience: {self.experience}\n"
+ message += f"Experience(Years): {self.experience}\n" if self.position: message += f"Position Level: {self.position.value}\n" if self.cities: diff --git a/src/telegram_handler/start_handler_constats.py b/src/telegram_handler/start_handler_constats.py index 0644d5f..7a32775 100644 --- a/src/telegram_handler/start_handler_constats.py +++ b/src/telegram_handler/start_handler_constats.py @@ -26,16 +26,17 @@ JOB_AGE_MESSAGE: str = "How recent should the jobs be? ⏰\n" \ JOB_AGE_INVALID: str = "Oops! Please enter a number for the number of hours. 😕\n" \ "For example, 24, 48, or 168." -FILTER_TILE_MESSAGE: str = "To help me narrow down your search, tell me about any relevant tags or keywords.\n" \ - "For example: 'remote', 'entry-level', 'python', 'machine learning', 'QA'.\n\n" + multi_value_message +FILTER_TILE_MESSAGE: str = "To help me narrow down your search, tell me about any NOT relevant tags or keywords.\n" \ + "For example: 'remote', 'BI', 'python', 'machine learning', 'QA'.\n\n" + multi_value_message THANK_YOU_MESSAGE: str = "Thank you for chatting with JobSeeker Bot!\n\n" \ "I can help you find jobs on LinkedIn, Glassdoor, and more." SEARCH_MESSAGE: str = "To search for jobs on a specific site, simply send the site name:\n" \ "/linkedin\n" \ + "/indeed\n" \ "/glassdoor\n" \ - "/google\n\n" \ + "/goozali\n\n" \ "Or, use the command /find to search across all supported job boards for a broader search.\n\n" \ "Let me know how I can assist you further! 
😊" diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py index 8a35435..b00218f 100644 --- a/src/telegram_handler/telegram_start_handler.py +++ b/src/telegram_handler/telegram_start_handler.py @@ -7,14 +7,14 @@ from telegram.ext import ( ) from config.cache_manager import cache_manager -from scrapers.utils import create_logger from model.Position import Position from model.User import User from model.user_repository import user_repository +from scrapers.utils import create_logger from telegram_bot import TelegramBot from telegram_handler.start_handler_constats import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \ LOCATION_MESSAGE, EXPERIENCE_MESSAGE, FILTER_TILE_MESSAGE, THANK_YOU_MESSAGE, BYE_MESSAGE, VERIFY_MESSAGE, \ - SEARCH_MESSAGE, EXPERIENCE_INVALID + SEARCH_MESSAGE, EXPERIENCE_INVALID, JOB_AGE_INVALID, JOB_AGE_MESSAGE class Flow(Enum): @@ -123,8 +123,7 @@ class TelegramStartHandler: cached_user: User = cache_manager.find(update.message.from_user.username) cached_user.experience = update.message.text cache_manager.save(cached_user.username, cached_user) - await update.message.reply_text( - FILTER_TILE_MESSAGE) + await update.message.reply_text(JOB_AGE_MESSAGE) return Flow.JOB_AGE.value async def job_age(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int: @@ -135,13 +134,13 @@ class TelegramStartHandler: if not update.message.text.isnumeric(): await update.message.set_reaction(ReactionEmoji.PILE_OF_POO) - await update.message.reply_text(EXPERIENCE_INVALID) - await update.message.reply_text(EXPERIENCE_MESSAGE) + await update.message.reply_text(JOB_AGE_INVALID) + await update.message.reply_text(JOB_AGE_MESSAGE) - return Flow.EXPERIENCE.value + return Flow.JOB_AGE.value await update.message.set_reaction(ReactionEmoji.FIRE) cached_user: User = cache_manager.find(update.message.from_user.username) - cached_user.experience = update.message.text + cached_user.job_age = update.message.text 
cache_manager.save(cached_user.username, cached_user) await update.message.reply_text( FILTER_TILE_MESSAGE) @@ -152,6 +151,8 @@ class TelegramStartHandler: """Asks for a filters_flow.""" await update.message.set_reaction(ReactionEmoji.FIRE) title_filters = update.message.text.split(",") + # Remove leading/trailing spaces from each title filter + title_filters = [title_filter.strip() for title_filter in title_filters] reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True, input_field_placeholder=Flow.VERIFY_FILTERS.name) await update.message.reply_text(VERIFY_MESSAGE % title_filters, reply_markup=reply_markup) From cad0ad94b36510439330208d7adf497ecd979186 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Mon, 6 Jan 2025 16:24:00 +0200 Subject: [PATCH 22/22] remove manipulation for country --- src/telegram_handler/telegram_default_handler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/telegram_handler/telegram_default_handler.py b/src/telegram_handler/telegram_default_handler.py index 02bcf35..3980f81 100644 --- a/src/telegram_handler/telegram_default_handler.py +++ b/src/telegram_handler/telegram_default_handler.py @@ -53,16 +53,16 @@ class TelegramDefaultHandler(TelegramHandler): site_names = [site.name for site in self.sites_to_scrap] site_names_print = ", ".join(site_names) - locations = [location + ", Israel" for location in user.cities] + # locations = [location + ", Israel" for location in user.cities] await self.telegram_bot.send_text(chat_id, f"Start scarping: {site_names_print}") filtered_out_jobs, jobs = scrape_jobs( site_name=self.sites_to_scrap, user=user, search_term=user.position.value, - locations=locations, + locations=user.cities, results_wanted=200, - hours_old=48, + hours_old=int(user.job_age) if user.job_age else 48, filter_by_title=user.title_filters, country_indeed='israel' )