new handler for goozali

pull/231/head
Yariv Menachem 2024-12-26 17:31:55 +02:00
parent f1b127ae44
commit 34e85b9261
3 changed files with 53 additions and 23 deletions

View File

@ -1,31 +1,19 @@
import os
import re
from telegram import Update
from telegram.ext import Application, CommandHandler
from src.jobspy import Site, scrape_jobs
from src.jobspy.db.job_repository import JobRepository
from src.jobspy.jobs import JobPost
from src.jobspy.scrapers.utils import create_logger
from src.telegram_bot import TelegramBot
from src.telegram_handler import TelegramAllHandler
from src.telegram_handler import TelegramAllHandler,TelegramGoozaliHandler
logger = create_logger("Main")
title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end",
"Front End", "DevOps", "Physical", "Staff",
"automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student",
"Data Engineer", "DevSecOps"]
def filter_jobs_by_title_name(job: JobPost) -> bool:
    """Decide whether a job passes the title blacklist.

    Each entry of the module-level ``title_filters`` list is used as a
    case-insensitive regular-expression pattern and searched for anywhere in
    ``job.title``. The first entry that matches is logged and the job is
    rejected.

    Args:
        job: The job posting to check; its ``id`` and ``title`` are read.

    Returns:
        ``False`` if any filter matches the title, ``True`` otherwise, which
        makes the function usable as a predicate for ``filter()``.
    """
    for filter_title in title_filters:
        if re.search(filter_title, job.title, re.IGNORECASE):
            # Each f-string piece stays on a single line: a line break inside
            # a replacement field is a SyntaxError before Python 3.12.
            logger.info(
                f"job filtered out by title: {job.id} , {job.title} , "
                f"found {filter_title}")
            return False
    return True
"Front End", "DevOps", "Physical", "Staff",
"automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student",
"Data Engineer", "DevSecOps"]
async def main():
@ -44,7 +32,6 @@ async def main():
country_indeed='israel'
)
logger.info(f"Found {len(jobs)} jobs")
jobs = list(filter(filter_jobs_by_title_name, jobs))
newJobs = jobRepository.insertManyIfNotFound(jobs)
for newJob in newJobs:
await telegramBot.sendJob(newJob)
@ -52,19 +39,22 @@ async def main():
# Run the async main function
if __name__ == "__main__":
    # Script entry point: build the Telegram application, register one
    # command handler per supported command, then poll for updates.
    # asyncio.run(main())
    logger.info("Starting initialize ")
    _api_token = os.getenv("TELEGRAM_API_TOKEN")
    if not _api_token:
        # Fail fast with a message that names the missing variable instead of
        # handing None to the application builder.
        raise SystemExit("TELEGRAM_API_TOKEN environment variable is not set")

    # Search settings shared by every handler registered below.
    search_term = "software engineer"
    locations = ["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", "Rehovot ,Israel"]

    application = Application.builder().token(_api_token).build()

    # /findAll scrapes every supported site.
    tg_handler_all = TelegramAllHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI],
                                        locations=locations,
                                        title_filters=title_filters,
                                        search_term=search_term)
    application.add_handler(CommandHandler("findAll", tg_handler_all.handle))

    # /goozali scrapes the Goozali site only.
    tg_handler_goozali = TelegramGoozaliHandler(locations=locations,
                                                title_filters=title_filters,
                                                search_term=search_term)
    application.add_handler(CommandHandler("goozali", tg_handler_goozali.handle))

    # Per-site commands that are not wired up (the find_* callbacks are not
    # defined in the visible code):
    # application.add_handler(CommandHandler("glassdoor", find_glassdoor))
    # application.add_handler(CommandHandler("linkedin", find_linkedin))
    # application.add_handler(CommandHandler("indeed", find_indeed))
    # application.add_handler(CommandHandler("goozali", find_goozali))
    logger.info("Run polling from telegram")
    application.run_polling(allowed_updates=Update.ALL_TYPES)

View File

@ -1 +1,2 @@
from .telegram_all_handler import TelegramAllHandler
from .telegram_goozali_handler import TelegramGoozaliHandler

View File

@ -0,0 +1,39 @@
from telegram import Update
from telegram.ext import (
ContextTypes,
)
from src.jobspy import Site, scrape_jobs
from src.jobspy.db.job_repository import JobRepository
from src.jobspy.scrapers.utils import create_logger
from src.telegram_bot import TelegramBot
from src.telegram_handler.telegram_handler import TelegramHandler
# Named after this handler so its log lines are not attributed to
# TelegramAllHandler.
logger = create_logger("TelegramGoozaliHandler")


class TelegramGoozaliHandler(TelegramHandler):
    """Telegram command handler that scrapes the Goozali site only.

    When the command fires, jobs are scraped for the configured search term
    and locations, stored through ``JobRepository``, and every job the
    repository hands back is sent out through ``TelegramBot``.
    """

    def __init__(self, locations: list[str], title_filters: list[str], search_term: str):
        """Store the search configuration and create the collaborators.

        Args:
            locations: Locations forwarded to ``scrape_jobs``.
            title_filters: Title filters forwarded to ``scrape_jobs`` as
                ``filter_by_title``.
            search_term: Search phrase forwarded to ``scrape_jobs``.
        """
        # This handler is always restricted to the single Goozali site.
        self.sites_to_scrap = [Site.GOOZALI]
        self.locations = locations
        self.search_term = search_term
        self.title_filters = title_filters
        self.telegramBot = TelegramBot()
        self.jobRepository = JobRepository()

    async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        """Scrape Goozali, store the results and send the stored jobs.

        Args:
            update: The incoming Telegram update (not read by this handler).
            context: The callback context (not read by this handler).
        """
        logger.info("start handling")
        # NOTE(review): scrape_jobs is called synchronously inside this
        # coroutine; if it performs blocking network I/O it will stall the
        # event loop while it runs -- confirm whether that is acceptable.
        jobs = scrape_jobs(
            site_name=self.sites_to_scrap,
            search_term=self.search_term,
            locations=self.locations,
            results_wanted=200,
            hours_old=48,
            country_indeed='israel',
            filter_by_title=self.title_filters
        )
        logger.info(f"Found {len(jobs)} jobs")
        # Presumably only jobs that were not already stored come back, so each
        # job is announced once -- verify against JobRepository.
        new_jobs = self.jobRepository.insertManyIfNotFound(jobs)
        for new_job in new_jobs:
            await self.telegramBot.sendJob(new_job)
        logger.info("finished handling")