mirror of https://github.com/Bunsly/JobSpy
commit
6b6b55f87b
|
@ -1,4 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from threading import Lock
|
||||
|
||||
import pandas as pd
|
||||
|
@ -48,8 +50,9 @@ def scrape_jobs(
|
|||
hours_old: int = None,
|
||||
enforce_annual_salary: bool = False,
|
||||
verbose: int = 2,
|
||||
filter_by_title:list[str] = None,
|
||||
** kwargs,
|
||||
) -> pd.DataFrame:
|
||||
) -> list[JobPost]:
|
||||
"""
|
||||
Simultaneously scrapes job data from multiple job sites.
|
||||
:return: pandas dataframe containing job data
|
||||
|
@ -148,4 +151,13 @@ def scrape_jobs(
|
|||
except Exception as e:
|
||||
logger.error(f"Future Error occurred: {e}")
|
||||
|
||||
return merged_jobs
|
||||
def filter_jobs_by_title_name(job: JobPost):
    """Decide whether ``job`` survives the title filters.

    Every entry of the enclosing ``filter_by_title`` argument is searched
    for in ``job.title`` as a case-insensitive regular expression.

    :param job: job post whose title is checked
    :return: ``False`` when any filter matches the title, ``True`` otherwise
    """
    # ``filter_by_title`` defaults to None; treat that as "no filters"
    # instead of failing with "TypeError: 'NoneType' object is not iterable".
    for filter_title in filter_by_title or []:
        if re.search(filter_title, job.title, re.IGNORECASE):
            # The replacement fields stay on one line: a line break inside
            # ``{...}`` of a single-quoted f-string is a SyntaxError before
            # Python 3.12.
            logger.info(
                f"job filtered out by title: {job.id} , {job.title} , found {filter_title}")
            return False

    return True
|
||||
|
||||
return list(filter(filter_jobs_by_title_name, merged_jobs))
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
import asyncio
|
||||
import re
|
||||
from src.jobspy import Site, scrape_jobs
|
||||
from src.jobspy.db.job_repository import JobRepository
|
||||
from src.jobspy.jobs import JobPost
|
||||
from src.jobspy.scrapers.utils import create_logger
|
||||
from src.jobspy.telegram_bot import TelegramBot
|
||||
|
||||
logger = create_logger("Main")

# Patterns for job titles that should be dropped from the results.
# NOTE(review): each entry is searched as an unanchored, case-insensitive
# regex, so short entries such as "BI" or "qa" also match inside longer
# words - confirm this is intended.
filter_by_title: list[str] = [
    "test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack",
    "Frontend", "Front-end", "Front End", "DevOps", "Physical", "Staff",
    "automation", "BI", "Principal", "Architect", "Android",
    "Machine Learning", "Student", "Data Engineer", "DevSecOps",
]


def filter_jobs_by_title_name(job: JobPost):
    """Decide whether ``job`` survives the module-level title filters.

    :param job: job post whose title is checked against ``filter_by_title``
    :return: ``False`` when any filter matches the title, ``True`` otherwise
    """
    for filter_title in filter_by_title:
        if re.search(filter_title, job.title, re.IGNORECASE):
            # The replacement fields stay on one line: a line break inside
            # ``{...}`` of a single-quoted f-string is a SyntaxError before
            # Python 3.12.
            logger.info(
                f"job filtered out by title: {job.id} , {job.title} , found {filter_title}")
            return False

    return True
|
||||
|
||||
|
||||
async def main():
    """Scrape jobs, drop filtered titles, store them and send the new ones.

    The scraped jobs are filtered with ``filter_jobs_by_title_name``, passed
    to ``JobRepository.insertManyIfNotFound`` and every job returned by that
    call is sent through ``TelegramBot.sendJob``.
    """
    bot = TelegramBot()
    repository = JobRepository()

    # Only Goozali is scraped here; the other Site members (LINKEDIN,
    # GLASSDOOR, INDEED) are currently left out.
    target_sites = [Site.GOOZALI]
    search_locations = [
        "Tel Aviv, Israel",
        "Ramat Gan, Israel",
        "Central, Israel",
        "Rehovot ,Israel",
    ]

    jobs = scrape_jobs(
        site_name=target_sites,
        search_term="software engineer",
        locations=search_locations,
        results_wanted=200,
        hours_old=48,
        country_indeed='israel',
    )
    logger.info(f"Found {len(jobs)} jobs")

    kept_jobs = [job for job in jobs if filter_jobs_by_title_name(job)]
    for fresh_job in repository.insertManyIfNotFound(kept_jobs):
        await bot.sendJob(fresh_job)


# Script entry point: drive the coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())
|
|
@ -0,0 +1,51 @@
|
|||
import os
|
||||
|
||||
from telegram import Update
|
||||
from telegram.ext import Application, CommandHandler
|
||||
|
||||
from src.jobspy import Site
|
||||
from src.jobspy.scrapers.utils import create_logger
|
||||
from src.telegram_handler import TelegramIndeedHandler, TelegramDefaultHandler
|
||||
|
||||
logger = create_logger("Main")

# Title patterns handed to every handler as ``title_filters``.
title_filters: list[str] = [
    "test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack",
    "Frontend", "Front-end", "Front End", "DevOps", "Physical", "Staff",
    "automation", "BI", "Principal", "Architect", "Android",
    "Machine Learning", "Student", "Data Engineer", "DevSecOps",
]


def main() -> None:
    """Build the Telegram application, register the commands and poll.

    Registers ``/findAll`` (all sites) plus one command per site, named
    after ``Site.<member>.value``.

    :raises SystemExit: when ``TELEGRAM_API_TOKEN`` is not set
    """
    logger.info("Starting initialize ")
    _api_token = os.getenv("TELEGRAM_API_TOKEN")
    if not _api_token:
        # Fail early with a clear message instead of handing None to the
        # application builder.
        logger.error("TELEGRAM_API_TOKEN environment variable is not set")
        raise SystemExit("TELEGRAM_API_TOKEN environment variable is not set")

    search_term = "software engineer"
    locations = ["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", "Rehovot ,Israel"]
    application = Application.builder().token(_api_token).build()

    # Arguments shared by every handler.
    common = {
        "locations": locations,
        "title_filters": title_filters,
        "search_term": search_term,
    }

    # All sites at once
    tg_handler_all = TelegramDefaultHandler(
        sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI],
        **common)
    application.add_handler(CommandHandler("findAll", tg_handler_all.handle))

    # Goozali, Glassdoor and LinkedIn each get their own single-site handler.
    for site in (Site.GOOZALI, Site.GLASSDOOR, Site.LINKEDIN):
        tg_handler_site = TelegramDefaultHandler(sites=[site], **common)
        application.add_handler(CommandHandler(site.value, tg_handler_site.handle))

    # Indeed uses its dedicated handler class.
    tg_handler_indeed = TelegramIndeedHandler(**common)
    application.add_handler(CommandHandler(Site.INDEED.value, tg_handler_indeed.handle))

    logger.info("Run polling from telegram")
    application.run_polling(allowed_updates=Update.ALL_TYPES)


if __name__ == "__main__":
    main()
|
|
@ -1,9 +1,16 @@
|
|||
import os
|
||||
from dotenv import load_dotenv
|
||||
from telegram import Bot
|
||||
|
||||
from .jobs import JobPost
|
||||
from .scrapers.utils import create_logger
|
||||
from telegram import Bot, Update
|
||||
from telegram.ext import (
|
||||
Application,
|
||||
CommandHandler,
|
||||
ContextTypes,
|
||||
ConversationHandler,
|
||||
MessageHandler,
|
||||
filters,
|
||||
)
|
||||
from src.jobspy.jobs import JobPost
|
||||
from src.jobspy.scrapers.utils import create_logger
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
@ -21,8 +28,7 @@ class TelegramBot:
|
|||
"""
|
||||
Send JobPost details to Telegram chat.
|
||||
"""
|
||||
message = f"New Job Posted:\n\n" \
|
||||
f"Job ID: {job.id}\n" \
|
||||
message = f"Job ID: {job.id}\n" \
|
||||
f"Job Title: {job.title}\n" \
|
||||
f"Company: {job.company_name}\n" \
|
||||
f"Location: {job.location.display_location()}\n" \
|
||||
|
@ -32,4 +38,4 @@ class TelegramBot:
|
|||
logger.info(f"Sent job to Telegram: {job.id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to send job to Telegram: {job.id}")
|
||||
logger.error(f"Error: {e}")
|
||||
logger.error(f"Error: {e}")
|
|
@ -0,0 +1,2 @@
|
|||
from .telegram_default_handler import TelegramDefaultHandler
|
||||
from .telegram_indeed_handler import TelegramIndeedHandler
|
|
@ -0,0 +1,40 @@
|
|||
from telegram import Update
|
||||
from telegram.ext import (
|
||||
ContextTypes,
|
||||
)
|
||||
|
||||
from src.jobspy import Site, scrape_jobs
|
||||
from src.jobspy.db.job_repository import JobRepository
|
||||
from src.jobspy.scrapers.utils import create_logger
|
||||
from src.telegram_bot import TelegramBot
|
||||
from src.telegram_handler.telegram_handler import TelegramHandler
|
||||
|
||||
|
||||
class TelegramDefaultHandler(TelegramHandler):
    """Telegram command callback that scrapes a fixed set of sites.

    On each invocation the configured sites are scraped, the results are
    passed to ``JobRepository.insertManyIfNotFound`` and every job returned
    by that call is sent through ``TelegramBot.sendJob``.
    """

    def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str], search_term: str):
        """Store the search configuration and create the collaborators.

        :param sites: sites passed to ``scrape_jobs`` as ``site_name``
        :param locations: locations passed to ``scrape_jobs``
        :param title_filters: patterns passed to ``scrape_jobs`` as
            ``filter_by_title``
        :param search_term: search term passed to ``scrape_jobs``
        """
        super().__init__()
        self.sites_to_scrap = sites
        self.locations = locations
        self.search_term = search_term
        self.title_filters = title_filters
        self.telegramBot = TelegramBot()
        self.jobRepository = JobRepository()
        # The logger is named after the site when exactly one is configured.
        if len(sites) == 1:
            self.logger = create_logger(f"Telegram{sites[0].name.title()}Handler")
        else:
            self.logger = create_logger("TelegramAllHandler")

    async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        """Scrape the configured sites and send the resulting new jobs.

        :param update: incoming Telegram update (not read here)
        :param context: callback context (not read here)
        """
        # Local import so this block does not depend on a file-level import.
        import asyncio

        self.logger.info("start handling")
        # scrape_jobs is a synchronous call; run it in a worker thread so
        # the event loop is not blocked while the scrape is in progress.
        jobs = await asyncio.to_thread(
            scrape_jobs,
            site_name=self.sites_to_scrap,
            search_term=self.search_term,
            locations=self.locations,
            results_wanted=200,
            hours_old=48,
            filter_by_title=self.title_filters,
        )
        self.logger.info(f"Found {len(jobs)} jobs")
        # presumably returns only the jobs that were not stored before -
        # verify against JobRepository
        new_jobs = self.jobRepository.insertManyIfNotFound(jobs)
        for newJob in new_jobs:
            await self.telegramBot.sendJob(newJob)
        self.logger.info("finished handling")
|
|
@ -0,0 +1,12 @@
|
|||
from abc import ABC, abstractmethod
|
||||
|
||||
from telegram import Update
|
||||
from telegram.ext import ContextTypes
|
||||
|
||||
|
||||
class TelegramHandler(ABC):
    """Abstract base class for Telegram command callbacks."""

    @abstractmethod
    async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        """Process one incoming command.

        Declared as a coroutine with ``ContextTypes.DEFAULT_TYPE`` so the
        abstract signature matches the ``async def handle`` implementations
        in the concrete handlers.

        :param update: incoming Telegram update
        :param context: callback context
        """
        pass  # Abstract method: concrete handlers provide the body.
|
|
@ -0,0 +1,38 @@
|
|||
from telegram import Update
|
||||
from telegram.ext import (
|
||||
ContextTypes,
|
||||
)
|
||||
|
||||
from src.jobspy import Site, scrape_jobs
|
||||
from src.jobspy.db.job_repository import JobRepository
|
||||
from src.jobspy.scrapers.utils import create_logger
|
||||
from src.telegram_bot import TelegramBot
|
||||
from src.telegram_handler.telegram_handler import TelegramHandler
|
||||
|
||||
|
||||
class TelegramIndeedHandler(TelegramHandler):
    """Telegram command callback that scrapes Indeed only.

    On each invocation Indeed is scraped with ``country_indeed='israel'``,
    the results are passed to ``JobRepository.insertManyIfNotFound`` and
    every job returned by that call is sent through ``TelegramBot.sendJob``.
    """

    def __init__(self, locations: list[str], title_filters: list[str], search_term: str):
        """Store the search configuration and create the collaborators.

        :param locations: locations passed to ``scrape_jobs``
        :param title_filters: patterns passed to ``scrape_jobs`` as
            ``filter_by_title``
        :param search_term: search term passed to ``scrape_jobs``
        """
        super().__init__()
        self.sites_to_scrap = [Site.INDEED]
        self.locations = locations
        self.search_term = search_term
        self.title_filters = title_filters
        self.telegramBot = TelegramBot()
        self.jobRepository = JobRepository()
        self.logger = create_logger(f"Telegram{self.sites_to_scrap[0].name.title()}Handler")

    async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        """Scrape Indeed and send the resulting new jobs.

        :param update: incoming Telegram update (not read here)
        :param context: callback context (not read here)
        """
        # Local import so this block does not depend on a file-level import.
        import asyncio

        self.logger.info("start handling")
        # scrape_jobs is a synchronous call; run it in a worker thread so
        # the event loop is not blocked while the scrape is in progress.
        jobs = await asyncio.to_thread(
            scrape_jobs,
            site_name=self.sites_to_scrap,
            search_term=self.search_term,
            locations=self.locations,
            results_wanted=200,
            hours_old=48,
            country_indeed='israel',
            filter_by_title=self.title_filters,
        )
        self.logger.info(f"Found {len(jobs)} jobs")
        # presumably returns only the jobs that were not stored before -
        # verify against JobRepository
        new_jobs = self.jobRepository.insertManyIfNotFound(jobs)
        for newJob in new_jobs:
            await self.telegramBot.sendJob(newJob)
        self.logger.info("finished handling")
|
Loading…
Reference in New Issue