mirror of https://github.com/Bunsly/JobSpy
findall command works
parent
f010ce51b1
commit
257f24ef19
|
@ -1,4 +1,6 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
@ -48,8 +50,9 @@ def scrape_jobs(
|
||||||
hours_old: int = None,
|
hours_old: int = None,
|
||||||
enforce_annual_salary: bool = False,
|
enforce_annual_salary: bool = False,
|
||||||
verbose: int = 2,
|
verbose: int = 2,
|
||||||
|
filter_by_title:list[str] = None,
|
||||||
** kwargs,
|
** kwargs,
|
||||||
) -> pd.DataFrame:
|
) -> list[JobPost]:
|
||||||
"""
|
"""
|
||||||
Simultaneously scrapes job data from multiple job sites.
|
Simultaneously scrapes job data from multiple job sites.
|
||||||
:return: pandas dataframe containing job data
|
:return: pandas dataframe containing job data
|
||||||
|
@ -148,4 +151,13 @@ def scrape_jobs(
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Future Error occurred: {e}")
|
logger.error(f"Future Error occurred: {e}")
|
||||||
|
|
||||||
return merged_jobs
|
def filter_jobs_by_title_name(job: JobPost):
    """Return True when the job's title matches none of the title filters.

    Each entry of ``filter_by_title`` (taken from the enclosing scope) is
    used as a case-insensitive regular expression and searched anywhere in
    ``job.title``. The first matching filter is logged and the job is
    rejected.

    :param job: the job post whose ``title`` (and ``id`` for logging) is read
    :return: False if any filter matches the title, True otherwise
    """
    # ``filter_by_title`` defaults to None in the enclosing signature; treat
    # that as "no filters" instead of raising TypeError on iteration.
    for filter_title in filter_by_title or ():
        if re.search(filter_title, job.title, re.IGNORECASE):
            # Kept on one line: a replacement field split across lines inside
            # a single-quoted f-string is a SyntaxError before Python 3.12.
            logger.info(
                f"job filtered out by title: {job.id} , {job.title} , found {filter_title}"
            )
            return False

    return True
|
||||||
|
|
||||||
|
return list(filter(filter_jobs_by_title_name, merged_jobs))
|
||||||
|
|
|
@ -1,48 +0,0 @@
|
||||||
import os
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from telegram import Bot, Update, ReplyKeyboardMarkup
|
|
||||||
from telegram.ext import (
|
|
||||||
Application,
|
|
||||||
CommandHandler,
|
|
||||||
ContextTypes,
|
|
||||||
ConversationHandler,
|
|
||||||
MessageHandler,
|
|
||||||
filters,
|
|
||||||
)
|
|
||||||
from .scrapers.utils import create_logger
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
logger = create_logger("TelegramBot")
|
|
||||||
|
|
||||||
|
|
||||||
class TelegramHandler:
    """Registers the bot's slash commands and runs the Telegram polling loop.

    The API token and chat id are read from the ``TELEGRAM_API_TOKEN`` and
    ``TELEGRAM_CHAT_ID`` environment variables.
    """

    def __init__(self):
        self._api_token = os.getenv("TELEGRAM_API_TOKEN")
        self.chatId = os.getenv("TELEGRAM_CHAT_ID")
        self.bot = Bot(token=self._api_token)
        # Create the Application and pass it your bot's token.
        self.application = Application.builder().token(self._api_token).build()

    async def _send_greeting(self, update: Update):
        """Reply to the message that triggered the command with a fixed greeting."""
        await update.message.reply_text(
            "Hi! My name is Doctor Botter. I will hold a more complex conversation with you. "
            "Why don't you tell me something about yourself?"
        )

    async def findAll(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        """Command callback: answer the sender with the greeting text.

        :param update: incoming Telegram update; its ``message`` is replied to
        :param context: callback context supplied by the application (unused)
        """
        await self._send_greeting(update)

    async def find_glassdoor(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        """Command callback for the Glassdoor command: answer with the greeting text.

        :param update: incoming Telegram update; its ``message`` is replied to
        :param context: callback context supplied by the application (unused)
        """
        await self._send_greeting(update)

    def handler(self):
        """Register every command handler, then poll Telegram for updates.

        Returns nothing; ``run_polling`` keeps running until the process is
        interrupted.
        """
        self.application.add_handler(CommandHandler("find", self.findAll))
        # Correctly spelled command; previously only the typo below was
        # registered, so "/glassdoor" never reached the callback.
        self.application.add_handler(CommandHandler("glassdoor", self.find_glassdoor))
        # Misspelled name kept so existing users of "/galssdoor" still work.
        self.application.add_handler(CommandHandler("galssdoor", self.find_glassdoor))
        self.application.add_handler(CommandHandler("linkedin", self.findAll))
        self.application.add_handler(CommandHandler("indeed", self.findAll))
        self.application.add_handler(CommandHandler("goozali", self.findAll))
        # Run the bot until the user presses Ctrl-C
        self.application.run_polling(allowed_updates=Update.ALL_TYPES)
|
|
|
@ -1,26 +1,28 @@
|
||||||
import asyncio
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from telegram import Update
|
||||||
from telegram.ext import Application, CommandHandler
|
from telegram.ext import Application, CommandHandler
|
||||||
|
|
||||||
from src.jobspy import Site, scrape_jobs
|
from src.jobspy import Site, scrape_jobs
|
||||||
from src.jobspy.db.job_repository import JobRepository
|
from src.jobspy.db.job_repository import JobRepository
|
||||||
from src.jobspy.jobs import JobPost
|
from src.jobspy.jobs import JobPost
|
||||||
from src.jobspy.scrapers.utils import create_logger
|
from src.jobspy.scrapers.utils import create_logger
|
||||||
from src.jobspy.telegram_bot import TelegramBot
|
from src.telegram_bot import TelegramBot
|
||||||
from src.jobspy.telegram_handler import TelegramHandler
|
from src.telegram_handler import TelegramAllHandler
|
||||||
|
|
||||||
logger = create_logger("Main")
|
logger = create_logger("Main")
|
||||||
filter_by_title: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end", "Front End", "DevOps", "Physical", "Staff",
|
title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "Fullstack", "Frontend", "Front-end",
|
||||||
"automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student", "Data Engineer", "DevSecOps"]
|
"Front End", "DevOps", "Physical", "Staff",
|
||||||
|
"automation", "BI", "Principal", "Architect", "Android", "Machine Learning", "Student",
|
||||||
|
"Data Engineer", "DevSecOps"]
|
||||||
|
|
||||||
|
|
||||||
def filter_jobs_by_title_name(job: JobPost):
    """Return True when the job's title matches none of ``title_filters``.

    Every entry of the module-level ``title_filters`` list is used as a
    case-insensitive regular expression and searched anywhere in
    ``job.title``. The first match is logged and the job is rejected.

    :param job: the job post whose ``title`` (and ``id`` for logging) is read
    :return: False if any filter matches the title, True otherwise
    """
    for filter_title in title_filters:
        if re.search(filter_title, job.title, re.IGNORECASE):
            # Kept on one line: a replacement field split across lines inside
            # a single-quoted f-string is a SyntaxError before Python 3.12.
            logger.info(
                f"job filtered out by title: {job.id} , {job.title} , found {filter_title}"
            )
            return False

    return True
|
||||||
|
@ -29,7 +31,6 @@ def filter_jobs_by_title_name(job: JobPost):
|
||||||
async def main():
|
async def main():
|
||||||
telegramBot = TelegramBot()
|
telegramBot = TelegramBot()
|
||||||
jobRepository = JobRepository()
|
jobRepository = JobRepository()
|
||||||
tg_handler = TelegramHandler()
|
|
||||||
# sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI]
|
# sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI]
|
||||||
sites_to_scrap = [Site.GOOZALI]
|
sites_to_scrap = [Site.GOOZALI]
|
||||||
# sites_to_scrap = [Site.GOOZALI]
|
# sites_to_scrap = [Site.GOOZALI]
|
||||||
|
@ -48,14 +49,21 @@ async def main():
|
||||||
for newJob in newJobs:
|
for newJob in newJobs:
|
||||||
await telegramBot.sendJob(newJob)
|
await telegramBot.sendJob(newJob)
|
||||||
|
|
||||||
|
|
||||||
# Run the async main function
|
# Run the async main function
|
||||||
if __name__ == "__main__":
    # The one-shot async entry point is currently disabled:
    # asyncio.run(main())
    logger.info("Starting initialize ")
    _api_token = os.getenv("TELEGRAM_API_TOKEN")

    # Handler behind the /findAll command: scrapes the configured sites and
    # locations, applying the module-level title filters.
    tg_handler_all = TelegramAllHandler(
        sites=[Site.GOOZALI],
        locations=[
            "Tel Aviv, Israel",
            "Ramat Gan, Israel",
            "Central, Israel",
            "Rehovot ,Israel",
        ],
        title_filters=title_filters,
    )

    application = Application.builder().token(_api_token).build()
    application.add_handler(CommandHandler("findAll", tg_handler_all.handle))
    # Per-site commands are not wired up yet:
    # application.add_handler(CommandHandler("galssdoor", find_glassdoor))
    # application.add_handler(CommandHandler("linkedin", find_linkedin))
    # application.add_handler(CommandHandler("indeed", find_indeed))
    # application.add_handler(CommandHandler("goozali", find_goozali))

    logger.info("Run polling from telegram")
    application.run_polling(allowed_updates=Update.ALL_TYPES)
|
|
@ -9,8 +9,8 @@ from telegram.ext import (
|
||||||
MessageHandler,
|
MessageHandler,
|
||||||
filters,
|
filters,
|
||||||
)
|
)
|
||||||
from .jobs import JobPost
|
from src.jobspy.jobs import JobPost
|
||||||
from .scrapers.utils import create_logger
|
from src.jobspy.scrapers.utils import create_logger
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
@ -23,10 +23,6 @@ class TelegramBot:
|
||||||
self._api_token = os.getenv("TELEGRAM_API_TOKEN")
|
self._api_token = os.getenv("TELEGRAM_API_TOKEN")
|
||||||
self.chatId = os.getenv("TELEGRAM_CHAT_ID")
|
self.chatId = os.getenv("TELEGRAM_CHAT_ID")
|
||||||
self.bot = Bot(token=self._api_token)
|
self.bot = Bot(token=self._api_token)
|
||||||
# Create the Application and pass it your bot's token.
|
|
||||||
self.application = Application.builder().token(self._api_token).build()
|
|
||||||
# Run the bot until the user presses Ctrl-C
|
|
||||||
self.application.run_polling(allowed_updates=Update.ALL_TYPES)
|
|
||||||
|
|
||||||
async def sendJob(self, job: JobPost):
|
async def sendJob(self, job: JobPost):
|
||||||
"""
|
"""
|
|
@ -0,0 +1,37 @@
|
||||||
|
from telegram import Update
|
||||||
|
from telegram.ext import (
|
||||||
|
ContextTypes,
|
||||||
|
)
|
||||||
|
|
||||||
|
from src.jobspy import Site, scrape_jobs
|
||||||
|
from src.jobspy.db.job_repository import JobRepository
|
||||||
|
from src.jobspy.scrapers.utils import create_logger
|
||||||
|
from src.telegram_bot import TelegramBot
|
||||||
|
|
||||||
|
logger = create_logger("TelegramAllHandler")
|
||||||
|
|
||||||
|
|
||||||
|
class TelegramAllHandler:
    """Telegram command callback that scrapes jobs and sends the new ones.

    :param sites: job sites passed to ``scrape_jobs`` as ``site_name``
    :param locations: location strings passed to ``scrape_jobs``
    :param title_filters: title patterns passed to ``scrape_jobs`` as
        ``filter_by_title``
    """

    def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str]):
        self.sites_to_scrap = sites
        self.locations = locations
        self.title_filters = title_filters
        self.telegramBot = TelegramBot()
        self.jobRepository = JobRepository()

    async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        """Scrape jobs, store them, and send each newly stored job via the bot.

        :param update: incoming Telegram update that triggered the command
            (not read here)
        :param context: callback context supplied by the application (unused)
        """
        # Local import keeps this block self-contained; the module does not
        # import asyncio at the top.
        import asyncio

        logger.info("start handling")
        # ``scrape_jobs`` is a synchronous call; run it in a worker thread so
        # the bot's event loop is not stalled for the duration of the scrape.
        jobs = await asyncio.to_thread(
            scrape_jobs,
            site_name=self.sites_to_scrap,
            search_term="software engineer",
            locations=self.locations,
            results_wanted=200,
            hours_old=48,
            country_indeed='israel',
            filter_by_title=self.title_filters,
        )
        logger.info(f"Found {len(jobs)} jobs")
        # NOTE(review): presumably returns only the jobs that were not already
        # stored - confirm against JobRepository.insertManyIfNotFound.
        new_jobs = self.jobRepository.insertManyIfNotFound(jobs)
        for newJob in new_jobs:
            await self.telegramBot.sendJob(newJob)
        logger.info("finished handling")
|
Loading…
Reference in New Issue