mirror of https://github.com/Bunsly/JobSpy
commit 69a420710e

@@ -3,15 +3,15 @@ requires = [ "poetry-core",]
 build-backend = "poetry.core.masonry.api"

 [tool.poetry]
-name = "python-jobspy"
+name = "python-JobSeekerTG"
 version = "1.1.76"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
-authors = [ "Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>",]
-homepage = "https://github.com/Bunsly/JobSpy"
+authors = [ "YM "]
+homepage = "https://github.com/yariv245/JobSeekerTG"
 readme = "README.md"
 keywords = [ "jobs-scraper", "linkedin", "indeed", "glassdoor", "ziprecruiter",]

 [[tool.poetry.packages]]
-include = "jobspy"
+include = "JobSeekerTG"
 from = "src"

 [tool.black]

BIN requirements.txt (binary file not shown)
@@ -0,0 +1,17 @@
from cachebox import LRUCache


class CacheboxCacheManager:
    def __init__(self):
        self._cache = LRUCache(50)

    def find(self, cache_id: str):
        """Find cached data by id, else None"""
        return self._cache.get(cache_id)

    def save(self, cache_id: str, data):
        """Save data under the given id"""
        self._cache.insert(cache_id, data)


cache_manager = CacheboxCacheManager()
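
A minimal usage sketch (illustrative, not part of the commit; the key and payload are hypothetical). The LRU cache holds at most 50 entries, find() returns None on a miss:

    cache_manager.save("user:alice", {"chat_id": 1})
    assert cache_manager.find("user:alice") == {"chat_id": 1}
    assert cache_manager.find("user:bob") is None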
@@ -1,34 +0,0 @@
import os

from pymongo import MongoClient
from pymongo.synchronous.database import Database

from config.settings import settings
from jobspy import create_logger


class MongoDB:
    _instance = None
    db: Database = None

    def __new__(cls):

        if cls._instance is not None:
            return cls._instance

        self = super().__new__(cls)
        cls._instance = self
        logger = create_logger("Mongo Client")
        mongoUri = settings.mongo_uri
        if not mongoUri:
            logger.error("MONGO_URI environment variable is not set")
            raise ValueError("MONGO_URI environment variable is not set")
        client = MongoClient(mongoUri)
        database_name = settings.mongo_db_name
        if not database_name:
            logger.error("MONGO_DB_NAME environment variable is not set")
            raise ValueError(
                "MONGO_DB_NAME environment variable is not set")

        self.db = client[database_name]
        logger.info("Succeed connect to MongoDB")
        return cls._instance
@@ -1,29 +0,0 @@
from .model import GoozaliColumn


job_post_column_to_goozali_column = {
    "date_posted": "Discovered",
    "field": "Field",
    "title": "Job Title",
    "job_url": "Position Link",
    "company_name": "Company",
    "description": "Requirements",
    "location": "Location",
    "company_industry": "Company Industry",
    "id": "Job ID"
}

job_post_column_names = ["id",
                         "date_posted",
                         "field",
                         "title",
                         "job_url",
                         "company_name",
                         "description",
                         "location",
                         "company_industry"]


# Key mapper: Extract 'name' as the key
def extract_goozali_column_name(column): return column.name if isinstance(
    column, GoozaliColumn) else None

src/main.py (44 changed lines)
@@ -1,13 +1,13 @@
 import os

 from telegram import Update
-from telegram.ext import Application, CommandHandler, CallbackQueryHandler, Updater
+from telegram.ext import Application, CommandHandler, CallbackQueryHandler

 from config.settings import settings
-from jobspy.scrapers.site import Site
-from jobspy.scrapers.utils import create_logger
+from scrapers import Site
+from scrapers.utils import create_logger
 from telegram_handler import TelegramDefaultHandler
 from telegram_handler.button_callback.telegram_callback_handler import TelegramCallHandler
 from telegram_handler.telegram_myinfo_handler import my_info_handler
 from telegram_handler.telegram_start_handler import start_conv_handler

 logger = create_logger("Main")
 _api_token = settings.telegram_api_token
@@ -17,52 +17,34 @@ title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "F
                              "automation", "BI ", "Principal", "Architect", "Android", "Machine Learning", "Student",
                              "Data Engineer", "DevSecOps"]


 async def stop(update, context):
     logger.info("Stop polling from telegram")
     application.stop_running()


 if __name__ == "__main__":
     logger.info("Starting initialization")
-    search_term = "software engineer"
-    locations = ["Tel Aviv, Israel", "Ramat Gan, Israel",
-                 "Central, Israel", "Rehovot ,Israel"]
     application.add_handler(start_conv_handler)
     tg_callback_handler = TelegramCallHandler()
-    tg_handler_all = TelegramDefaultHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI],
-                                            locations=locations,
-                                            title_filters=title_filters,
-                                            search_term=search_term)
+    tg_handler_all = TelegramDefaultHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI])
     application.add_handler(CommandHandler("find", tg_handler_all.handle))
     # Goozali
-    tg_handler_goozali = TelegramDefaultHandler(sites=[Site.GOOZALI],
-                                                locations=locations,
-                                                title_filters=title_filters,
-                                                search_term=search_term)
+    tg_handler_goozali = TelegramDefaultHandler(sites=[Site.GOOZALI])
     application.add_handler(CommandHandler(
         Site.GOOZALI.value, tg_handler_goozali.handle))
     # GlassDoor
-    tg_handler_glassdoor = TelegramDefaultHandler(sites=[Site.GLASSDOOR],
-                                                  locations=locations,
-                                                  title_filters=title_filters,
-                                                  search_term=search_term)
+    tg_handler_glassdoor = TelegramDefaultHandler(sites=[Site.GLASSDOOR])
     application.add_handler(CommandHandler(
         Site.GLASSDOOR.value, tg_handler_glassdoor.handle))
     # LinkedIn
-    tg_handler_linkedin = TelegramDefaultHandler(sites=[Site.LINKEDIN],
-                                                 locations=locations,
-                                                 title_filters=title_filters,
-                                                 search_term=search_term)
+    tg_handler_linkedin = TelegramDefaultHandler(sites=[Site.LINKEDIN])
     application.add_handler(CommandHandler(
         Site.LINKEDIN.value, tg_handler_linkedin.handle))
     # Indeed
-    tg_handler_indeed = TelegramDefaultHandler(sites=[Site.INDEED],
-                                               locations=locations,
-                                               title_filters=title_filters,
-                                               search_term=search_term)
+    tg_handler_indeed = TelegramDefaultHandler(sites=[Site.INDEED])
     application.add_handler(CommandHandler(
         Site.INDEED.value, tg_handler_indeed.handle))
     application.add_handler(CommandHandler(
         "myInfo", my_info_handler.handle))
     application.add_handler(CallbackQueryHandler(
         tg_callback_handler.button_callback))
     application.add_handler(CommandHandler('stop', stop))
     logger.info("Run polling from telegram")
     application.run_polling(allowed_updates=Update.ALL_TYPES)
@@ -0,0 +1,26 @@
from enum import Enum


class Position(str, Enum):
    BACKEND_DEVELOPER = "Backend Developer"
    FULLSTACK_DEVELOPER = "Fullstack Developer"
    FRONTEND_DEVELOPER = "Frontend Developer"
    DATA_SCIENTIST = "Data Scientist"
    DATA_ANALYST = "Data Analyst"
    PROJECT_MANAGER = "Project Manager"
    CLOUD_ENGINEER = "Cloud Engineer"
    CLOUD_ARCHITECT = "Cloud Architect"
    UX_UI_DESIGNER = "UX/UI Designer"
    PRODUCT_MANAGER = "Product Manager"
    DEV_OPS_ENGINEER = "DevOps Engineer"
    BUSINESS_ANALYST = "Business Analyst"
    CYBERSECURITY_ENGINEER = "Cybersecurity Engineer"
    MACHINE_LEARNING_ENGINEER = "Machine Learning Engineer"
    ARTIFICIAL_INTELLIGENCE_ENGINEER = "Artificial Intelligence Engineer"
    DATABASE_ADMINISTRATOR = "Database Administrator"
    SYSTEMS_ADMINISTRATOR = "Systems Administrator"
    NETWORK_ENGINEER = "Network Engineer"
    TECHNICAL_SUPPORT_SPECIALIST = "Technical Support Specialist"
    SALES_ENGINEER = "Sales Engineer"
    SCRUM_MASTER = "Scrum Master"
    IT_MANAGER = "IT Manager"
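
A quick sketch of why the str mixin matters (illustrative, not part of the commit): members can be constructed from their display value and compare equal to it, which is how the start handler later in this commit matches keyboard button text against Position values:

    assert Position("Backend Developer") is Position.BACKEND_DEVELOPER
    assert Position.BACKEND_DEVELOPER == "Backend Developer"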
@@ -0,0 +1,34 @@
from typing import Optional, Union

from pydantic import BaseModel, Field

from model.Position import Position


class User(BaseModel):
    full_name: str
    username: str
    chat_id: Union[int, str] = None
    experience: Union[int, str] = None
    job_age: Union[int, str] = None
    position: Optional[Position] = None
    cities: Optional[list[str]] = None
    title_filters: Optional[list[str]] = None

    def get_myinfo_message(self):
        message = "Here's your profile:\n\n"
        message += f"Full Name: {self.full_name}\n"
        message += f"Username: @{self.username}\n"
        if self.chat_id:
            message += f"Chat ID: {self.chat_id}\n"
        if self.job_age:
            message += f"Job Age (Hours): {self.job_age}\n"
        if self.experience:
            message += f"Experience(Years): {self.experience}\n"
        if self.position:
            message += f"Position Level: {self.position.value}\n"
        if self.cities:
            message += f"Preferred Cities: {', '.join(self.cities)}\n"
        if self.position:
            pass
        if self.title_filters:
            message += f"Job Title Filters: {', '.join(self.title_filters)}\n"
        return message
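
Illustrative construction of the model above (all values are hypothetical):

    u = User(full_name="Ada Lovelace", username="ada", chat_id=1,
             experience=5, job_age=48, position=Position.BACKEND_DEVELOPER)
    print(u.get_myinfo_message())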
@@ -0,0 +1,17 @@
from bson.codec_options import TypeCodec

from model.Position import Position


class PositionCodec(TypeCodec):
    python_type = Position
    bson_type = str

    def transform_python(self, value):
        return value.value

    def transform_bson(self, value):
        return Position(value)


# position_codec = PositionCodec()
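
A sketch of how this codec could be wired in, using pymongo's standard custom-type machinery (the collection name is illustrative; mongo_client is the singleton defined further below). Note transform_python must emit the enum's value so that transform_bson's Position(value) lookup round-trips:

    from bson.codec_options import CodecOptions, TypeRegistry

    codec_options = CodecOptions(type_registry=TypeRegistry([PositionCodec()]))
    users = mongo_client.get_collection("user", codec_options=codec_options)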
@@ -3,27 +3,17 @@ from typing import Optional
 from dotenv import load_dotenv
 from pymongo import UpdateOne

-from .monogo_db import MongoDB
-from jobspy import create_logger
-from jobspy.jobs import JobPost
+from scrapers import create_logger
+from jobs import JobPost
+from .monogo_db import mongo_client

 load_dotenv()


 class JobRepository:
-    _instance = None
-
-    def __new__(cls):
-
-        if cls._instance is not None:
-            return cls._instance
-
-        self = super().__new__(cls)
-        cls._instance = self
-        self.logger = create_logger("JobRepository")
-        mongo_client = MongoDB()
-        self.collection = mongo_client.db["jobs"]
-        return cls._instance
+    def __init__(self):
+        self._logger = create_logger("JobRepository")
+        self._collection = mongo_client.get_collection('jobs')

     def find_by_id(self, job_id: str) -> Optional[JobPost]:
         """
@@ -35,7 +25,7 @@ class JobRepository:
         Returns:
             The job document if found, otherwise None.
         """
-        result = self.collection.find_one({"id": job_id})
+        result = self._collection.find_one({"id": job_id})
         return JobPost(**result) if result else None

     def update(self, job: JobPost) -> bool:
@@ -48,7 +38,7 @@ class JobRepository:
         Returns:
             True if the update was successful, False otherwise.
         """
-        result = self.collection.update_one({"id": job.id}, {"$set": job.model_dump(exclude={"date_posted"})})
+        result = self._collection.update_one({"id": job.id}, {"$set": job.model_dump(exclude={"date_posted"})})
         return result.modified_count > 0

     def insert_job(self, job: JobPost):
@@ -62,8 +52,8 @@ class JobRepository:
             Exception: If an error occurs during insertion.
         """
         job_dict = job.model_dump(exclude={"date_posted"})
-        self.collection.insert_one(job_dict)
-        self.logger.info(f"Inserted new job with title {job.title}.")
+        self._collection.insert_one(job_dict)
+        self._logger.info(f"Inserted new job with title {job.title}.")

     def insert_many_if_not_found(self, jobs: list[JobPost]) -> tuple[list[JobPost], list[JobPost]]:
         """
@@ -86,8 +76,8 @@ class JobRepository:
         if operations:
             # Execute all operations in bulk
-            result = self.collection.bulk_write(operations)
-            self.logger.info(f"Matched: {result.matched_count}, Upserts: {
+            result = self._collection.bulk_write(operations)
+            self._logger.info(f"Matched: {result.matched_count}, Upserts: {
                 result.upserted_count}, Modified: {result.modified_count}")

             # Get the newly inserted jobs (those that were upserted)
@@ -99,3 +89,5 @@ class JobRepository:
                 old_jobs.append(job)

         return old_jobs, new_jobs
+
+job_repository = JobRepository()
@@ -0,0 +1,38 @@
from pymongo import MongoClient
from pymongo.synchronous.database import Database

from config.settings import settings
from scrapers.utils import create_logger


class MongoDB:
    def __init__(self):
        logger = create_logger("Mongo Client")
        mongo_uri = settings.mongo_uri
        if not mongo_uri:
            logger.error("MONGO_URI environment variable is not set")
            raise ValueError("MONGO_URI environment variable is not set")
        client = MongoClient(mongo_uri)
        database_name = settings.mongo_db_name
        if not database_name:
            logger.error("MONGO_DB_NAME environment variable is not set")
            raise ValueError(
                "MONGO_DB_NAME environment variable is not set")

        self._db: Database = client[database_name]
        logger.info("Successfully connected to MongoDB")

    def get_collection(self,
                       name: str,
                       codec_options=None,
                       read_preference=None,
                       write_concern=None,
                       read_concern=None):
        return self._db.get_collection(name,
                                       codec_options,
                                       read_preference,
                                       write_concern,
                                       read_concern)


mongo_client = MongoDB()
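
get_collection above simply forwards to pymongo's Database.get_collection, so callers fetch handles directly from the singleton, e.g.:

    jobs_collection = mongo_client.get_collection("jobs")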
@@ -0,0 +1,129 @@
from typing import Optional

from dotenv import load_dotenv
from pymongo import UpdateOne

from config.cache_manager import cache_manager
from scrapers.utils import create_logger
from .User import User
from .monogo_db import mongo_client

load_dotenv()


class UserRepository:
    def __init__(self):
        self._logger = create_logger("UserRepository")
        self._collection = mongo_client.get_collection('user')
        self._collection.create_index('username', unique=True)

    def find_by_id(self, user_id: str) -> Optional[User]:
        """
        Finds a user document in the collection by its ID.

        Args:
            user_id: The ID of the user to find.

        Returns:
            The user document if found, otherwise None.
        """
        user = None
        cached_user = cache_manager.find(user_id)
        if cached_user:
            return cached_user

        result = self._collection.find_one({"id": user_id})

        if result:
            user = User(**result)
            cache_manager.save(user_id, user)

        return user

    def find_by_username(self, username: str) -> Optional[User]:
        """
        Finds a user document in the collection by its username.

        Args:
            username: The username of the user to find.

        Returns:
            The user document if found, otherwise None.
        """
        user = None
        cached_user = cache_manager.find(username)
        if cached_user:
            return cached_user

        result = self._collection.find_one({"username": username})
        self._logger.info("find user by username")
        if result:
            user = User(**result)
            cache_manager.save(username, user)

        return user

    def update(self, user: User) -> bool:
        """
        Updates a User in the database.

        Args:
            user: A dictionary representing the User data.

        Returns:
            True if the update was successful, False otherwise.
        """
        result = self._collection.update_one({"username": user.username}, {"$set": user.model_dump()})
        return result.modified_count > 0

    def insert_user(self, user: User):
        """
        Inserts a new user into the database collection.

        Args:
            user (User): The User object to be inserted.

        Raises:
            Exception: If an error occurs during insertion.
        """
        self._collection.insert_one(user.model_dump())
        cache_manager.save(user.username, user)
        self._logger.info(f"Inserted new user with username {user.username}.")

    def insert_many_if_not_found(self, users: list[User]) -> tuple[list[User], list[User]]:
        """
        Perform bulk upserts for a list of User objects into a MongoDB collection.
        Only insert new users and return the list of newly inserted users.
        """
        operations = []
        new_users = []  # List to store the new users inserted into MongoDB
        old_users = []  # List to store users that already existed in MongoDB
        for user in users:
            user_dict = user.model_dump()
            operations.append(
                UpdateOne(
                    {"id": user.id},  # Match by `id`
                    # Only set fields if the user is being inserted (not updated)
                    {"$setOnInsert": user_dict},
                    upsert=True  # Insert if not found, but do not update if already exists
                )
            )

        if operations:
            # Execute all operations in bulk
            result = self._collection.bulk_write(operations)
            self._logger.info(f"Matched: {result.matched_count}, Upserts: {
                result.upserted_count}, Modified: {result.modified_count}")

            # Get the newly inserted users (those that were upserted)
            # The `upserted_count` corresponds to how many new documents were inserted
            for i, user in enumerate(users):
                if result.upserted_count > 0 and i < result.upserted_count:
                    new_users.append(user)
                else:
                    old_users.append(user)

        return old_users, new_users


user_repository = UserRepository()
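
Typical read path through the singleton above (username hypothetical): the first call queries Mongo and populates the LRU cache; subsequent calls are served from the cache:

    user = user_repository.find_by_username("ada")
    if user:
        print(user.get_myinfo_message())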
@@ -2,34 +2,36 @@ from __future__ import annotations

 import re
 from threading import Lock

-import pandas as pd
-from typing import Tuple
 from concurrent.futures import ThreadPoolExecutor, as_completed

-from .scrapers.site import Site
-from .scrapers.goozali import GoozaliScraper
-from .jobs import JobPost, JobType, Location
-from .scrapers.utils import set_logger_level, extract_salary, create_logger
-from .scrapers.indeed import IndeedScraper
-from .scrapers.ziprecruiter import ZipRecruiterScraper
-from .scrapers.glassdoor import GlassdoorScraper
-from .scrapers.google import GoogleJobsScraper
-from .scrapers.linkedin import LinkedInScraper
-from .scrapers import SalarySource, ScraperInput, JobResponse, Country
-from .scrapers.exceptions import (
-    LinkedInException,
-    IndeedException,
-    ZipRecruiterException,
-    GlassdoorException,
-    GoogleJobsException,
+from jobs import (
+    Enum,
+    JobType,
+    JobResponse,
+    Country,
+    JobPost,
 )
+from model.User import User
+from .glassdoor import GlassdoorScraper
+from .google import GoogleJobsScraper
+from .goozali import GoozaliScraper
+from .indeed import IndeedScraper
+from .linkedin import LinkedInScraper
+from .scraper_input import ScraperInput
+from .site import Site
+from .utils import set_logger_level, create_logger
+from .ziprecruiter import ZipRecruiterScraper


+class SalarySource(Enum):
+    DIRECT_DATA = "direct_data"
+    DESCRIPTION = "description"


 def scrape_jobs(
     site_name: str | list[str] | Site | list[Site] | None = None,
+    user: User = None,
     search_term: str | None = None,
     google_search_term: str | None = None,
     location: str | None = None,
@@ -55,7 +57,7 @@ def scrape_jobs(
 ) -> (list[JobPost], list[JobPost]):
     """
     Simultaneously scrapes job data from multiple job sites.
-    :return: pandas dataframe containing job data
+    :return: list of JobPost, list of new JobPost
     """
     SCRAPER_MAPPING = {
         Site.LINKEDIN: LinkedInScraper,
@@ -93,6 +95,7 @@ def scrape_jobs(

     country_enum = Country.from_string(country_indeed)
     scraper_input = ScraperInput(
+        user=user,
         site_type=get_site_type(),
         country=country_enum,
         search_term=search_term,
@@ -111,7 +114,7 @@ def scrape_jobs(
         hours_old=hours_old
     )

-    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
+    def scrape_site(site: Site) -> tuple[str, JobResponse]:
         scraper_class = SCRAPER_MAPPING[site]
         scraper = scraper_class(proxies=proxies, ca_cert=ca_cert)
         scraped_data: JobResponse = scraper.scrape(scraper_input)
@@ -166,6 +169,10 @@ def scrape_jobs(
     """
     filtered_jobs = []
    remaining_jobs = []
+
+    if not filter_by_title:
+        return filtered_jobs, remaining_jobs
+
     for job in jobs:
         for filter_title in filter_by_title:
             if re.search(filter_title, job.title, re.IGNORECASE):
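
A self-contained sketch of the title-filtering rule this hunk implements (inferred from the re.search call: a job whose title matches any filter, case-insensitively, goes to the filtered-out list; filter strings are treated as regex patterns; the filters here are hypothetical):

    import re

    title_filters = ["qa", "Full[- ]?Stack"]

    def is_filtered_out(title: str) -> bool:
        return any(re.search(f, title, re.IGNORECASE) for f in title_filters)

    assert is_filtered_out("Senior QA Engineer")
    assert not is_filtered_out("Backend Developer")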
@@ -1,5 +1,5 @@
 """
-jobspy.scrapers.exceptions
+scrapers.exceptions
 ~~~~~~~~~~~~~~~~~~~

 This module contains the set of Scrapers' exceptions.
@@ -1,5 +1,5 @@
 """
-jobspy.scrapers.glassdoor
+scrapers.glassdoor
 ~~~~~~~~~~~~~~~~~~~

 This module contains routines to scrape Glassdoor.
@@ -7,7 +7,6 @@ This module contains routines to scrape Glassdoor.

 from __future__ import annotations

-from dataclasses import dataclass
 import re
 import json
 import requests
@@ -18,14 +17,16 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from .GlassDoorLocation import GlassDoorLocationResponse, get_location_id, get_location_type

 from .constants import fallback_token, query_template, headers
-from .. import Scraper, ScraperInput, Site
+from ..scraper import Scraper
+from ..scraper_input import ScraperInput
+from ..site import Site
 from ..utils import extract_emails_from_text, create_logger
 from ..exceptions import GlassdoorException
 from ..utils import (
     create_session,
     markdown_converter,
 )
-from ...jobs import (
+from jobs import (
     JobPost,
     Compensation,
     CompensationInterval,
@@ -1,5 +1,5 @@
 """
-jobspy.scrapers.google
+scrapers.google
 ~~~~~~~~~~~~~~~~~~~

 This module contains routines to scrape Google.
@@ -14,12 +14,14 @@ from typing import Tuple
 from datetime import datetime, timedelta

 from .constants import headers_jobs, headers_initial, async_param
-from .. import Scraper, ScraperInput, Site
+from ..scraper import Scraper
+from ..scraper_input import ScraperInput
+from ..site import Site
 from ..utils import extract_emails_from_text, create_logger, extract_job_type
 from ..utils import (
     create_session,
 )
-from ...jobs import (
+from jobs import (
     JobPost,
     JobResponse,
     Location,
@@ -1,7 +1,7 @@
 from datetime import datetime
 import json

-from jobspy.jobs import JobPost, Location
+from jobs import JobPost, Location
 from .model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow, GoozaliColumn, GoozaliColumnChoice, GoozaliResponseData
 from .constants import job_post_column_to_goozali_column, job_post_column_names
@@ -1,6 +1,6 @@
 from datetime import datetime, timedelta

-from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice
+from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice, GoozaliFieldChoice
 from ..utils import create_logger

 # Mapping function to convert parsed dictionary into GoozaliResponseData
@@ -13,12 +13,20 @@ class GoozaliScrapperComponent:
         pass

     # Function to filter GoozaliRows based on hours old
-    def filter_rows_by_column_choice(self, rows: list[GoozaliRow], column: GoozaliColumn, column_choice: GoozaliColumnChoice) -> list[GoozaliRow]:
+    def filter_rows_by_column_choice(self, rows: list[GoozaliRow], column: GoozaliColumn,
+                                     column_choices: list[GoozaliColumnChoice]) -> list[GoozaliRow]:
         return [
-            row for row in rows
-            if row.cellValuesByColumnId[column.id] == column_choice.id
+            row
+            for row in rows
+            if row.cellValuesByColumnId.get(column.id)
+            and any(choice.id == row.cellValuesByColumnId[column.id] for choice in column_choices)
         ]

+        # return [
+        #     row for row in rows
+        #     if row.cellValuesByColumnId[column.id] == column_choice.id
+        # ]

     def filter_rows_by_hours(self, rows: list[GoozaliRow], hours: int) -> list[GoozaliRow]:
         # Current time
         now = datetime.now()
@@ -39,14 +47,20 @@ class GoozaliScrapperComponent:
         if (column.name == column_name):
             return column

-    def find_choice_from_column(self, column: GoozaliColumn, choice_name: str) -> GoozaliColumnChoice:
+    def find_choices_from_column(self, column: GoozaliColumn, choices: list[GoozaliFieldChoice]) -> list[
+        GoozaliColumnChoice]:
         if not column.typeOptions.choices:
             logger.exception(f"Choices for column {column.name} don't exist")
             raise Exception(f"Choices for column {column.name} don't exist")
+        chosen_values = [c.value for c in choices]
+        goozali_column_choices = []

         for key, choice in column.typeOptions.choices.items():
-            if (choice.name == choice_name):
-                return choice
+            if choice.name in chosen_values:
+                goozali_column_choices.append(choice)

-        logger.exception(f"Can't find {choice_name} for column {column.name}")
-        raise Exception(f"Can't find {choice_name} for column {column.name}")
+        if len(goozali_column_choices) == 0:
+            logger.exception(f"Can't find {choices} for column {column.name}")
+            raise Exception(f"Can't find {choices} for column {column.name}")
+
+        return goozali_column_choices
@@ -1,5 +1,5 @@
 """
-jobspy.scrapers.Goozali
+scrapers.Goozali
 ~~~~~~~~~~~~~~~~~~~

 This module contains routines to scrape Goozali.
@@ -7,20 +7,20 @@ This module contains routines to scrape Goozali.

 from __future__ import annotations


-from .. import Scraper, ScraperInput
-from .GoozaliMapper import GoozaliMapper
-from .GoozaliScrapperComponent import GoozaliScrapperComponent
-from .constants import extract_goozali_column_name, job_post_column_to_goozali_column
-from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
-from ..site import Site
-
-from ..utils import create_dict_by_key_and_value, create_session, create_logger
-from ...jobs import (
+from jobs import (
     JobPost,
     JobResponse,
 )
-logger = create_logger("Goozali")
+from .GoozaliMapper import GoozaliMapper
+from .GoozaliScrapperComponent import GoozaliScrapperComponent
+from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map
+from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
+from ..scraper import Scraper
+from ..scraper_input import ScraperInput
+from ..site import Site
+from ..utils import create_dict_by_key_and_value, create_session, create_logger
+
+logger = create_logger("GoozaliScraper")


 class GoozaliScraper(Scraper):
@@ -67,27 +67,24 @@ class GoozaliScraper(Scraper):
             logger.info(f"response: {str(response)}")
             if (response.status_code != 200):
                 logger.error(f"Status code: {response.status_code}, Error: {
                     str(response.text)}")
                 return JobResponse(jobs=job_list)
         except Exception as e:
             logger.error(f"Exception: {str(e)}")
             return JobResponse(jobs=job_list)
         # model the response with models
         goozali_response = self.mapper.map_response_to_goozali_response(
             response=response)
         # suggestion: create a groupby field and then filter by hours
         # filter result by Field
         column = self.component.find_column(
             goozali_response.data.columns, job_post_column_to_goozali_column["field"])
-        column_choice = self.component.find_choice_from_column(
-            column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value)
+        user_goozali_fields = position_to_goozali_field_map[scraper_input.user.position]
+        column_choices = self.component.find_choices_from_column(
+            column, user_goozali_fields)
         filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice(
-            goozali_response.data.rows, column, column_choice)
+            goozali_response.data.rows, column, column_choices)
         filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours(
             filtered_rows_by_column_choice, scraper_input.hours_old)
         dict_column_name_to_column: dict[str, GoozaliColumn] = create_dict_by_key_and_value(
             goozali_response.data.columns, extract_goozali_column_name)
         # map to JobResponse Object
         for row in filtered_rows_by_age_and_column_choice:
             job_post = self.mapper.map_goozali_response_to_job_post(
                 row, dict_column_name_to_column)
@@ -0,0 +1,92 @@
from model.Position import Position
from .model import GoozaliColumn, GoozaliFieldChoice

job_post_column_to_goozali_column = {
    "date_posted": "Discovered",
    "field": "Field",
    "title": "Job Title",
    "job_url": "Position Link",
    "company_name": "Company",
    "description": "Requirements",
    "location": "Location",
    "company_industry": "Company Industry",
    "id": "Job ID"
}

job_post_column_names = ["id",
                         "date_posted",
                         "field",
                         "title",
                         "job_url",
                         "company_name",
                         "description",
                         "location",
                         "company_industry"]

fields = ["Product Management",
          "Data Analyst",
          "Data Science, ML & Algorithms",
          "Software Engineering",
          "QA",
          "Cybersecurity",
          "IT and System Administration",
          "Frontend Development",
          "DevOps",
          "UI/UX, Design & Content",
          "HR & Recruitment",
          "Mobile Development",
          "Hardware Engineering",
          "Embedded, Low Level & Firmware Engineering",
          "Customer Success",
          "Project Management",
          "Operations",
          "Finance",
          "Systems Engineering",
          "Marketing",
          "Sales",
          "Compliance, Legal & Policy",
          "C-Level",
          "Business Development",
          "Mechanical Engineering",
          "Natural Science",
          "Other"]

def create_position_to_goozali_field_map():
    """
    Creates a map with Position as keys and a list of relevant GoozaliFieldChoice as values.

    Returns:
        dict: A dictionary mapping Position to a list of GoozaliFieldChoice.
    """
    position_to_goozali_map = {
        Position.BACKEND_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING],
        Position.FULLSTACK_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING],
        Position.FRONTEND_DEVELOPER: [GoozaliFieldChoice.FRONTEND_DEVELOPMENT, GoozaliFieldChoice.SOFTWARE_ENGINEERING],
        Position.DATA_SCIENTIST: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS],
        Position.DATA_ANALYST: [GoozaliFieldChoice.DATA_ANALYST],
        Position.PROJECT_MANAGER: [GoozaliFieldChoice.PROJECT_MANAGEMENT],
        Position.CLOUD_ENGINEER: [GoozaliFieldChoice.DEVOPS, GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
        Position.CLOUD_ARCHITECT: [GoozaliFieldChoice.DEVOPS, GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
        Position.UX_UI_DESIGNER: [GoozaliFieldChoice.UI_UX_DESIGN_CONTENT],
        Position.PRODUCT_MANAGER: [GoozaliFieldChoice.PRODUCT_MANAGEMENT],
        Position.DEV_OPS_ENGINEER: [GoozaliFieldChoice.DEVOPS],
        Position.BUSINESS_ANALYST: [GoozaliFieldChoice.BUSINESS_DEVELOPMENT],
        Position.CYBERSECURITY_ENGINEER: [GoozaliFieldChoice.CYBERSECURITY],
        Position.MACHINE_LEARNING_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS],
        Position.ARTIFICIAL_INTELLIGENCE_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS],
        Position.DATABASE_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
        Position.SYSTEMS_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
        Position.NETWORK_ENGINEER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
        Position.TECHNICAL_SUPPORT_SPECIALIST: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
        Position.SALES_ENGINEER: [GoozaliFieldChoice.SALES],
        Position.SCRUM_MASTER: [GoozaliFieldChoice.PROJECT_MANAGEMENT],
        Position.IT_MANAGER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
    }
    return position_to_goozali_map

# Get the map
position_to_goozali_field_map = create_position_to_goozali_field_map()
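
An illustrative lookup against the map above: a frontend-developer search scans both relevant Goozali fields.

    user_fields = position_to_goozali_field_map[Position.FRONTEND_DEVELOPER]
    # -> [GoozaliFieldChoice.FRONTEND_DEVELOPMENT, GoozaliFieldChoice.SOFTWARE_ENGINEERING]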
# Key mapper: Extract 'name' as the key
def extract_goozali_column_name(column): return column.name if isinstance(
    column, GoozaliColumn) else None
@@ -6,13 +6,13 @@ class GoozaliFullRequest():
         self.view_id: str = "viwIOzPYaUGxlA0Jd"
         self.url = base_url.format(view_id=self.view_id)
         self.application_id: str = "appwewqLk7iUY4azc"
-        self.air_table_page_load_id: str = "pglqAAzFDZEWCEC7s"
+        self.air_table_page_load_id: str = "pglke45UFwdvQgBNJ"
         self.stringifiedObjectParams = {
             "shouldUseNestedResponseFormat": "true"}
         self.cookies: dict[str, str] = {}
-        self.request_id: str = "req4q4tKw3woEEWxw&"
+        self.request_id: str = "reqGjlEjOQFyRssam"
         self.share_id: str = "shrQBuWjXd0YgPqV6"
-        self.signature: str = "be8bd40c133f051f929ebab311c416013f5af0d5acae4264575b88ccf051ee59"
+        self.signature: str = "7a1402a3f7f6f9a23c8db3849878812f2d3141da60f3b3d6e14dd4a910b91b74"
         self.headers = self._generate_headers()
         self.params = self._generate_params()
         self.cookies = {}
@@ -66,7 +66,7 @@ class GoozaliFullRequest():
         "shareId": self.share_id,
         "applicationId": self.application_id,
         "generationNumber": 0,
-        "expires": "2025-01-02T00:00:00.000Z",
+        "expires": "2025-01-30T00:00:00.000Z",
         "signature": self.signature
     }
     # Convert to a JSON string
@@ -1,5 +1,5 @@
 """
-jobspy.scrapers.indeed
+scrapers.indeed
 ~~~~~~~~~~~~~~~~~~~

 This module contains routines to scrape Indeed.
@@ -12,7 +12,9 @@ from typing import Tuple
 from datetime import datetime

 from .constants import job_search_query, api_headers
-from .. import Scraper, ScraperInput, Site
+from ..scraper import Scraper
+from ..scraper_input import ScraperInput
+from ..site import Site
 from ..utils import (
     extract_emails_from_text,
     get_enum_from_job_type,
@@ -20,7 +22,7 @@ from ..utils import (
     create_session,
     create_logger,
 )
-from ...jobs import (
+from jobs import (
     JobPost,
     Compensation,
     CompensationInterval,
@@ -35,7 +37,7 @@ logger = create_logger("Indeed")

 class IndeedScraper(Scraper):
     def __init__(
         self, proxies: list[str] | str | None = None, ca_cert: str | None = None
     ):
         """
         Initializes IndeedScraper with the Indeed API url
@@ -74,7 +76,7 @@ class IndeedScraper(Scraper):
         while len(self.seen_urls) < scraper_input.results_wanted + scraper_input.offset:
             logger.info(
                 f"search page: {
                     page} / {math.ceil(scraper_input.results_wanted / self.jobs_per_page)}"
             )
             jobs, cursor = self._scrape_page(cursor, location)
             if not jobs:
@@ -85,9 +87,9 @@ class IndeedScraper(Scraper):

         return JobResponse(
             jobs=job_list[
                 scraper_input.offset: scraper_input.offset
                 + scraper_input.results_wanted
             ]
         )

     def _scrape_page(self, cursor: str | None, location: str) -> Tuple[list[JobPost], str | None]:
@@ -108,7 +110,7 @@ class IndeedScraper(Scraper):
             what=(f'what: "{search_term}"' if search_term else ""),
             location=(
                 f'location: {{where: "{location}", radius: {
                     self.scraper_input.distance}, radiusUnit: MILES}}'
                 if location
                 else ""
             ),
@@ -130,7 +132,7 @@ class IndeedScraper(Scraper):
         if not response.ok:
             logger.info(
                 f"responded with status code: {
                     response.status_code} (submit GitHub issue if this appears to be a bug)"
             )
             return jobs, new_cursor
         data = response.json()
@@ -232,7 +234,7 @@ class IndeedScraper(Scraper):
             company_name=job["employer"].get(
                 "name") if job.get("employer") else None,
             company_url=(f"{self.base_url}{
                 rel_url}" if job["employer"] else None),
             company_url_direct=(
                 employer["links"]["corporateWebsite"] if employer else None
             ),
@@ -345,7 +347,7 @@ class IndeedScraper(Scraper):
                 for keyword in remote_keywords
             )
         return (
             is_remote_in_attributes or is_remote_in_description or is_remote_in_location
         )

     @staticmethod
@@ -1,5 +1,5 @@
 """
-jobspy.scrapers.linkedin
+scrapers.linkedin
 ~~~~~~~~~~~~~~~~~~~

 This module contains routines to scrape LinkedIn.
@@ -17,13 +17,15 @@ from datetime import datetime
 from bs4.element import Tag
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse, urlunparse, unquote
-from requests.exceptions import RetryError, RequestException
+from requests.exceptions import RetryError
 from urllib3.exceptions import MaxRetryError
 from .constants import headers
-from .. import Scraper, ScraperInput, Site
+from ..scraper import Scraper
+from ..scraper_input import ScraperInput
+from ..site import Site
 from ..exceptions import LinkedInException
 from ..utils import create_session, remove_attributes, create_logger
-from ...jobs import (
+from jobs import (
     JobPost,
     Location,
     JobResponse,
@@ -0,0 +1,17 @@
from abc import ABC, abstractmethod

from jobs import JobResponse
from scrapers.site import Site
from scrapers.scraper_input import ScraperInput


class Scraper(ABC):
    def __init__(
        self, site: Site, proxies: list[str] | None = None, ca_cert: str | None = None
    ):
        self.site = site
        self.proxies = proxies
        self.ca_cert = ca_cert

    @abstractmethod
    def scrape(self, scraper_input: ScraperInput) -> JobResponse: ...
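
A minimal subclass sketch against the extracted base class (DummyScraper and its behavior are hypothetical, shown only to illustrate the contract):

    from jobs import JobResponse
    from scrapers.scraper import Scraper
    from scrapers.scraper_input import ScraperInput
    from scrapers.site import Site

    class DummyScraper(Scraper):
        def __init__(self, proxies=None, ca_cert=None):
            super().__init__(Site.LINKEDIN, proxies=proxies, ca_cert=ca_cert)

        def scrape(self, scraper_input: ScraperInput) -> JobResponse:
            return JobResponse(jobs=[])  # no-op: returns an empty result set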
@@ -1,25 +1,13 @@
 from __future__ import annotations
+from pydantic import BaseModel

-from abc import ABC, abstractmethod
-
-from .site import Site
-from ..jobs import (
-    Enum,
-    BaseModel,
-    JobType,
-    JobResponse,
-    Country,
-    DescriptionFormat,
-)
-
-
-class SalarySource(Enum):
-    DIRECT_DATA = "direct_data"
-    DESCRIPTION = "description"
+from jobs import Country, JobType, DescriptionFormat
+from model.User import User
+from scrapers.site import Site


 class ScraperInput(BaseModel):
     site_type: list[Site]
+    user: User
     search_term: str | None = None
     google_search_term: str | None = None
@@ -37,15 +25,3 @@ class ScraperInput(BaseModel):
     results_wanted: int = 15
     hours_old: int | None = None
-
-
-class Scraper(ABC):
-    def __init__(
-        self, site: Site, proxies: list[str] | None = None, ca_cert: str | None = None
-    ):
-        self.site = site
-        self.proxies = proxies
-        self.ca_cert = ca_cert
-
-    @abstractmethod
-    def scrape(self, scraper_input: ScraperInput) -> JobResponse: ...
@@ -11,11 +11,11 @@ import numpy as np
 from markdownify import markdownify as md
 from requests.adapters import HTTPAdapter, Retry

-from ..jobs import CompensationInterval, JobType
+from jobs import CompensationInterval, JobType


 def create_logger(name: str):
-    logger = logging.getLogger(f"JobSpy:{name}")
+    logger = logging.getLogger(f"JobSeekerTG:{name}")
     logger.propagate = False
     if not logger.handlers:
         logger.setLevel(logging.INFO)
@@ -143,7 +143,7 @@ def set_logger_level(verbose: int = 2):
     level = getattr(logging, level_name.upper(), None)
     if level is not None:
         for logger_name in logging.root.manager.loggerDict:
-            if logger_name.startswith("JobSpy:"):
+            if logger_name.startswith("JobSeekerTG:"):
                 logging.getLogger(logger_name).setLevel(level)
     else:
         raise ValueError(f"Invalid log level: {level_name}")
@@ -1,5 +1,5 @@
 """
-jobspy.scrapers.ziprecruiter
+scrapers.ziprecruiter
 ~~~~~~~~~~~~~~~~~~~

 This module contains routines to scrape ZipRecruiter.
@@ -19,7 +19,9 @@ from concurrent.futures import ThreadPoolExecutor
 from bs4 import BeautifulSoup

 from .constants import headers
-from .. import Scraper, ScraperInput, Site
+from ..site import Site
+from ..scraper import Scraper
+from ..scraper_input import ScraperInput
 from ..utils import (
     extract_emails_from_text,
     create_session,
@@ -27,7 +29,7 @@ from ..utils import (
     remove_attributes,
     create_logger,
 )
-from ...jobs import (
+from jobs import (
     JobPost,
     Compensation,
     Location,
@@ -1,4 +1,3 @@
-import os
 from typing import Union

 from dotenv import load_dotenv
@@ -6,8 +5,8 @@ from telegram import Bot, InlineKeyboardButton, InlineKeyboardMarkup
 from telegram.constants import ReactionEmoji

 from config.settings import settings
-from jobspy.jobs import JobPost
-from jobspy.scrapers.utils import create_logger
+from jobs import JobPost
+from scrapers.utils import create_logger

 load_dotenv()
@@ -3,8 +3,8 @@ from __future__ import annotations
 from telegram import MaybeInaccessibleMessage
 from telegram.constants import ReactionEmoji

-from db.job_repository import JobRepository
-from jobspy import create_logger
+from scrapers import create_logger
+from model.job_repository import job_repository
 from telegram_handler.button_callback.button_fire_strategy import FireStrategy
 from telegram_handler.button_callback.button_job_title_strategy import JobTitleStrategy
 from telegram_handler.button_callback.button_poo_strategy import PooStrategy
@@ -22,7 +22,6 @@ class ButtonCallBackContext:
         self._data = data
         self._job_id = job_id
         self._strategy = None
-        self._job_repository = JobRepository()

     @property
     def strategy(self) -> ButtonStrategy:
@@ -49,10 +48,10 @@ class ButtonCallBackContext:
         elif ReactionEmoji.PILE_OF_POO.name == self._data:
             self._strategy = PooStrategy(self._message)
         elif self._data:
-            job = self._job_repository.find_by_id(self._data)
+            job = job_repository.find_by_id(self._data)
             if job:
                 chat_id = self._message.chat.id
-                self._strategy = JobTitleStrategy(chat_id,job)
+                self._strategy = JobTitleStrategy(chat_id, job)
             else:
                 self._logger.error("Invalid enum value")
                 return
@@ -1,8 +1,8 @@
 from telegram import MaybeInaccessibleMessage
 from telegram.constants import ReactionEmoji

-from db.job_repository import JobRepository
-from jobspy import create_logger
+from scrapers import create_logger
+from model.job_repository import job_repository
 from telegram_bot import TelegramBot
 from telegram_handler.button_callback.button_strategy import ButtonStrategy

@@ -16,16 +16,15 @@ class FireStrategy(ButtonStrategy):
         self._message = message
         self._emoji = ReactionEmoji.FIRE
         self._telegram_bot = TelegramBot()
-        self._job_repository = JobRepository()
         self._job_id = job_id
         self._logger = create_logger("FireStrategy")

     async def execute(self):
-        job = self._job_repository.find_by_id(self._job_id)
+        job = job_repository.find_by_id(self._job_id)
         if not job:
             self._logger.error(f"Job with ID {self._job_id} not found.")
             return
         job.applied = True
-        self._job_repository.update(job)
+        job_repository.update(job)
         chat_id = self._message.chat.id
         await self._telegram_bot.set_message_reaction(chat_id, self._message.message_id, self._emoji)
@@ -1,6 +1,6 @@
 from typing import Union

-from jobspy import JobPost
+from scrapers import JobPost
 from telegram_bot import TelegramBot
 from telegram_handler.button_callback.button_strategy import ButtonStrategy
@@ -3,7 +3,7 @@ from telegram.ext import (
     ContextTypes,
 )

-from jobspy import create_logger
+from scrapers import create_logger
 from telegram_bot import TelegramBot
 from telegram_handler.button_callback.button_callback_context import ButtonCallBackContext
@@ -0,0 +1,46 @@
START_MESSAGE: str = "Hi there! I'm JobSeeker Bot, your friendly job search assistant.😊\n" \
                     "I'm here to help you find the perfect position.\n\n" \
                     "To stop chatting with me at any time, just send '/cancel'.\n\n"

POSITION_MESSAGE: str = "What kind of position are you looking for? ✨\n" \
                        "(e.g., Software Engineer, Data Scientist, Marketing Manager)"

POSITION_NOT_FOUND: str = "I couldn't find any positions matching your request. 😕\n" \
                          "Please try again"
multi_value_message: str = "Enter multiple values separated by commas (e.g., value1, value2, value3) ✍️"

LOCATION_MESSAGE: str = "Where are you hoping to find a position? 🌎\n" \
                        "(e.g., Rishon Lezion, New York City, San Francisco)\n\n" + multi_value_message

EXPERIENCE_MESSAGE: str = "How many years of professional experience do you have in this field? 💼\n"

EXPERIENCE_INVALID: str = "Oops! Please enter your experience in years as a number.😕\n" \
                          "For example, 2, 5, or 10."

JOB_AGE_MESSAGE: str = "How recent should the jobs be? ⏰\n" \
                       "(Enter the number of hours, e.g., 24 for last 24 hours, 168 for last week)"

# JOB_AGE_MESSAGE: str = "Within how many hours do you want to see jobs posted? ⏰\n" \
#                        "(Enter a number, e.g., 48 for the last 48 hours)"

JOB_AGE_INVALID: str = "Oops! Please enter a number for the number of hours. 😕\n" \
                       "For example, 24, 48, or 168."

FILTER_TILE_MESSAGE: str = "To help me narrow down your search, tell me about any NOT relevant tags or keywords.\n" \
                           "For example: 'remote', 'BI', 'python', 'machine learning', 'QA'.\n\n" + multi_value_message

THANK_YOU_MESSAGE: str = "Thank you for chatting with JobSeeker Bot!\n\n" \
                         "I can help you find jobs on LinkedIn, Glassdoor, and more."

SEARCH_MESSAGE: str = "To search for jobs on a specific site, simply send the site name:\n" \
                      "/linkedin\n" \
                      "/indeed\n" \
                      "/glassdoor\n" \
                      "/goozali\n\n" \
                      "Or, use the command /find to search across all supported job boards for a broader search.\n\n" \
                      "Let me know how I can assist you further! 😊"

BYE_MESSAGE: str = "Have a great day!✨\n" \
                   "I hope to assist you with your job search in the future.😊"

VERIFY_MESSAGE: str = "Did you choose: %s ? 🧐"
@@ -4,9 +4,10 @@ from telegram.ext import (
     ContextTypes,
 )

-from db.job_repository import JobRepository
-from jobspy import Site, scrape_jobs, JobPost
-from jobspy.scrapers.utils import create_logger
+from scrapers import Site, scrape_jobs, JobPost
+from scrapers.utils import create_logger
+from model.job_repository import JobRepository
+from model.user_repository import user_repository
 from telegram_bot import TelegramBot
 from telegram_handler.telegram_handler import TelegramHandler

@@ -33,11 +34,8 @@ def map_jobs_to_keyboard(jobs: list[JobPost]) -> InlineKeyboardMarkup:


 class TelegramDefaultHandler(TelegramHandler):
-    def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str], search_term: str):
+    def __init__(self, sites: list[Site]):
         self.sites_to_scrap = sites
-        self.locations = locations
-        self.search_term = search_term
-        self.title_filters = title_filters
         self.telegram_bot = TelegramBot()
         self.jobRepository = JobRepository()
         if len(sites) == 1:
@@ -51,17 +49,21 @@ class TelegramDefaultHandler(TelegramHandler):
         chat_id = update.message.chat.id
         await self.telegram_bot.set_message_reaction(chat_id,
                                                      update.message.message_id, ReactionEmoji.FIRE)
+        user = user_repository.find_by_username(update.message.from_user.username)

         site_names = [site.name for site in self.sites_to_scrap]
         site_names_print = ", ".join(site_names)
+        # locations = [location + ", Israel" for location in user.cities]
         await self.telegram_bot.send_text(chat_id,
                                           f"Start scraping: {site_names_print}")
         filtered_out_jobs, jobs = scrape_jobs(
             site_name=self.sites_to_scrap,
-            search_term=self.search_term,
-            locations=self.locations,
+            user=user,
+            search_term=user.position.value,
+            locations=user.cities,
             results_wanted=200,
-            hours_old=48,
-            filter_by_title=self.title_filters,
+            hours_old=int(user.job_age),
+            filter_by_title=user.title_filters,
             country_indeed='israel'
         )
         self.logger.info(f"Found {len(jobs)} jobs")
@@ -0,0 +1,29 @@
from telegram import Update
from telegram.constants import ReactionEmoji
from telegram.ext import (
    ContextTypes,
)

from scrapers.utils import create_logger
from model.user_repository import user_repository
from telegram_bot import TelegramBot
from telegram_handler.telegram_handler import TelegramHandler


class MyInfoTelegramHandler(TelegramHandler):
    def __init__(self):
        self.telegram_bot = TelegramBot()
        self._logger = create_logger("MyInfoTelegramHandler")

    async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
        self._logger.info("start handling")
        chat_id = update.message.chat.id
        await self.telegram_bot.set_message_reaction(chat_id,
                                                     update.message.message_id, ReactionEmoji.FIRE)
        user = user_repository.find_by_username(update.message.from_user.username)
        await self.telegram_bot.send_text(chat_id, user.get_myinfo_message())

        self._logger.info("finished handling")


my_info_handler = MyInfoTelegramHandler()
@ -0,0 +1,216 @@
|
|||
from enum import Enum
|
||||
|
||||
from telegram import Update, Chat, KeyboardButton, ReplyKeyboardMarkup, ReplyKeyboardRemove
|
||||
from telegram.constants import ReactionEmoji
|
||||
from telegram.ext import (
|
||||
ContextTypes, ConversationHandler, CommandHandler, MessageHandler, filters,
|
||||
)
|
||||
|
||||
from config.cache_manager import cache_manager
|
||||
from model.Position import Position
|
||||
from model.User import User
|
||||
from model.user_repository import user_repository
|
||||
from scrapers.utils import create_logger
|
||||
from telegram_bot import TelegramBot
|
||||
from telegram_handler.start_handler_constats import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \
|
||||
LOCATION_MESSAGE, EXPERIENCE_MESSAGE, FILTER_TILE_MESSAGE, THANK_YOU_MESSAGE, BYE_MESSAGE, VERIFY_MESSAGE, \
|
||||
SEARCH_MESSAGE, EXPERIENCE_INVALID, JOB_AGE_INVALID, JOB_AGE_MESSAGE
|
||||
|
||||
|
||||
class Flow(Enum):
|
||||
POSITION = 0
|
||||
ADDRESS = 1
|
||||
FILTERS = 2
|
||||
EXPERIENCE = 3
|
||||
VERIFY_ADDRESS = 4
|
||||
VERIFY_FILTERS = 5
|
||||
SKIP_FILTERS = 6
|
||||
JOB_AGE = 7
|
||||
|
||||
|
||||
class TelegramStartHandler:
|
||||
|
||||
def __init__(self):
|
||||
self.telegram_bot = TelegramBot()
|
||||
self.logger = create_logger("TelegramStartHandler")
|
||||
|
||||
async def start(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
|
||||
"""Starts the conversation and asks the user about their position."""
|
||||
chat: Chat = update.message.chat
|
||||
user = user_repository.find_by_username(chat.username)
|
||||
if not user:
|
||||
user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id)
|
||||
user_repository.insert_user(user)
|
||||
|
||||
await update.message.reply_text(START_MESSAGE)
|
||||
|
||||
buttons = [[KeyboardButton(position.value)] for position in Position]
|
||||
reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True,
|
||||
input_field_placeholder=Flow.POSITION.name)
|
||||
await update.message.reply_text(
|
||||
POSITION_MESSAGE,
|
||||
reply_markup=reply_markup,
|
||||
)
|
||||
|
||||
return Flow.POSITION.value
|
||||
|
||||
async def position(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
|
||||
"""Stores the selected position and asks for a locations."""
|
||||
user = update.message.from_user
|
||||
self.logger.info("Position of %s: %s", user.first_name, update.message.text)
|
||||
position = next((p for p in Position if p.value == update.message.text), None)
|
||||
if not position:
|
||||
await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
|
||||
await update.message.reply_text(POSITION_NOT_FOUND)
|
||||
buttons = [[KeyboardButton(position.value)] for position in Position]
|
||||
reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True,
|
||||
input_field_placeholder=Flow.POSITION.name)
|
||||
await update.message.reply_text(
|
||||
POSITION_MESSAGE,
|
||||
reply_markup=reply_markup,
|
||||
)
|
||||
return Flow.POSITION.value
|
||||
|
||||
await update.message.set_reaction(ReactionEmoji.FIRE)
|
||||
cached_user: User = cache_manager.find(user.username)
|
||||
cached_user.position = position
|
||||
cache_manager.save(cached_user.username, cached_user)
|
||||
await update.message.reply_text(LOCATION_MESSAGE)
|
||||
|
||||
return Flow.ADDRESS.value
|
||||
|
||||
    async def address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        """Stores the requested cities and asks the user to verify them."""
        cities = update.message.text.split(",")
        # Remove leading/trailing spaces from each city name
        cities = [city.strip() for city in cities]
        await update.message.set_reaction(ReactionEmoji.FIRE)
        reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True,
                                           input_field_placeholder=Flow.VERIFY_ADDRESS.name)
        await update.message.reply_text(VERIFY_MESSAGE % cities, reply_markup=reply_markup)

        cached_user: User = cache_manager.find(update.message.from_user.username)
        cached_user.cities = cities
        cache_manager.save(cached_user.username, cached_user)

        return Flow.VERIFY_ADDRESS.value

    async def verify_address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        """Verifies the address list, then moves on to experience."""
        if update.message.text == "No":
            await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
            await update.message.reply_text(LOCATION_MESSAGE)
            return Flow.ADDRESS.value

        await update.message.set_reaction(ReactionEmoji.FIRE)
        await update.message.reply_text(EXPERIENCE_MESSAGE)

        return Flow.EXPERIENCE.value

    async def experience(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        """Stores the years of experience and asks for the job age."""
        user = update.message.from_user
        self.logger.info("Experience of %s: %s", user.first_name, update.message.text)

        if not update.message.text.isnumeric():
            await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
            await update.message.reply_text(EXPERIENCE_INVALID)
            await update.message.reply_text(EXPERIENCE_MESSAGE)

            return Flow.EXPERIENCE.value

        await update.message.set_reaction(ReactionEmoji.FIRE)
        cached_user: User = cache_manager.find(update.message.from_user.username)
        cached_user.experience = update.message.text
        cache_manager.save(cached_user.username, cached_user)
        await update.message.reply_text(JOB_AGE_MESSAGE)
        return Flow.JOB_AGE.value

    async def job_age(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        """Stores the maximum job age (in hours) and asks for title filters."""
        user = update.message.from_user
        self.logger.info("Job age of %s: %s", user.first_name, update.message.text)

        # Validate before reacting, matching the other steps (the original code
        # reacted with FIRE before validation and again after it).
        if not update.message.text.isnumeric():
            await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
            await update.message.reply_text(JOB_AGE_INVALID)
            await update.message.reply_text(JOB_AGE_MESSAGE)

            return Flow.JOB_AGE.value

        await update.message.set_reaction(ReactionEmoji.FIRE)
        cached_user: User = cache_manager.find(update.message.from_user.username)
        cached_user.job_age = update.message.text
        cache_manager.save(cached_user.username, cached_user)
        await update.message.reply_text(FILTER_TILE_MESSAGE)

        return Flow.FILTERS.value

    async def filters_flow(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        """Stores the title filters and asks the user to verify them."""
        await update.message.set_reaction(ReactionEmoji.FIRE)
        title_filters = update.message.text.split(",")
        # Remove leading/trailing spaces from each title filter
        title_filters = [title_filter.strip() for title_filter in title_filters]
        reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True,
                                           input_field_placeholder=Flow.VERIFY_FILTERS.name)
        await update.message.reply_text(VERIFY_MESSAGE % title_filters, reply_markup=reply_markup)

        cached_user: User = cache_manager.find(update.message.from_user.username)
        cached_user.title_filters = title_filters
        cache_manager.save(cached_user.username, cached_user)

        return Flow.VERIFY_FILTERS.value

    async def verify_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        """Verifies the title filters and persists the collected user."""
        if update.message.text == "No":
            await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
            await update.message.reply_text(FILTER_TILE_MESSAGE)
            return Flow.FILTERS.value

        await update.message.set_reaction(ReactionEmoji.FIRE)
        await update.message.reply_text(THANK_YOU_MESSAGE)
        await update.message.reply_text(SEARCH_MESSAGE)
        cached_user: User = cache_manager.find(update.message.from_user.username)
        user_repository.update(cached_user)
        return ConversationHandler.END

    async def skip_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        """Skips the title filters and ends the conversation."""
        await update.message.set_reaction(ReactionEmoji.FIRE)
        user = update.message.from_user
        self.logger.info("User %s did not send any filters.", user.first_name)
        await update.message.reply_text(THANK_YOU_MESSAGE)
        await update.message.reply_text(SEARCH_MESSAGE)
        # Persist what was collected so far, mirroring verify_filter.
        cached_user: User = cache_manager.find(user.username)
        if cached_user:
            user_repository.update(cached_user)

        return ConversationHandler.END

    async def cancel(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        """Cancels and ends the conversation."""
        await update.message.set_reaction(ReactionEmoji.FIRE)
        user = update.message.from_user
        self.logger.info("User %s canceled the conversation.", user.first_name)
        await update.message.reply_text(
            BYE_MESSAGE, reply_markup=ReplyKeyboardRemove()
        )
        cached_user: User = cache_manager.find(user.username)
        if cached_user:
            # update() takes the user object itself (see verify_filter); passing
            # the username as an extra first argument was a bug.
            user_repository.update(cached_user)
        return ConversationHandler.END


start_handler = TelegramStartHandler()
start_conv_handler = ConversationHandler(
    entry_points=[CommandHandler("start", start_handler.start)],
    states={
        # ~filters.COMMAND keeps /cancel reachable as a fallback inside each state.
        Flow.POSITION.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.position)],
        Flow.ADDRESS.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.address)],
        Flow.VERIFY_ADDRESS.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.verify_address)],
        Flow.EXPERIENCE.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.experience)],
        Flow.JOB_AGE.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.job_age)],
        Flow.FILTERS.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.filters_flow)],
        Flow.VERIFY_FILTERS.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.verify_filter)],
    },
    fallbacks=[CommandHandler("cancel", start_handler.cancel)],
)
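
# Usage sketch (not part of this module): the conversation handler above is
# expected to be registered on a python-telegram-bot Application before
# polling starts, e.g.:
#
#     application = Application.builder().token("<bot-token>").build()
#     application.add_handler(start_conv_handler)
#     application.run_polling()
#
# `Application` comes from telegram.ext; the token string is a placeholder.
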
@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd

@ -1,6 +1,6 @@
from dotenv import load_dotenv

from db.job_repository import JobRepository
from model.job_repository import JobRepository
from tests.test_util import createMockJob

load_dotenv()
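
# Sketch of the flow this test file presumably exercises (the method name
# insert_job is an assumption based on the imports above, not a verified
# JobRepository API):
#
#     repository = JobRepository()
#     job = createMockJob()
#     repository.insert_job(job)
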
@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd

@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd

@ -1,12 +1,12 @@
import json
import os

from jobspy.jobs import JobPost
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
from jobspy.scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column
from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliResponseData
from jobspy.scrapers.utils import create_dict_by_key_and_value
from jobs import JobPost
from scrapers.goozali.GoozaliMapper import GoozaliMapper
from scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
from scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column
from scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliResponseData
from scrapers.utils import create_dict_by_key_and_value

# URL Example
# https://airtable.com/v0.3/view/viwagEIbkfz2iMsLU/readSharedViewData?stringifiedObjectParams=%7B%22shouldUseNestedResponseFormat%22%3Atrue%7D&requestId=reqXyRSHWlXyiRgY9&accessPolicy=%7B%22allowedActions%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSignedAttachmentUrls%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows%20*%5BdisplayedInView%3DviwagEIbkfz2iMsLU%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%5D%2C%22shareId%22%3A%22shr97tl6luEk4Ca9R%22%2C%22applicationId%22%3A%22app5sYJyDgcRbJWYU%22%2C%22generationNumber%22%3A0%2C%22expires%22%3A%222025-01-02T00%3A00%3A00.000Z%22%2C%22signature%22%3A%223aa292ee44d15aa75d9506200329e413653471f89e000fa370ef9fa38393070a%22%7D
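#
# The query parameters in the URL above are URL-encoded JSON. A minimal sketch
# for inspecting them (illustrative only; `url` stands for the share-view URL
# above):
#
#     from urllib.parse import urlparse, parse_qs
#     import json
#
#     qs = parse_qs(urlparse(url).query)  # parse_qs percent-decodes the values
#     access_policy = json.loads(qs["accessPolicy"][0])
#     print(access_policy["shareId"], access_policy["expires"])
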
@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd

@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd

@ -1,7 +1,7 @@
from datetime import datetime, date
from typing import List

from jobspy import JobPost, Location, Country
from scrapers import JobPost, Location, Country


# Creating some test job posts
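#
# Illustrative sketch of such a mock (constructor and field names are
# assumptions inferred from the imports above, not a verified JobPost API):
#
#     def createMockJob() -> JobPost:
#         return JobPost(
#             title="Software Engineer",
#             company_name="Acme",
#             job_url="https://example.com/jobs/1",
#             location=Location(country=Country.ISRAEL, city="Tel Aviv"),
#             date_posted=date.today(),
#         )
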
@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd