Merge pull request #3 from yariv245/start_conv_handler

start_conv_handler
pull/231/head
Yariv Menachem 2025-01-06 16:25:26 +02:00 committed by GitHub
commit 69a420710e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
65 changed files with 849 additions and 275 deletions


@ -3,15 +3,15 @@ requires = [ "poetry-core",]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "python-jobspy"
name = "python-JobSeekerTG"
version = "1.1.76"
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
authors = [ "Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>",]
homepage = "https://github.com/Bunsly/JobSpy"
authors = [ "YM "]
homepage = "https://github.com/yariv245/JobSeekerTG"
readme = "README.md"
keywords = [ "jobs-scraper", "linkedin", "indeed", "glassdoor", "ziprecruiter",]
[[tool.poetry.packages]]
include = "jobspy"
include = "JobSeekerTG"
from = "src"
[tool.black]

Binary file not shown.


@ -0,0 +1,17 @@
from cachebox import LRUCache
class CacheboxCacheManager:
def __init__(self):
self._cache = LRUCache(50)
def find(self, cache_id: str):
"""Finding cached data by id, else None"""
return self._cache.get(cache_id)
def save(self, cache_id: str, data):
"""Finding cached data by id, else None"""
self._cache.insert(cache_id, data)
cache_manager = CacheboxCacheManager()
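A minimal usage sketch of the cache manager above (the config.cache_manager import path is taken from other files in this diff; the key and payload are hypothetical):

from config.cache_manager import cache_manager

# find() delegates to LRUCache.get(), which returns None on a miss
user = cache_manager.find("janedoe")
if user is None:
    user = {"username": "janedoe"}  # hypothetical payload
    cache_manager.save("janedoe", user)  # LRU-evicts once past 50 entries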


@ -1,34 +0,0 @@
import os
from pymongo import MongoClient
from pymongo.synchronous.database import Database
from config.settings import settings
from jobspy import create_logger
class MongoDB:
_instance = None
db:Database = None
def __new__(cls):
if cls._instance is not None:
return cls._instance
self = super().__new__(cls)
cls._instance = self
logger = create_logger("Mongo Client")
mongoUri = settings.mongo_uri
if not mongoUri:
logger.error("MONGO_URI environment variable is not set")
raise ValueError("MONGO_URI environment variable is not set")
client = MongoClient(mongoUri)
database_name = settings.mongo_db_name
if not database_name:
logger.error("MONGO_DB_NAME environment variable is not set")
raise ValueError(
"MONGO_DB_NAME environment variable is not set")
self.db = client[database_name]
logger.info("Succeed connect to MongoDB")
return cls._instance


@ -1,29 +0,0 @@
from .model import GoozaliColumn
job_post_column_to_goozali_column = {
"date_posted": "Discovered",
"field": "Field",
"title": "Job Title",
"job_url": "Position Link",
"company_name": "Company",
"description": "Requirements",
"location": "Location",
"company_industry": "Company Industry",
"id": "Job ID"
}
job_post_column_names = ["id",
"date_posted",
"field",
"title",
"job_url",
"company_name",
"description",
"location",
"company_industry"]
# Key mapper: Extract 'name' as the key
def extract_goozali_column_name(column): return column.name if isinstance(
column, GoozaliColumn) else None


@ -1,13 +1,13 @@
import os
from telegram import Update
from telegram.ext import Application, CommandHandler, CallbackQueryHandler, Updater
from telegram.ext import Application, CommandHandler, CallbackQueryHandler
from config.settings import settings
from jobspy.scrapers.site import Site
from jobspy.scrapers.utils import create_logger
from scrapers import Site
from scrapers.utils import create_logger
from telegram_handler import TelegramDefaultHandler
from telegram_handler.button_callback.telegram_callback_handler import TelegramCallHandler
from telegram_handler.telegram_myinfo_handler import my_info_handler
from telegram_handler.telegram_start_handler import start_conv_handler
logger = create_logger("Main")
_api_token = settings.telegram_api_token
@ -17,52 +17,34 @@ title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "F
"automation", "BI ", "Principal", "Architect", "Android", "Machine Learning", "Student",
"Data Engineer", "DevSecOps"]
async def stop(update, context):
logger.info("Stop polling from telegram")
application.stop_running()
if __name__ == "__main__":
logger.info("Starting initialize ")
search_term = "software engineer"
locations = ["Tel Aviv, Israel", "Ramat Gan, Israel",
"Central, Israel", "Rehovot ,Israel"]
application.add_handler(start_conv_handler)
tg_callback_handler = TelegramCallHandler()
tg_handler_all = TelegramDefaultHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI],
locations=locations,
title_filters=title_filters,
search_term=search_term)
tg_handler_all = TelegramDefaultHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI])
application.add_handler(CommandHandler("find", tg_handler_all.handle))
# Goozali
tg_handler_goozali = TelegramDefaultHandler(sites=[Site.GOOZALI],
locations=locations,
title_filters=title_filters,
search_term=search_term)
tg_handler_goozali = TelegramDefaultHandler(sites=[Site.GOOZALI])
application.add_handler(CommandHandler(
Site.GOOZALI.value, tg_handler_goozali.handle))
# GlassDoor
tg_handler_glassdoor = TelegramDefaultHandler(sites=[Site.GLASSDOOR],
locations=locations,
title_filters=title_filters,
search_term=search_term)
tg_handler_glassdoor = TelegramDefaultHandler(sites=[Site.GLASSDOOR])
application.add_handler(CommandHandler(
Site.GLASSDOOR.value, tg_handler_glassdoor.handle))
# LinkedIn
tg_handler_linkedin = TelegramDefaultHandler(sites=[Site.LINKEDIN],
locations=locations,
title_filters=title_filters,
search_term=search_term)
tg_handler_linkedin = TelegramDefaultHandler(sites=[Site.LINKEDIN])
application.add_handler(CommandHandler(
Site.LINKEDIN.value, tg_handler_linkedin.handle))
# Indeed
tg_handler_indeed = TelegramDefaultHandler(sites=[Site.INDEED],
locations=locations,
title_filters=title_filters,
search_term=search_term)
tg_handler_indeed = TelegramDefaultHandler(sites=[Site.INDEED])
application.add_handler(CommandHandler(
Site.INDEED.value, tg_handler_indeed.handle))
application.add_handler(CommandHandler(
"myInfo", my_info_handler.handle))
application.add_handler(CallbackQueryHandler(
tg_callback_handler.button_callback))
application.add_handler(CommandHandler('stop', stop))
logger.info("Run polling from telegram")
application.run_polling(allowed_updates=Update.ALL_TYPES)
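The hunk above elides where application is built; with python-telegram-bot v20+, which this handler code targets, the construction presumably looks like this sketch (not the verbatim source):

from telegram.ext import Application

application = Application.builder().token(_api_token).build()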

src/model/Position.py (new file, 26 lines)

@ -0,0 +1,26 @@
from enum import Enum
class Position(str, Enum):
BACKEND_DEVELOPER = "Backend Developer"
FULLSTACK_DEVELOPER = "Fullstack Developer"
FRONTEND_DEVELOPER = "Frontend Developer"
DATA_SCIENTIST = "Data Scientist"
DATA_ANALYST = "Data Analyst"
PROJECT_MANAGER = "Project Manager"
CLOUD_ENGINEER = "Cloud Engineer"
CLOUD_ARCHITECT = "Cloud Architect"
UX_UI_DESIGNER = "UX/UI Designer"
PRODUCT_MANAGER = "Product Manager"
DEV_OPS_ENGINEER = "DevOps Engineer"
BUSINESS_ANALYST = "Business Analyst"
CYBERSECURITY_ENGINEER = "Cybersecurity Engineer"
MACHINE_LEARNING_ENGINEER = "Machine Learning Engineer"
ARTIFICIAL_INTELLIGENCE_ENGINEER = "Artificial Intelligence Engineer"
DATABASE_ADMINISTRATOR = "Database Administrator"
SYSTEMS_ADMINISTRATOR = "Systems Administrator"
NETWORK_ENGINEER = "Network Engineer"
TECHNICAL_SUPPORT_SPECIALIST = "Technical Support Specialist"
SALES_ENGINEER = "Sales Engineer"
SCRUM_MASTER = "Scrum Master"
IT_MANAGER = "IT Manager"

src/model/User.py (new file, 34 lines)

@ -0,0 +1,34 @@
from typing import Optional, Union
from pydantic import BaseModel, Field
from model.Position import Position
class User(BaseModel):
full_name: str
username: str
chat_id: Optional[Union[int, str]] = None
experience: Optional[Union[int, str]] = None
job_age: Optional[Union[int, str]] = None
position: Optional[Position] = None
cities: Optional[list[str]] = None
title_filters: Optional[list[str]] = None
def get_myinfo_message(self):
message = "Here's your profile:\n\n"
message += f"Full Name: {self.full_name}\n"
message += f"Username: @{self.username}\n"
if self.chat_id:
message += f"Chat ID: {self.chat_id}\n"
if self.job_age:
message += f"Job Age (Hours): {self.experience}\n"
if self.experience:
message += f"Experience(Years): {self.experience}\n"
if self.position:
message += f"Position Level: {self.position.value}\n"
if self.cities:
message += f"Preferred Cities: {', '.join(self.cities)}\n"
if self.title_filters:
message += f"Job Title Filters: {', '.join(self.title_filters)}\n"
return message
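For illustration, a sketch that builds a User and renders the profile message; all field values here are made up:

from model.Position import Position
from model.User import User

user = User(
    full_name="Jane Doe",
    username="janedoe",
    chat_id=123456,
    experience=5,
    job_age=48,
    position=Position.BACKEND_DEVELOPER,
    cities=["Tel Aviv", "Ramat Gan"],
    title_filters=["QA", "Student"],
)
print(user.get_myinfo_message())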


@ -0,0 +1,17 @@
from bson.codec_options import TypeCodec
from model.Position import Position
class PositionCodec(TypeCodec):
python_type = Position
bson_type = str
def transform_python(self, value):
return value.name
def transform_bson(self, value):
return Position(value)
# position_codec = PositionCodec()
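The codec is defined but not wired in by this diff (its instantiation is commented out). If it were, registration would go through bson's standard type-registry API, roughly like this sketch:

from bson.codec_options import CodecOptions, TypeRegistry
from model.monogo_db import mongo_client  # shared singleton from this PR

# PositionCodec is the class defined above; "user" matches UserRepository
type_registry = TypeRegistry([PositionCodec()])
codec_options = CodecOptions(type_registry=type_registry)
users = mongo_client.get_collection("user", codec_options=codec_options)
# Position values would then round-trip to/from BSON strings transparently.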


@ -3,27 +3,17 @@ from typing import Optional
from dotenv import load_dotenv
from pymongo import UpdateOne
from .monogo_db import MongoDB
from jobspy import create_logger
from jobspy.jobs import JobPost
from scrapers import create_logger
from jobs import JobPost
from .monogo_db import mongo_client
load_dotenv()
class JobRepository:
_instance = None
def __new__(cls):
if cls._instance is not None:
return cls._instance
self = super().__new__(cls)
cls._instance = self
self.logger = create_logger("JobRepository")
mongo_client = MongoDB()
self.collection = mongo_client.db["jobs"]
return cls._instance
def __init__(self):
self._logger = create_logger("JobRepository")
self._collection = mongo_client.get_collection('jobs')
def find_by_id(self, job_id: str) -> Optional[JobPost]:
"""
@ -35,7 +25,7 @@ class JobRepository:
Returns:
The job document if found, otherwise None.
"""
result = self.collection.find_one({"id": job_id})
result = self._collection.find_one({"id": job_id})
return JobPost(**result) if result else None
def update(self, job: JobPost) -> bool:
@ -48,7 +38,7 @@ class JobRepository:
Returns:
True if the update was successful, False otherwise.
"""
result = self.collection.update_one({"id": job.id}, {"$set": job.model_dump(exclude={"date_posted"})})
result = self._collection.update_one({"id": job.id}, {"$set": job.model_dump(exclude={"date_posted"})})
return result.modified_count > 0
def insert_job(self, job: JobPost):
@ -62,8 +52,8 @@ class JobRepository:
Exception: If an error occurs during insertion.
"""
job_dict = job.model_dump(exclude={"date_posted"})
self.collection.insert_one(job_dict)
self.logger.info(f"Inserted new job with title {job.title}.")
self._collection.insert_one(job_dict)
self._logger.info(f"Inserted new job with title {job.title}.")
def insert_many_if_not_found(self, jobs: list[JobPost]) -> tuple[list[JobPost], list[JobPost]]:
"""
@ -86,8 +76,8 @@ class JobRepository:
if operations:
# Execute all operations in bulk
result = self.collection.bulk_write(operations)
self.logger.info(f"Matched: {result.matched_count}, Upserts: {
result = self._collection.bulk_write(operations)
self._logger.info(f"Matched: {result.matched_count}, Upserts: {
result.upserted_count}, Modified: {result.modified_count}")
# Get the newly inserted jobs (those that were upserted)
@ -99,3 +89,5 @@ class JobRepository:
old_jobs.append(job)
return old_jobs, new_jobs
job_repository = JobRepository()

src/model/monogo_db.py (new file, 38 lines)

@ -0,0 +1,38 @@
from pymongo import MongoClient
from pymongo.synchronous.database import Database
from config.settings import settings
from scrapers.utils import create_logger
class MongoDB:
def __init__(self):
logger = create_logger("Mongo Client")
mongo_uri = settings.mongo_uri
if not mongo_uri:
logger.error("MONGO_URI environment variable is not set")
raise ValueError("MONGO_URI environment variable is not set")
client = MongoClient(mongo_uri)
database_name = settings.mongo_db_name
if not database_name:
logger.error("MONGO_DB_NAME environment variable is not set")
raise ValueError(
"MONGO_DB_NAME environment variable is not set")
self._db: Database = client[database_name]
logger.info("Succeed connect to MongoDB")
def get_collection(self,
name: str,
codec_options=None,
read_preference=None,
write_concern=None,
read_concern=None):
return self._db.get_collection(name,
codec_options,
read_preference,
write_concern,
read_concern)
mongo_client = MongoDB()
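The wrapper reads MONGO_URI and MONGO_DB_NAME from settings at import time and exposes one shared client; a short consumption sketch with placeholder values (this only works if config.settings has not been imported earlier in the process):

import os

os.environ.setdefault("MONGO_URI", "mongodb://localhost:27017")  # placeholder
os.environ.setdefault("MONGO_DB_NAME", "jobseeker")              # placeholder

from model.monogo_db import mongo_client  # MongoDB() is instantiated on import

jobs = mongo_client.get_collection("jobs")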


@ -0,0 +1,129 @@
from typing import Optional
from dotenv import load_dotenv
from pymongo import UpdateOne
from config.cache_manager import cache_manager
from scrapers.utils import create_logger
from .User import User
from .monogo_db import mongo_client
load_dotenv()
class UserRepository:
def __init__(self):
self._logger = create_logger("UserRepository")
self._collection = mongo_client.get_collection('user')
self._collection.create_index('username', unique=True)
def find_by_id(self, user_id: str) -> Optional[User]:
"""
Finds a user document in the collection by its ID.
Args:
user_id: The ID of the user to find.
Returns:
The user document if found, otherwise None.
"""
user = None
cached_user = cache_manager.find(user_id)
if cached_user:
return cached_user
result = self._collection.find_one({"id": user_id})
if result:
user = User(**result)
cache_manager.save(user_id, user)
return user
def find_by_username(self, username: str) -> Optional[User]:
"""
Finds a user document in the collection by its username.
Args:
username: The username of the user to find.
Returns:
The user document if found, otherwise None.
"""
user = None
cached_user = cache_manager.find(username)
if cached_user:
return cached_user
result = self._collection.find_one({"username": username})
self._logger.info("find user by usernameeeeeeee")
if result:
user = User(**result)
cache_manager.save(username, user)
return user
def update(self, user: User) -> bool:
"""
Updates a User in the database.
Args:
user: A dictionary representing the User data.
Returns:
True if the update was successful, False otherwise.
"""
result = self._collection.update_one({"username": user.username}, {"$set": user.model_dump()})
return result.modified_count > 0
def insert_user(self, user: User):
"""
Inserts a new user posting into the database collection.
Args:
user (User): The User object to be inserted.
Raises:
Exception: If an error occurs during insertion.
"""
self._collection.insert_one(user.model_dump())
cache_manager.save(user.username, user)
self._logger.info(f"Inserted new user with username {user.username}.")
def insert_many_if_not_found(self, users: list[User]) -> tuple[list[User], list[User]]:
"""
Perform bulk upserts for a list of User objects into a MongoDB collection.
Only insert new users and return the list of newly inserted users.
"""
operations = []
new_users = [] # List to store the new users inserted into MongoDB
old_users = []  # List to store users that already existed in MongoDB
for user in users:
user_dict = user.model_dump()
operations.append(
UpdateOne(
{"id": user.id}, # Match by `id`
# Only set fields if the user is being inserted (not updated)
{"$setOnInsert": user_dict},
upsert=True # Insert if not found, but do not update if already exists
)
)
if operations:
# Execute all operations in bulk
result = self._collection.bulk_write(operations)
self._logger.info(f"Matched: {result.matched_count}, Upserts: {
result.upserted_count}, Modified: {result.modified_count}")
# Get the newly inserted users (those that were upserted)
# The `upserted_count` corresponds to how many new documents were inserted
for i, user in enumerate(users):
if result.upserted_count > 0 and i < result.upserted_count:
new_users.append(user)
else:
old_users.append(user)
return old_users, new_users
user_repository = UserRepository()
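One detail of insert_many_if_not_found worth noting: $setOnInsert only writes fields when the upsert actually inserts, so existing documents are never modified. (The new/old split above assumes the first upserted_count users in list order were the inserts; BulkWriteResult.upserted_ids, keyed by operation index, is the precise source for that.) A standalone sketch of the pattern, with placeholder connection details:

from pymongo import MongoClient, UpdateOne

collection = MongoClient("mongodb://localhost:27017")["demo"]["users"]  # placeholders

ops = [
    UpdateOne(
        {"username": "janedoe"},                                   # match key
        {"$setOnInsert": {"username": "janedoe", "experience": 5}},
        upsert=True,  # insert when unmatched; leave matched docs untouched
    )
]
result = collection.bulk_write(ops)
print(result.upserted_count, result.matched_count, result.upserted_ids)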


@ -2,34 +2,36 @@ from __future__ import annotations
import re
from threading import Lock
import pandas as pd
from typing import Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed
from .scrapers.site import Site
from .scrapers.goozali import GoozaliScraper
from .jobs import JobPost, JobType, Location
from .scrapers.utils import set_logger_level, extract_salary, create_logger
from .scrapers.indeed import IndeedScraper
from .scrapers.ziprecruiter import ZipRecruiterScraper
from .scrapers.glassdoor import GlassdoorScraper
from .scrapers.google import GoogleJobsScraper
from .scrapers.linkedin import LinkedInScraper
from .scrapers import SalarySource, ScraperInput, JobResponse, Country
from .scrapers.exceptions import (
LinkedInException,
IndeedException,
ZipRecruiterException,
GlassdoorException,
GoogleJobsException,
from jobs import (
Enum,
JobType,
JobResponse,
Country,
JobPost,
)
from model.User import User
from .glassdoor import GlassdoorScraper
from .google import GoogleJobsScraper
from .goozali import GoozaliScraper
from .indeed import IndeedScraper
from .linkedin import LinkedInScraper
from .scraper_input import ScraperInput
from .site import Site
from .utils import set_logger_level, create_logger
from .ziprecruiter import ZipRecruiterScraper
class SalarySource(Enum):
DIRECT_DATA = "direct_data"
DESCRIPTION = "description"
def scrape_jobs(
site_name: str | list[str] | Site | list[Site] | None = None,
user: User = None,
search_term: str | None = None,
google_search_term: str | None = None,
location: str | None = None,
@ -55,7 +57,7 @@ def scrape_jobs(
) -> (list[JobPost], list[JobPost]):
"""
Simultaneously scrapes job data from multiple job sites.
:return: pandas dataframe containing job data
:return: list of filtered-out JobPost, list of new JobPost
"""
SCRAPER_MAPPING = {
Site.LINKEDIN: LinkedInScraper,
@ -93,6 +95,7 @@ def scrape_jobs(
country_enum = Country.from_string(country_indeed)
scraper_input = ScraperInput(
user=user,
site_type=get_site_type(),
country=country_enum,
search_term=search_term,
@ -111,7 +114,7 @@ def scrape_jobs(
hours_old=hours_old
)
def scrape_site(site: Site) -> Tuple[str, JobResponse]:
def scrape_site(site: Site) -> tuple[str, JobResponse]:
scraper_class = SCRAPER_MAPPING[site]
scraper = scraper_class(proxies=proxies, ca_cert=ca_cert)
scraped_data: JobResponse = scraper.scrape(scraper_input)
@ -166,6 +169,10 @@ def scrape_jobs(
"""
filtered_jobs = []
remaining_jobs = []
if not filter_by_title:
return filtered_jobs, remaining_jobs
for job in jobs:
for filter_title in filter_by_title:
if re.search(filter_title, job.title, re.IGNORECASE):
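The guard just added short-circuits when no title filters are configured (note it returns both lists empty; if the caller invokes this filter unconditionally, returning the jobs as the remaining list is likely the intent). Each configured filter is applied to the job title as a case-insensitive regex; the rule in isolation, with hypothetical filters:

import re

title_filters = ["QA", "Full[- ]?Stack"]  # hypothetical; regex syntax is allowed
title = "Senior Full-Stack Engineer"

filtered_out = any(re.search(f, title, re.IGNORECASE) for f in title_filters)
print(filtered_out)  # True: "Full[- ]?Stack" matches "Full-Stack"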


@ -1,5 +1,5 @@
"""
jobspy.scrapers.exceptions
scrapers.exceptions
~~~~~~~~~~~~~~~~~~~
This module contains the set of Scrapers' exceptions.


@ -1,5 +1,5 @@
"""
jobspy.scrapers.glassdoor
scrapers.glassdoor
~~~~~~~~~~~~~~~~~~~
This module contains routines to scrape Glassdoor.
@ -7,7 +7,6 @@ This module contains routines to scrape Glassdoor.
from __future__ import annotations
from dataclasses import dataclass
import re
import json
import requests
@ -18,14 +17,16 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from .GlassDoorLocation import GlassDoorLocationResponse, get_location_id, get_location_type
from .constants import fallback_token, query_template, headers
from .. import Scraper, ScraperInput, Site
from ..scraper import Scraper
from ..scraper_input import ScraperInput
from ..site import Site
from ..utils import extract_emails_from_text, create_logger
from ..exceptions import GlassdoorException
from ..utils import (
create_session,
markdown_converter,
)
from ...jobs import (
from jobs import (
JobPost,
Compensation,
CompensationInterval,


@ -1,5 +1,5 @@
"""
jobspy.scrapers.google
scrapers.google
~~~~~~~~~~~~~~~~~~~
This module contains routines to scrape Google.
@ -14,12 +14,14 @@ from typing import Tuple
from datetime import datetime, timedelta
from .constants import headers_jobs, headers_initial, async_param
from .. import Scraper, ScraperInput, Site
from ..scraper import Scraper
from ..scraper_input import ScraperInput
from ..site import Site
from ..utils import extract_emails_from_text, create_logger, extract_job_type
from ..utils import (
create_session,
)
from ...jobs import (
from jobs import (
JobPost,
JobResponse,
Location,


@ -1,7 +1,7 @@
from datetime import datetime
import json
from jobspy.jobs import JobPost, Location
from jobs import JobPost, Location
from .model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow, GoozaliColumn, GoozaliColumnChoice, GoozaliResponseData
from .constants import job_post_column_to_goozali_column, job_post_column_names


@ -1,6 +1,6 @@
from datetime import datetime, timedelta
from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice
from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice, GoozaliFieldChoice
from ..utils import create_logger
# Mapping function to convert parsed dictionary into GoozaliResponseData
@ -13,12 +13,20 @@ class GoozaliScrapperComponent:
pass
# Function to filter GoozaliRows based on hours old
def filter_rows_by_column_choice(self, rows: list[GoozaliRow], column: GoozaliColumn, column_choice: GoozaliColumnChoice) -> list[GoozaliRow]:
def filter_rows_by_column_choice(self, rows: list[GoozaliRow], column: GoozaliColumn,
column_choices: list[GoozaliColumnChoice]) -> list[GoozaliRow]:
return [
row for row in rows
if row.cellValuesByColumnId[column.id] == column_choice.id
row
for row in rows
if row.cellValuesByColumnId.get(column.id)
and any(choice.id == row.cellValuesByColumnId[column.id] for choice in column_choices)
]
# return [
# row for row in rows
# if row.cellValuesByColumnId[column.id] == column_choice.id
# ]
def filter_rows_by_hours(self, rows: list[GoozaliRow], hours: int) -> list[GoozaliRow]:
# Current time
now = datetime.now()
@ -39,14 +47,20 @@ class GoozaliScrapperComponent:
if (column.name == column_name):
return column
def find_choice_from_column(self, column: GoozaliColumn, choice_name: str) -> GoozaliColumnChoice:
def find_choices_from_column(self, column: GoozaliColumn, choices: list[GoozaliFieldChoice]) -> list[
GoozaliColumnChoice]:
if not column.typeOptions.choices:
logger.exception(f"Choices for column {column.name} doesn't exist")
raise Exception(f"Choices for column {column.name} doesn't exist")
chosen_values = [c.value for c in choices]
goozali_column_choices = []
for key, choice in column.typeOptions.choices.items():
if (choice.name == choice_name):
return choice
if choice.name in chosen_values:
goozali_column_choices.append(choice)
logger.exception(f"Can't find {choice_name} for column {column.name}")
raise Exception(f"Can't find {choice_name} for column {column.name}")
if len(goozali_column_choices) == 0:
logger.exception(f"Can't find {choices} for column {column.name}")
raise Exception(f"Can't find {choices} for column {column.name}")
return goozali_column_choices
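A hedged sketch of the row-filtering rule these helpers feed, with plain dicts standing in for the Goozali models and every id hypothetical:

rows = [
    {"cellValuesByColumnId": {"fldField": "choiceSoftware"}},
    {"cellValuesByColumnId": {"fldField": "choiceSales"}},
    {"cellValuesByColumnId": {}},  # row missing the column entirely
]
chosen_choice_ids = {"choiceSoftware", "choiceFrontend"}

kept = [
    row for row in rows
    if row["cellValuesByColumnId"].get("fldField") in chosen_choice_ids
]
print(len(kept))  # 1; missing cells and unchosen choices are dropped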


@ -1,5 +1,5 @@
"""
jobspy.scrapers.Goozali
scrapers.Goozali
~~~~~~~~~~~~~~~~~~~
This module contains routines to scrape Goozali.
@ -7,20 +7,20 @@ This module contains routines to scrape Goozali.
from __future__ import annotations
from .. import Scraper, ScraperInput
from .GoozaliMapper import GoozaliMapper
from .GoozaliScrapperComponent import GoozaliScrapperComponent
from .constants import extract_goozali_column_name, job_post_column_to_goozali_column
from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
from ..site import Site
from ..utils import create_dict_by_key_and_value, create_session, create_logger
from ...jobs import (
from jobs import (
JobPost,
JobResponse,
)
logger = create_logger("Goozali")
from .GoozaliMapper import GoozaliMapper
from .GoozaliScrapperComponent import GoozaliScrapperComponent
from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map
from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
from ..scraper import Scraper
from ..scraper_input import ScraperInput
from ..site import Site
from ..utils import create_dict_by_key_and_value, create_session, create_logger
logger = create_logger("GoozaliScraper")
class GoozaliScraper(Scraper):
@ -67,27 +67,24 @@ class GoozaliScraper(Scraper):
logger.info(f"response: {str(response)}")
if (response.status_code != 200):
logger.error(f"Status code: {response.status_code}, Error: {
str(response.text)}")
str(response.text)}")
return JobResponse(jobs=job_list)
except Exception as e:
logger.error(f"Exception: {str(e)}")
return JobResponse(jobs=job_list)
# model the response with models
goozali_response = self.mapper.map_response_to_goozali_response(
response=response)
# suggestion: create a group-by field and then filter by hours
# filter result by Field
column = self.component.find_column(
goozali_response.data.columns, job_post_column_to_goozali_column["field"])
column_choice = self.component.find_choice_from_column(
column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value)
user_goozali_fields = position_to_goozali_field_map[scraper_input.user.position]
column_choices = self.component.find_choices_from_column(
column, user_goozali_fields)
filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice(
goozali_response.data.rows, column, column_choice)
goozali_response.data.rows, column, column_choices)
filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours(
filtered_rows_by_column_choice, scraper_input.hours_old)
dict_column_name_to_column: dict[str, GoozaliColumn] = create_dict_by_key_and_value(
goozali_response.data.columns, extract_goozali_column_name)
# map to JobResponse Object
for row in filtered_rows_by_age_and_column_choice:
job_post = self.mapper.map_goozali_response_to_job_post(
row, dict_column_name_to_column)


@ -0,0 +1,92 @@
from model.Position import Position
from .model import GoozaliColumn, GoozaliFieldChoice
job_post_column_to_goozali_column = {
"date_posted": "Discovered",
"field": "Field",
"title": "Job Title",
"job_url": "Position Link",
"company_name": "Company",
"description": "Requirements",
"location": "Location",
"company_industry": "Company Industry",
"id": "Job ID"
}
job_post_column_names = ["id",
"date_posted",
"field",
"title",
"job_url",
"company_name",
"description",
"location",
"company_industry"]
fields = ["Product Management",
"Data Analyst",
"Data Science, ML & Algorithms",
"Software Engineering",
"QA",
"Cybersecurity",
"IT and System Administration",
"Frontend Development",
"DevOps",
"UI/UX, Design & Content",
"HR & Recruitment",
"Mobile Development",
"Hardware Engineering",
"Embedded, Low Level & Firmware Engineering",
"Customer Success",
"Project Management",
"Operations",
"Finance",
"Systems Engineering",
"Marketing",
"Sales",
"Compliance, Legal & Policy",
"C-Level",
"Business Development",
"Mechanical Engineering",
"Natural Science",
"Other"]
def create_position_to_goozali_field_map():
"""
Creates a map with Position as keys and a list of relevant GoozaliFieldChoice as values.
Returns:
dict: A dictionary mapping Position to a list of GoozaliFieldChoice.
"""
position_to_goozali_map = {
Position.BACKEND_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING],
Position.FULLSTACK_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING],
Position.FRONTEND_DEVELOPER: [GoozaliFieldChoice.FRONTEND_DEVELOPMENT, GoozaliFieldChoice.SOFTWARE_ENGINEERING],
Position.DATA_SCIENTIST: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS],
Position.DATA_ANALYST: [GoozaliFieldChoice.DATA_ANALYST],
Position.PROJECT_MANAGER: [GoozaliFieldChoice.PROJECT_MANAGEMENT],
Position.CLOUD_ENGINEER: [GoozaliFieldChoice.DEVOPS, GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
Position.CLOUD_ARCHITECT: [GoozaliFieldChoice.DEVOPS, GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
Position.UX_UI_DESIGNER: [GoozaliFieldChoice.UI_UX_DESIGN_CONTENT],
Position.PRODUCT_MANAGER: [GoozaliFieldChoice.PRODUCT_MANAGEMENT],
Position.DEV_OPS_ENGINEER: [GoozaliFieldChoice.DEVOPS],
Position.BUSINESS_ANALYST: [GoozaliFieldChoice.BUSINESS_DEVELOPMENT],
Position.CYBERSECURITY_ENGINEER: [GoozaliFieldChoice.CYBERSECURITY],
Position.MACHINE_LEARNING_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS],
Position.ARTIFICIAL_INTELLIGENCE_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS],
Position.DATABASE_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
Position.SYSTEMS_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
Position.NETWORK_ENGINEER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
Position.TECHNICAL_SUPPORT_SPECIALIST: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
Position.SALES_ENGINEER: [GoozaliFieldChoice.SALES],
Position.SCRUM_MASTER: [GoozaliFieldChoice.PROJECT_MANAGEMENT],
Position.IT_MANAGER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
}
return position_to_goozali_map
# Get the map
position_to_goozali_field_map = create_position_to_goozali_field_map()
# Key mapper: Extract 'name' as the key
def extract_goozali_column_name(column): return column.name if isinstance(
column, GoozaliColumn) else None
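Downstream (see the GoozaliScraper diff above), the scraper keys this map by the user's Position to decide which Goozali field choices to keep. A short lookup sketch, assuming the module paths used in this PR:

from model.Position import Position
from scrapers.goozali.constants import position_to_goozali_field_map

fields = position_to_goozali_field_map[Position.FRONTEND_DEVELOPER]
print([f.value for f in fields])
# ['Frontend Development', 'Software Engineering'] per the mapping above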


@ -6,13 +6,13 @@ class GoozaliFullRequest():
self.view_id: str = "viwIOzPYaUGxlA0Jd"
self.url = base_url.format(view_id=self.view_id)
self.application_id: str = "appwewqLk7iUY4azc"
self.air_table_page_load_id: str = "pglqAAzFDZEWCEC7s"
self.air_table_page_load_id: str = "pglke45UFwdvQgBNJ"
self.stringifiedObjectParams = {
"shouldUseNestedResponseFormat": "true"}
self.cookies: dict[str, str] = {}
self.request_id: str = "req4q4tKw3woEEWxw&"
self.request_id: str = "reqGjlEjOQFyRssam"
self.share_id: str = "shrQBuWjXd0YgPqV6"
self.signature: str = "be8bd40c133f051f929ebab311c416013f5af0d5acae4264575b88ccf051ee59"
self.signature: str = "7a1402a3f7f6f9a23c8db3849878812f2d3141da60f3b3d6e14dd4a910b91b74"
self.headers = self._generate_headers()
self.params = self._generate_params()
self.cookies = {}
@ -66,7 +66,7 @@ class GoozaliFullRequest():
"shareId": self.share_id,
"applicationId": self.application_id,
"generationNumber": 0,
"expires": "2025-01-02T00:00:00.000Z",
"expires": "2025-01-30T00:00:00.000Z",
"signature": self.signature
}
# Convert to a JSON string


@ -1,5 +1,5 @@
"""
jobspy.scrapers.indeed
scrapers.indeed
~~~~~~~~~~~~~~~~~~~
This module contains routines to scrape Indeed.
@ -12,7 +12,9 @@ from typing import Tuple
from datetime import datetime
from .constants import job_search_query, api_headers
from .. import Scraper, ScraperInput, Site
from ..scraper import Scraper
from ..scraper_input import ScraperInput
from ..site import Site
from ..utils import (
extract_emails_from_text,
get_enum_from_job_type,
@ -20,7 +22,7 @@ from ..utils import (
create_session,
create_logger,
)
from ...jobs import (
from jobs import (
JobPost,
Compensation,
CompensationInterval,
@ -35,7 +37,7 @@ logger = create_logger("Indeed")
class IndeedScraper(Scraper):
def __init__(
self, proxies: list[str] | str | None = None, ca_cert: str | None = None
self, proxies: list[str] | str | None = None, ca_cert: str | None = None
):
"""
Initializes IndeedScraper with the Indeed API url
@ -74,7 +76,7 @@ class IndeedScraper(Scraper):
while len(self.seen_urls) < scraper_input.results_wanted + scraper_input.offset:
logger.info(
f"search page: {
page} / {math.ceil(scraper_input.results_wanted / self.jobs_per_page)}"
page} / {math.ceil(scraper_input.results_wanted / self.jobs_per_page)}"
)
jobs, cursor = self._scrape_page(cursor, location)
if not jobs:
@ -85,9 +87,9 @@ class IndeedScraper(Scraper):
return JobResponse(
jobs=job_list[
scraper_input.offset: scraper_input.offset
+ scraper_input.results_wanted
]
scraper_input.offset: scraper_input.offset
+ scraper_input.results_wanted
]
)
def _scrape_page(self, cursor: str | None, location: str) -> Tuple[list[JobPost], str | None]:
@ -108,7 +110,7 @@ class IndeedScraper(Scraper):
what=(f'what: "{search_term}"' if search_term else ""),
location=(
f'location: {{where: "{location}", radius: {
self.scraper_input.distance}, radiusUnit: MILES}}'
self.scraper_input.distance}, radiusUnit: MILES}}'
if location
else ""
),
@ -130,7 +132,7 @@ class IndeedScraper(Scraper):
if not response.ok:
logger.info(
f"responded with status code: {
response.status_code} (submit GitHub issue if this appears to be a bug)"
response.status_code} (submit GitHub issue if this appears to be a bug)"
)
return jobs, new_cursor
data = response.json()
@ -232,7 +234,7 @@ class IndeedScraper(Scraper):
company_name=job["employer"].get(
"name") if job.get("employer") else None,
company_url=(f"{self.base_url}{
rel_url}" if job["employer"] else None),
rel_url}" if job["employer"] else None),
company_url_direct=(
employer["links"]["corporateWebsite"] if employer else None
),
@ -345,7 +347,7 @@ class IndeedScraper(Scraper):
for keyword in remote_keywords
)
return (
is_remote_in_attributes or is_remote_in_description or is_remote_in_location
is_remote_in_attributes or is_remote_in_description or is_remote_in_location
)
@staticmethod


@ -1,5 +1,5 @@
"""
jobspy.scrapers.linkedin
scrapers.linkedin
~~~~~~~~~~~~~~~~~~~
This module contains routines to scrape LinkedIn.
@ -17,13 +17,15 @@ from datetime import datetime
from bs4.element import Tag
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urlunparse, unquote
from requests.exceptions import RetryError, RequestException
from requests.exceptions import RetryError
from urllib3.exceptions import MaxRetryError
from .constants import headers
from .. import Scraper, ScraperInput, Site
from ..scraper import Scraper
from ..scraper_input import ScraperInput
from ..site import Site
from ..exceptions import LinkedInException
from ..utils import create_session, remove_attributes, create_logger
from ...jobs import (
from jobs import (
JobPost,
Location,
JobResponse,

src/scrapers/scraper.py (new file, 17 lines)

@ -0,0 +1,17 @@
from abc import ABC, abstractmethod
from jobs import JobResponse
from scrapers.site import Site
from scrapers.scraper_input import ScraperInput
class Scraper(ABC):
def __init__(
self, site: Site, proxies: list[str] | None = None, ca_cert: str | None = None
):
self.site = site
self.proxies = proxies
self.ca_cert = ca_cert
@abstractmethod
def scrape(self, scraper_input: ScraperInput) -> JobResponse: ...
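Concrete scrapers above (Indeed, LinkedIn, Goozali, ...) subclass this ABC; a minimal hypothetical subclass, just to show the contract:

from jobs import JobResponse
from scrapers.scraper import Scraper
from scrapers.scraper_input import ScraperInput
from scrapers.site import Site

class DummyScraper(Scraper):
    """Hypothetical scraper returning no jobs; for illustration only."""

    def __init__(self, proxies: list[str] | None = None, ca_cert: str | None = None):
        super().__init__(Site.GOOZALI, proxies=proxies, ca_cert=ca_cert)

    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
        return JobResponse(jobs=[])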


@ -1,25 +1,13 @@
from __future__ import annotations
from pydantic import BaseModel
from abc import ABC, abstractmethod
from .site import Site
from ..jobs import (
Enum,
BaseModel,
JobType,
JobResponse,
Country,
DescriptionFormat,
)
class SalarySource(Enum):
DIRECT_DATA = "direct_data"
DESCRIPTION = "description"
from jobs import Country, JobType, DescriptionFormat
from model.User import User
from scrapers.site import Site
class ScraperInput(BaseModel):
site_type: list[Site]
user: User
search_term: str | None = None
google_search_term: str | None = None
@ -37,15 +25,3 @@ class ScraperInput(BaseModel):
results_wanted: int = 15
hours_old: int | None = None
class Scraper(ABC):
def __init__(
self, site: Site, proxies: list[str] | None = None, ca_cert: str | None = None
):
self.site = site
self.proxies = proxies
self.ca_cert = ca_cert
@abstractmethod
def scrape(self, scraper_input: ScraperInput) -> JobResponse: ...


@ -11,11 +11,11 @@ import numpy as np
from markdownify import markdownify as md
from requests.adapters import HTTPAdapter, Retry
from ..jobs import CompensationInterval, JobType
from jobs import CompensationInterval, JobType
def create_logger(name: str):
logger = logging.getLogger(f"JobSpy:{name}")
logger = logging.getLogger(f"JobSeekerTG:{name}")
logger.propagate = False
if not logger.handlers:
logger.setLevel(logging.INFO)
@ -143,7 +143,7 @@ def set_logger_level(verbose: int = 2):
level = getattr(logging, level_name.upper(), None)
if level is not None:
for logger_name in logging.root.manager.loggerDict:
if logger_name.startswith("JobSpy:"):
if logger_name.startswith("JobSeekerTG:"):
logging.getLogger(logger_name).setLevel(level)
else:
raise ValueError(f"Invalid log level: {level_name}")
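A quick sketch of the renamed logger namespace, assuming the verbose-to-level mapping shown partially above:

from scrapers.utils import create_logger, set_logger_level

log = create_logger("Demo")  # registers as "JobSeekerTG:Demo"
set_logger_level(2)          # adjusts every logger named "JobSeekerTG:*"
log.info("hello")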


@ -1,5 +1,5 @@
"""
jobspy.scrapers.ziprecruiter
scrapers.ziprecruiter
~~~~~~~~~~~~~~~~~~~
This module contains routines to scrape ZipRecruiter.
@ -19,7 +19,9 @@ from concurrent.futures import ThreadPoolExecutor
from bs4 import BeautifulSoup
from .constants import headers
from .. import Scraper, ScraperInput, Site
from ..site import Site
from ..scraper import Scraper
from ..scraper_input import ScraperInput
from ..utils import (
extract_emails_from_text,
create_session,
@ -27,7 +29,7 @@ from ..utils import (
remove_attributes,
create_logger,
)
from ...jobs import (
from jobs import (
JobPost,
Compensation,
Location,


@ -1,4 +1,3 @@
import os
from typing import Union
from dotenv import load_dotenv
@ -6,8 +5,8 @@ from telegram import Bot, InlineKeyboardButton, InlineKeyboardMarkup
from telegram.constants import ReactionEmoji
from config.settings import settings
from jobspy.jobs import JobPost
from jobspy.scrapers.utils import create_logger
from jobs import JobPost
from scrapers.utils import create_logger
load_dotenv()


@ -3,8 +3,8 @@ from __future__ import annotations
from telegram import MaybeInaccessibleMessage
from telegram.constants import ReactionEmoji
from db.job_repository import JobRepository
from jobspy import create_logger
from scrapers import create_logger
from model.job_repository import job_repository
from telegram_handler.button_callback.button_fire_strategy import FireStrategy
from telegram_handler.button_callback.button_job_title_strategy import JobTitleStrategy
from telegram_handler.button_callback.button_poo_strategy import PooStrategy
@ -22,7 +22,6 @@ class ButtonCallBackContext:
self._data = data
self._job_id = job_id
self._strategy = None
self._job_repository = JobRepository()
@property
def strategy(self) -> ButtonStrategy:
@ -49,10 +48,10 @@ class ButtonCallBackContext:
elif ReactionEmoji.PILE_OF_POO.name == self._data:
self._strategy = PooStrategy(self._message)
elif self._data:
job = self._job_repository.find_by_id(self._data)
job = job_repository.find_by_id(self._data)
if job:
chat_id = self._message.chat.id
self._strategy = JobTitleStrategy(chat_id,job)
self._strategy = JobTitleStrategy(chat_id, job)
else:
self._logger.error("Invalid enum value")
return


@ -1,8 +1,8 @@
from telegram import MaybeInaccessibleMessage
from telegram.constants import ReactionEmoji
from db.job_repository import JobRepository
from jobspy import create_logger
from scrapers import create_logger
from model.job_repository import job_repository
from telegram_bot import TelegramBot
from telegram_handler.button_callback.button_strategy import ButtonStrategy
@ -16,16 +16,15 @@ class FireStrategy(ButtonStrategy):
self._message = message
self._emoji = ReactionEmoji.FIRE
self._telegram_bot = TelegramBot()
self._job_repository = JobRepository()
self._job_id = job_id
self._logger = create_logger("FireStrategy")
async def execute(self):
job = self._job_repository.find_by_id(self._job_id)
job = job_repository.find_by_id(self._job_id)
if not job:
self._logger.error(f"Job with ID {self._job_id} not found.")
return
job.applied = True
self._job_repository.update(job)
job_repository.update(job)
chat_id = self._message.chat.id
await self._telegram_bot.set_message_reaction(chat_id, self._message.message_id, self._emoji)


@ -1,6 +1,6 @@
from typing import Union
from jobspy import JobPost
from scrapers import JobPost
from telegram_bot import TelegramBot
from telegram_handler.button_callback.button_strategy import ButtonStrategy


@ -3,7 +3,7 @@ from telegram.ext import (
ContextTypes,
)
from jobspy import create_logger
from scrapers import create_logger
from telegram_bot import TelegramBot
from telegram_handler.button_callback.button_callback_context import ButtonCallBackContext


@ -0,0 +1,46 @@
START_MESSAGE: str = "Hi there! I'm JobSeeker Bot, your friendly job search assistant.😊\n" \
"I'm here to help you find the perfect position.\n\n" \
"To stop chatting with me at any time, just send '/cancel'.\n\n"
POSITION_MESSAGE: str = "What kind of position are you looking for? ✨\n" \
"(e.g., Software Engineer, Data Scientist, Marketing Manager)"
POSITION_NOT_FOUND: str = "I couldn't find any positions matching your request. 😕\n" \
"Please try again"
multi_value_message: str = "Enter multiple values separated by commas (e.g., value1, value2, value3) ✍️"
LOCATION_MESSAGE: str = "Where are you hoping to find a position? 🌎\n" \
"(e.g., Rishon Lezion, New York City, San Francisco)\n\n" + multi_value_message
EXPERIENCE_MESSAGE: str = "How many years of professional experience do you have in this field? 💼\n"
EXPERIENCE_INVALID: str = "Oops! Please enter your experience in years as a number. 😕\n" \
"For example, 2, 5, or 10."
JOB_AGE_MESSAGE: str = "How recent should the jobs be? ⏰\n" \
"(Enter the number of hours, e.g., 24 for last 24 hours, 168 for last week)"
# JOB_AGE_MESSAGE: str = "Within how many hours do you want to see jobs posted? ⏰\n" \
# "(Enter a number, e.g., 48 for the last 48 hours)"
JOB_AGE_INVALID: str = "Oops! Please enter a number for the number of hours. 😕\n" \
"For example, 24, 48, or 168."
FILTER_TILE_MESSAGE: str = "To help me narrow down your search, tell me which tags or keywords are NOT relevant to you.\n" \
"For example: 'remote', 'BI', 'python', 'machine learning', 'QA'.\n\n" + multi_value_message
THANK_YOU_MESSAGE: str = "Thank you for chatting with JobSeeker Bot!\n\n" \
"I can help you find jobs on LinkedIn, Glassdoor, and more."
SEARCH_MESSAGE: str = "To search for jobs on a specific site, simply send the site name:\n" \
"/linkedin\n" \
"/indeed\n" \
"/glassdoor\n" \
"/goozali\n\n" \
"Or, use the command /find to search across all supported job boards for a broader search.\n\n" \
"Let me know how I can assist you further! 😊"
BYE_MESSAGE: str = "Have a great day!✨\n" \
"I hope to assist you with your job search in the future.😊"
VERIFY_MESSAGE: str = "Did you choose: %s ? 🧐"


@ -4,9 +4,10 @@ from telegram.ext import (
ContextTypes,
)
from db.job_repository import JobRepository
from jobspy import Site, scrape_jobs, JobPost
from jobspy.scrapers.utils import create_logger
from scrapers import Site, scrape_jobs, JobPost
from scrapers.utils import create_logger
from model.job_repository import JobRepository
from model.user_repository import user_repository
from telegram_bot import TelegramBot
from telegram_handler.telegram_handler import TelegramHandler
@ -33,11 +34,8 @@ def map_jobs_to_keyboard(jobs: list[JobPost]) -> InlineKeyboardMarkup:
class TelegramDefaultHandler(TelegramHandler):
def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str], search_term: str):
def __init__(self, sites: list[Site]):
self.sites_to_scrap = sites
self.locations = locations
self.search_term = search_term
self.title_filters = title_filters
self.telegram_bot = TelegramBot()
self.jobRepository = JobRepository()
if len(sites) == 1:
@ -51,17 +49,21 @@ class TelegramDefaultHandler(TelegramHandler):
chat_id = update.message.chat.id
await self.telegram_bot.set_message_reaction(chat_id,
update.message.message_id, ReactionEmoji.FIRE)
user = user_repository.find_by_username(update.message.from_user.username)
site_names = [site.name for site in self.sites_to_scrap]
site_names_print = ", ".join(site_names)
# locations = [location + ", Israel" for location in user.cities]
await self.telegram_bot.send_text(chat_id,
f"Start scarping: {site_names_print}")
filtered_out_jobs, jobs = scrape_jobs(
site_name=self.sites_to_scrap,
search_term=self.search_term,
locations=self.locations,
user=user,
search_term=user.position.value,
locations=user.cities,
results_wanted=200,
hours_old=48,
filter_by_title=self.title_filters,
hours_old=int(user.job_age),
filter_by_title=user.title_filters,
country_indeed='israel'
)
self.logger.info(f"Found {len(jobs)} jobs")


@ -0,0 +1,29 @@
from telegram import Update
from telegram.constants import ReactionEmoji
from telegram.ext import (
ContextTypes,
)
from scrapers.utils import create_logger
from model.user_repository import user_repository
from telegram_bot import TelegramBot
from telegram_handler.telegram_handler import TelegramHandler
class MyInfoTelegramHandler(TelegramHandler):
def __init__(self):
self.telegram_bot = TelegramBot()
self._logger = create_logger("MyInfoTelegramHandler")
async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
self._logger.info("start handling")
chat_id = update.message.chat.id
await self.telegram_bot.set_message_reaction(chat_id,
update.message.message_id, ReactionEmoji.FIRE)
user = user_repository.find_by_username(update.message.from_user.username)
await self.telegram_bot.send_text(chat_id, user.get_myinfo_message())
self._logger.info("finished handling")
my_info_handler = MyInfoTelegramHandler()


@ -0,0 +1,216 @@
from enum import Enum
from telegram import Update, Chat, KeyboardButton, ReplyKeyboardMarkup, ReplyKeyboardRemove
from telegram.constants import ReactionEmoji
from telegram.ext import (
ContextTypes, ConversationHandler, CommandHandler, MessageHandler, filters,
)
from config.cache_manager import cache_manager
from model.Position import Position
from model.User import User
from model.user_repository import user_repository
from scrapers.utils import create_logger
from telegram_bot import TelegramBot
from telegram_handler.start_handler_constats import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \
LOCATION_MESSAGE, EXPERIENCE_MESSAGE, FILTER_TILE_MESSAGE, THANK_YOU_MESSAGE, BYE_MESSAGE, VERIFY_MESSAGE, \
SEARCH_MESSAGE, EXPERIENCE_INVALID, JOB_AGE_INVALID, JOB_AGE_MESSAGE
class Flow(Enum):
POSITION = 0
ADDRESS = 1
FILTERS = 2
EXPERIENCE = 3
VERIFY_ADDRESS = 4
VERIFY_FILTERS = 5
SKIP_FILTERS = 6
JOB_AGE = 7
class TelegramStartHandler:
def __init__(self):
self.telegram_bot = TelegramBot()
self.logger = create_logger("TelegramStartHandler")
async def start(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
"""Starts the conversation and asks the user about their position."""
chat: Chat = update.message.chat
user = user_repository.find_by_username(chat.username)
if not user:
user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id)
user_repository.insert_user(user)
await update.message.reply_text(START_MESSAGE)
buttons = [[KeyboardButton(position.value)] for position in Position]
reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True,
input_field_placeholder=Flow.POSITION.name)
await update.message.reply_text(
POSITION_MESSAGE,
reply_markup=reply_markup,
)
return Flow.POSITION.value
async def position(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
"""Stores the selected position and asks for a locations."""
user = update.message.from_user
self.logger.info("Position of %s: %s", user.first_name, update.message.text)
position = next((p for p in Position if p.value == update.message.text), None)
if not position:
await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
await update.message.reply_text(POSITION_NOT_FOUND)
buttons = [[KeyboardButton(position.value)] for position in Position]
reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True,
input_field_placeholder=Flow.POSITION.name)
await update.message.reply_text(
POSITION_MESSAGE,
reply_markup=reply_markup,
)
return Flow.POSITION.value
await update.message.set_reaction(ReactionEmoji.FIRE)
cached_user: User = cache_manager.find(user.username)
cached_user.position = position
cache_manager.save(cached_user.username, cached_user)
await update.message.reply_text(LOCATION_MESSAGE)
return Flow.ADDRESS.value
async def address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
"""Asks for a location."""
cities = update.message.text.split(",")
# Remove leading/trailing spaces from each city name
cities = [city.strip() for city in cities]
await update.message.set_reaction(ReactionEmoji.FIRE)
reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True,
input_field_placeholder=Flow.VERIFY_ADDRESS.name)
await update.message.reply_text(VERIFY_MESSAGE % cities, reply_markup=reply_markup)
cached_user: User = cache_manager.find(update.message.from_user.username)
cached_user.cities = cities
cache_manager.save(cached_user.username, cached_user)
return Flow.VERIFY_ADDRESS.value
async def verify_address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
"""Verify for a Address."""
if update.message.text == "No":
await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
await update.message.reply_text(LOCATION_MESSAGE)
return Flow.ADDRESS.value
await update.message.set_reaction(ReactionEmoji.FIRE)
await update.message.reply_text(EXPERIENCE_MESSAGE)
return Flow.EXPERIENCE.value
async def experience(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
"""Asks for a experience."""
user = update.message.from_user
self.logger.info("Experience of %s: %s", user.first_name, update.message.text)
if not update.message.text.isnumeric():
await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
await update.message.reply_text(EXPERIENCE_INVALID)
await update.message.reply_text(EXPERIENCE_MESSAGE)
return Flow.EXPERIENCE.value
await update.message.set_reaction(ReactionEmoji.FIRE)
cached_user: User = cache_manager.find(update.message.from_user.username)
cached_user.experience = update.message.text
cache_manager.save(cached_user.username, cached_user)
await update.message.reply_text(JOB_AGE_MESSAGE)
return Flow.JOB_AGE.value
async def job_age(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
"""Asks for a Job age in hours."""
user = update.message.from_user
self.logger.info("Job age of %s: %s", user.first_name, update.message.text)
if not update.message.text.isnumeric():
await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
await update.message.reply_text(JOB_AGE_INVALID)
await update.message.reply_text(JOB_AGE_MESSAGE)
return Flow.JOB_AGE.value
await update.message.set_reaction(ReactionEmoji.FIRE)
cached_user: User = cache_manager.find(update.message.from_user.username)
cached_user.job_age = update.message.text
cache_manager.save(cached_user.username, cached_user)
await update.message.reply_text(
FILTER_TILE_MESSAGE)
return Flow.FILTERS.value
async def filters_flow(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
"""Asks for a filters_flow."""
await update.message.set_reaction(ReactionEmoji.FIRE)
title_filters = update.message.text.split(",")
# Remove leading/trailing spaces from each filter
title_filters = [title_filter.strip() for title_filter in title_filters]
reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True,
input_field_placeholder=Flow.VERIFY_FILTERS.name)
await update.message.reply_text(VERIFY_MESSAGE % title_filters, reply_markup=reply_markup)
cached_user: User = cache_manager.find(update.message.from_user.username)
cached_user.title_filters = title_filters
cache_manager.save(cached_user.username, cached_user)
return Flow.VERIFY_FILTERS.value
async def verify_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
"""Verify for a filters_flow."""
if update.message.text == "No":
await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
await update.message.reply_text(FILTER_TILE_MESSAGE)
return Flow.FILTERS.value
await update.message.set_reaction(ReactionEmoji.FIRE)
await update.message.reply_text(THANK_YOU_MESSAGE)
await update.message.reply_text(SEARCH_MESSAGE)
cached_user: User = cache_manager.find(update.message.from_user.username)
user_repository.update(cached_user)
return ConversationHandler.END
async def skip_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
"""Skips the location and asks for info about the user."""
await update.message.set_reaction(ReactionEmoji.FIRE)
user = update.message.from_user
self.logger.info("User %s did not send a filters.", user.first_name)
await update.message.reply_text(THANK_YOU_MESSAGE)
await update.message.reply_text(SEARCH_MESSAGE)
return ConversationHandler.END
async def cancel(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
"""Cancels and ends the conversation."""
await update.message.set_reaction(ReactionEmoji.FIRE)
user = update.message.from_user
self.logger.info("User %s canceled the conversation.", user.first_name)
await update.message.reply_text(
BYE_MESSAGE, reply_markup=ReplyKeyboardRemove()
)
cached_user: User = cache_manager.find(user.username)
user_repository.update(cached_user)
return ConversationHandler.END
start_handler = TelegramStartHandler()
start_conv_handler = ConversationHandler(
entry_points=[CommandHandler("start", start_handler.start)],
states={
Flow.POSITION.value: [MessageHandler(filters.TEXT, start_handler.position)],
Flow.ADDRESS.value: [MessageHandler(filters.TEXT, start_handler.address)],
Flow.VERIFY_ADDRESS.value: [MessageHandler(filters.TEXT, start_handler.verify_address)],
Flow.EXPERIENCE.value: [MessageHandler(filters.TEXT, start_handler.experience)],
Flow.JOB_AGE.value: [MessageHandler(filters.TEXT, start_handler.job_age)],
Flow.FILTERS.value: [MessageHandler(filters.TEXT, start_handler.filters_flow)],
Flow.VERIFY_FILTERS.value: [MessageHandler(filters.TEXT, start_handler.verify_filter)],
},
fallbacks=[CommandHandler("cancel", start_handler.cancel)],
)
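One thing to flag, hedged since only part of the bot wiring is shown: filters.TEXT also matches command messages, so a "/cancel" sent mid-conversation is consumed by the state's MessageHandler and never reaches the fallback. python-telegram-bot's conversation examples use a stricter filter:

from telegram.ext import MessageHandler, filters

# Matches plain text but not commands, so /cancel falls through to the fallback
text_only = filters.TEXT & ~filters.COMMAND
# e.g.: Flow.POSITION.value: [MessageHandler(text_only, start_handler.position)]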


@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd


@ -1,6 +1,6 @@
from dotenv import load_dotenv
from db.job_repository import JobRepository
from model.job_repository import JobRepository
from tests.test_util import createMockJob
load_dotenv()


@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd


@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd


@ -1,12 +1,12 @@
import json
import os
from jobspy.jobs import JobPost
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
from jobspy.scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column
from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliResponseData
from jobspy.scrapers.utils import create_dict_by_key_and_value
from jobs import JobPost
from scrapers.goozali.GoozaliMapper import GoozaliMapper
from scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
from scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column
from scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliResponseData
from scrapers.utils import create_dict_by_key_and_value
# URL Example
# https://airtable.com/v0.3/view/viwagEIbkfz2iMsLU/readSharedViewData?stringifiedObjectParams=%7B%22shouldUseNestedResponseFormat%22%3Atrue%7D&requestId=reqXyRSHWlXyiRgY9&accessPolicy=%7B%22allowedActions%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSignedAttachmentUrls%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows%20*%5BdisplayedInView%3DviwagEIbkfz2iMsLU%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%5D%2C%22shareId%22%3A%22shr97tl6luEk4Ca9R%22%2C%22applicationId%22%3A%22app5sYJyDgcRbJWYU%22%2C%22generationNumber%22%3A0%2C%22expires%22%3A%222025-01-02T00%3A00%3A00.000Z%22%2C%22signature%22%3A%223aa292ee44d15aa75d9506200329e413653471f89e000fa370ef9fa38393070a%22%7D


@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd


@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd


@ -1,7 +1,7 @@
from datetime import datetime, date
from typing import List
from jobspy import JobPost, Location, Country
from scrapers import JobPost, Location, Country
# Creating some test job posts


@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from scrapers import scrape_jobs
import pandas as pd