diff --git a/pyproject.toml b/pyproject.toml index c4275a7..fdb9bec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,15 +3,15 @@ requires = [ "poetry-core",] build-backend = "poetry.core.masonry.api" [tool.poetry] -name = "python-jobspy" +name = "python-JobSeekerTG" version = "1.1.76" description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter" -authors = [ "Zachary Hampton ", "Cullen Watson ",] -homepage = "https://github.com/Bunsly/JobSpy" +authors = [ "YM "] +homepage = "https://github.com/yariv245/JobSeekerTG" readme = "README.md" keywords = [ "jobs-scraper", "linkedin", "indeed", "glassdoor", "ziprecruiter",] [[tool.poetry.packages]] -include = "jobspy" +include = "JobSeekerTG" from = "src" [tool.black] diff --git a/requirements.txt b/requirements.txt index 5b69741..723ff53 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/src/config/cache_manager.py b/src/config/cache_manager.py new file mode 100644 index 0000000..e65c67f --- /dev/null +++ b/src/config/cache_manager.py @@ -0,0 +1,17 @@ +from cachebox import LRUCache + + +class CacheboxCacheManager: + def __init__(self): + self._cache = LRUCache(50) + + def find(self, cache_id: str): + """Find cached data by id, returning None if missing""" + return self._cache.get(cache_id) + + def save(self, cache_id: str, data): + """Save data to the cache under the given id""" + self._cache.insert(cache_id, data) + + +cache_manager = CacheboxCacheManager() diff --git a/src/db/monogo_db.py b/src/db/monogo_db.py deleted file mode 100644 index bcac938..0000000 --- a/src/db/monogo_db.py +++ /dev/null @@ -1,34 +0,0 @@ -import os - -from pymongo import MongoClient -from pymongo.synchronous.database import Database - -from config.settings import settings -from jobspy import create_logger - - -class MongoDB: - _instance = None - db:Database = None - def __new__(cls): - - if cls._instance is not None: - return cls._instance - - self = super().__new__(cls) - cls._instance = self - logger = create_logger("Mongo Client") - mongoUri = settings.mongo_uri - if not mongoUri: - logger.error("MONGO_URI environment variable is not set") - raise ValueError("MONGO_URI environment variable is not set") - client = MongoClient(mongoUri) - database_name = settings.mongo_db_name - if not database_name: - logger.error("MONGO_DB_NAME environment variable is not set") - raise ValueError( - "MONGO_DB_NAME environment variable is not set") - - self.db = client[database_name] - logger.info("Succeed connect to MongoDB") - return cls._instance diff --git a/src/jobspy/jobs/__init__.py b/src/jobs/__init__.py similarity index 100% rename from src/jobspy/jobs/__init__.py rename to src/jobs/__init__.py diff --git a/src/jobspy/scrapers/goozali/constants.py b/src/jobspy/scrapers/goozali/constants.py deleted file mode 100644 index 458320f..0000000 --- a/src/jobspy/scrapers/goozali/constants.py +++ /dev/null @@ -1,29 +0,0 @@ -from .model import GoozaliColumn - - -job_post_column_to_goozali_column = { - "date_posted": "Discovered", - "field": "Field", - "title": "Job Title", - "job_url": "Position Link", - "company_name": "Company", - "description": "Requirements", - "location": "Location", - "company_industry": "Company Industry", - "id": "Job ID" -} - -job_post_column_names = ["id", - "date_posted", - "field", - "title", - "job_url", - "company_name", - "description", - "location", - "company_industry"] - - -# Key mapper: Extract 'name' as the key -def extract_goozali_column_name(column): return column.name if isinstance( - column, GoozaliColumn) else
None diff --git a/src/main.py b/src/main.py index a6263c8..99a87c1 100644 --- a/src/main.py +++ b/src/main.py @@ -1,13 +1,13 @@ -import os - from telegram import Update -from telegram.ext import Application, CommandHandler, CallbackQueryHandler, Updater +from telegram.ext import Application, CommandHandler, CallbackQueryHandler from config.settings import settings -from jobspy.scrapers.site import Site -from jobspy.scrapers.utils import create_logger +from scrapers import Site +from scrapers.utils import create_logger from telegram_handler import TelegramDefaultHandler from telegram_handler.button_callback.telegram_callback_handler import TelegramCallHandler +from telegram_handler.telegram_myinfo_handler import my_info_handler +from telegram_handler.telegram_start_handler import start_conv_handler logger = create_logger("Main") _api_token = settings.telegram_api_token @@ -17,52 +17,34 @@ title_filters: list[str] = ["test", "qa", "Lead", "Full-Stack", "Full Stack", "F "automation", "BI ", "Principal", "Architect", "Android", "Machine Learning", "Student", "Data Engineer", "DevSecOps"] - -async def stop(update, context): - logger.info("Stop polling from telegram") - application.stop_running() - if __name__ == "__main__": logger.info("Starting initialize ") search_term = "software engineer" locations = ["Tel Aviv, Israel", "Ramat Gan, Israel", "Central, Israel", "Rehovot ,Israel"] + application.add_handler(start_conv_handler) tg_callback_handler = TelegramCallHandler() - tg_handler_all = TelegramDefaultHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI], - locations=locations, - title_filters=title_filters, - search_term=search_term) + tg_handler_all = TelegramDefaultHandler(sites=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI]) application.add_handler(CommandHandler("find", tg_handler_all.handle)) # Goozali - tg_handler_goozali = TelegramDefaultHandler(sites=[Site.GOOZALI], - locations=locations, - title_filters=title_filters, - search_term=search_term) + tg_handler_goozali = TelegramDefaultHandler(sites=[Site.GOOZALI]) application.add_handler(CommandHandler( Site.GOOZALI.value, tg_handler_goozali.handle)) # GlassDoor - tg_handler_glassdoor = TelegramDefaultHandler(sites=[Site.GLASSDOOR], - locations=locations, - title_filters=title_filters, - search_term=search_term) + tg_handler_glassdoor = TelegramDefaultHandler(sites=[Site.GLASSDOOR]) application.add_handler(CommandHandler( Site.GLASSDOOR.value, tg_handler_glassdoor.handle)) # LinkeDin - tg_handler_linkedin = TelegramDefaultHandler(sites=[Site.LINKEDIN], - locations=locations, - title_filters=title_filters, - search_term=search_term) + tg_handler_linkedin = TelegramDefaultHandler(sites=[Site.LINKEDIN]) application.add_handler(CommandHandler( Site.LINKEDIN.value, tg_handler_linkedin.handle)) # Indeed - tg_handler_indeed = TelegramDefaultHandler(sites=[Site.INDEED], - locations=locations, - title_filters=title_filters, - search_term=search_term) + tg_handler_indeed = TelegramDefaultHandler(sites=[Site.INDEED]) application.add_handler(CommandHandler( Site.INDEED.value, tg_handler_indeed.handle)) + application.add_handler(CommandHandler( + "myInfo", my_info_handler.handle)) application.add_handler(CallbackQueryHandler( tg_callback_handler.button_callback)) - application.add_handler(CommandHandler('stop', stop)) logger.info("Run polling from telegram") application.run_polling(allowed_updates=Update.ALL_TYPES) diff --git a/src/model/Position.py b/src/model/Position.py new file mode 100644 index 0000000..7717206 
--- /dev/null +++ b/src/model/Position.py @@ -0,0 +1,26 @@ +from enum import Enum + + +class Position(str, Enum): + BACKEND_DEVELOPER = "Backend Developer" + FULLSTACK_DEVELOPER = "Fullstack Developer" + FRONTEND_DEVELOPER = "Frontend Developer" + DATA_SCIENTIST = "Data Scientist" + DATA_ANALYST = "Data Analyst" + PROJECT_MANAGER = "Project Manager" + CLOUD_ENGINEER = "Cloud Engineer" + CLOUD_ARCHITECT = "Cloud Architect" + UX_UI_DESIGNER = "UX/UI Designer" + PRODUCT_MANAGER = "Product Manager" + DEV_OPS_ENGINEER = "DevOps Engineer" + BUSINESS_ANALYST = "Business Analyst" + CYBERSECURITY_ENGINEER = "Cybersecurity Engineer" + MACHINE_LEARNING_ENGINEER = "Machine Learning Engineer" + ARTIFICIAL_INTELLIGENCE_ENGINEER = "Artificial Intelligence Engineer" + DATABASE_ADMINISTRATOR = "Database Administrator" + SYSTEMS_ADMINISTRATOR = "Systems Administrator" + NETWORK_ENGINEER = "Network Engineer" + TECHNICAL_SUPPORT_SPECIALIST = "Technical Support Specialist" + SALES_ENGINEER = "Sales Engineer" + SCRUM_MASTER = "Scrum Master" + IT_MANAGER = "IT Manager" diff --git a/src/model/User.py b/src/model/User.py new file mode 100644 index 0000000..afa1019 --- /dev/null +++ b/src/model/User.py @@ -0,0 +1,34 @@ +from typing import Optional, Union + +from pydantic import BaseModel, Field + +from model.Position import Position + + +class User(BaseModel): + full_name: str + username: str + chat_id: Union[int, str] = None + experience: Union[int, str] = None + job_age: Union[int, str] = None + position: Optional[Position] = None + cities: Optional[list[str]] = None + title_filters: Optional[list[str]] = None + + def get_myinfo_message(self): + message = "Here's your profile:\n\n" + message += f"Full Name: {self.full_name}\n" + message += f"Username: @{self.username}\n" + if self.chat_id: + message += f"Chat ID: {self.chat_id}\n" + if self.job_age: + message += f"Job Age (Hours): {self.job_age}\n" + if self.experience: + message += f"Experience (Years): {self.experience}\n" + if self.position: + message += f"Position Level: {self.position.value}\n" + if self.cities: + message += f"Preferred Cities: {', '.join(self.cities)}\n" + if self.title_filters: + message += f"Job Title Filters: {', '.join(self.title_filters)}\n" + return message \ No newline at end of file diff --git a/src/db/__init__.py b/src/model/__init__.py similarity index 100% rename from src/db/__init__.py rename to src/model/__init__.py diff --git a/src/model/codec/__init__.py b/src/model/codec/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/model/codec/position_codec.py b/src/model/codec/position_codec.py new file mode 100644 index 0000000..95bbddc --- /dev/null +++ b/src/model/codec/position_codec.py @@ -0,0 +1,17 @@ +from bson.codec_options import TypeCodec + +from model.Position import Position + + +class PositionCodec(TypeCodec): + python_type = Position + bson_type = str + + def transform_python(self, value): + return value.value + + def transform_bson(self, value): + return Position(value) + + +# position_codec = PositionCodec() diff --git a/src/db/job_repository.py b/src/model/job_repository.py similarity index 74% rename from src/db/job_repository.py rename to src/model/job_repository.py index 290031f..25b2afe 100644 --- a/src/db/job_repository.py +++ b/src/model/job_repository.py @@ -3,27 +3,17 @@ from typing import Optional from dotenv import load_dotenv from pymongo import UpdateOne -from .monogo_db import MongoDB -from jobspy import create_logger -from jobspy.jobs import JobPost +from scrapers import
create_logger +from jobs import JobPost +from .monogo_db import mongo_client load_dotenv() class JobRepository: - _instance = None - - def __new__(cls): - - if cls._instance is not None: - return cls._instance - - self = super().__new__(cls) - cls._instance = self - self.logger = create_logger("JobRepository") - mongo_client = MongoDB() - self.collection = mongo_client.db["jobs"] - return cls._instance + def __init__(self): + self._logger = create_logger("JobRepository") + self._collection = mongo_client.get_collection('jobs') def find_by_id(self, job_id: str) -> Optional[JobPost]: """ @@ -35,7 +25,7 @@ class JobRepository: Returns: The job document if found, otherwise None. """ - result = self.collection.find_one({"id": job_id}) + result = self._collection.find_one({"id": job_id}) return JobPost(**result) def update(self, job: JobPost) -> bool: """ @@ -48,7 +38,7 @@ class JobRepository: Returns: True if the update was successful, False otherwise. """ - result = self.collection.update_one({"id": job.id}, {"$set": job.model_dump(exclude={"date_posted"})}) + result = self._collection.update_one({"id": job.id}, {"$set": job.model_dump(exclude={"date_posted"})}) return result.modified_count > 0 def insert_job(self, job: JobPost): """ @@ -62,8 +52,8 @@ class JobRepository: Exception: If an error occurs during insertion. """ job_dict = job.model_dump(exclude={"date_posted"}) - self.collection.insert_one(job_dict) - self.logger.info(f"Inserted new job with title {job.title}.") + self._collection.insert_one(job_dict) + self._logger.info(f"Inserted new job with title {job.title}.") def insert_many_if_not_found(self, jobs: list[JobPost]) -> tuple[list[JobPost], list[JobPost]]: """ @@ -86,8 +76,8 @@ class JobRepository: if operations: # Execute all operations in bulk - result = self.collection.bulk_write(operations) - self.logger.info(f"Matched: {result.matched_count}, Upserts: { + result = self._collection.bulk_write(operations) + self._logger.info(f"Matched: {result.matched_count}, Upserts: { result.upserted_count}, Modified: {result.modified_count}") # Get the newly inserted jobs (those that were upserted) @@ -99,3 +89,5 @@ old_jobs.append(job) return old_jobs, new_jobs + +job_repository = JobRepository() \ No newline at end of file diff --git a/src/model/monogo_db.py b/src/model/monogo_db.py new file mode 100644 index 0000000..c8ab81c --- /dev/null +++ b/src/model/monogo_db.py @@ -0,0 +1,38 @@ +from pymongo import MongoClient +from pymongo.synchronous.database import Database + +from config.settings import settings +from scrapers.utils import create_logger + + +class MongoDB: + def __init__(self): + logger = create_logger("Mongo Client") + mongo_uri = settings.mongo_uri + if not mongo_uri: + logger.error("MONGO_URI environment variable is not set") + raise ValueError("MONGO_URI environment variable is not set") + client = MongoClient(mongo_uri) + database_name = settings.mongo_db_name + if not database_name: + logger.error("MONGO_DB_NAME environment variable is not set") + raise ValueError( + "MONGO_DB_NAME environment variable is not set") + + self._db: Database = client[database_name] + logger.info("Successfully connected to MongoDB") + + def get_collection(self, + name: str, + codec_options=None, + read_preference=None, + write_concern=None, + read_concern=None): + return self._db.get_collection(name, + codec_options, + read_preference, + write_concern, + read_concern) + + +mongo_client = MongoDB() diff --git a/src/model/user_repository.py b/src/model/user_repository.py new file mode
100644 index 0000000..608bf32 --- /dev/null +++ b/src/model/user_repository.py @@ -0,0 +1,129 @@ +from typing import Optional + +from dotenv import load_dotenv +from pymongo import UpdateOne + +from config.cache_manager import cache_manager +from scrapers.utils import create_logger +from .User import User +from .monogo_db import mongo_client + +load_dotenv() + + +class UserRepository: + def __init__(self): + self._logger = create_logger("UserRepository") + self._collection = mongo_client.get_collection('user') + self._collection.create_index('username', unique=True) + + def find_by_id(self, user_id: str) -> Optional[User]: + """ + Finds a user document in the collection by its ID. + + Args: + user_id: The ID of the user to find. + + Returns: + The user document if found, otherwise None. + """ + user = None + cached_user = cache_manager.find(user_id) + if cached_user: + return cached_user + + result = self._collection.find_one({"id": user_id}) + + if result: + user = User(**result) + cache_manager.save(user_id, user) + + return user + + def find_by_username(self, username: str) -> Optional[User]: + """ + Finds a user document in the collection by its username. + + Args: + username: The username of the user to find. + + Returns: + The user document if found, otherwise None. + """ + user = None + cached_user = cache_manager.find(username) + if cached_user: + return cached_user + + result = self._collection.find_one({"username": username}) + self._logger.info("find user by username") + if result: + user = User(**result) + cache_manager.save(username, user) + + return user + + def update(self, user: User) -> bool: + """ + Updates a User in the database. + + Args: + user: The User object with updated data. + + Returns: + True if the update was successful, False otherwise. + """ + result = self._collection.update_one({"username": user.username}, {"$set": user.model_dump()}) + return result.modified_count > 0 + + def insert_user(self, user: User): + """ + Inserts a new user into the database collection. + + Args: + user (User): The User object to be inserted. + + Raises: + Exception: If an error occurs during insertion. + """ + self._collection.insert_one(user.model_dump()) + cache_manager.save(user.username, user) + self._logger.info(f"Inserted new user with username {user.username}.") + + def insert_many_if_not_found(self, users: list[User]) -> tuple[list[User], list[User]]: + """ + Perform bulk upserts for a list of User objects into a MongoDB collection. + Only insert new users and return the list of newly inserted users.
+ """ + operations = [] + new_users = [] # List to store the new users inserted into MongoDB + old_users = [] # List to store users that already existed in MongoDB + for user in users: + user_dict = user.model_dump() + operations.append( + UpdateOne( + {"username": user.username}, # Match by unique `username` + # Only set fields if the user is being inserted (not updated) + {"$setOnInsert": user_dict}, + upsert=True # Insert if not found, but do not update if already exists + ) + ) + + if operations: + # Execute all operations in bulk + result = self._collection.bulk_write(operations) + self._logger.info(f"Matched: {result.matched_count}, Upserts: { + result.upserted_count}, Modified: {result.modified_count}") + + # Get the newly inserted users (those that were upserted) + # The `upserted_count` corresponds to how many new documents were inserted + for i, user in enumerate(users): + if result.upserted_count > 0 and i < result.upserted_count: + new_users.append(user) + else: + old_users.append(user) + + return old_users, new_users + + +user_repository = UserRepository() diff --git a/src/jobspy/__init__.py b/src/scrapers/__init__.py similarity index 87% rename from src/jobspy/__init__.py rename to src/scrapers/__init__.py index 60980db..65e4a54 100644 --- a/src/jobspy/__init__.py +++ b/src/scrapers/__init__.py @@ -2,34 +2,36 @@ from __future__ import annotations import re from threading import Lock - -import pandas as pd -from typing import Tuple from concurrent.futures import ThreadPoolExecutor, as_completed -from .scrapers.site import Site -from .scrapers.goozali import GoozaliScraper - -from .jobs import JobPost, JobType, Location -from .scrapers.utils import set_logger_level, extract_salary, create_logger -from .scrapers.indeed import IndeedScraper -from .scrapers.ziprecruiter import ZipRecruiterScraper -from .scrapers.glassdoor import GlassdoorScraper -from .scrapers.google import GoogleJobsScraper -from .scrapers.linkedin import LinkedInScraper -from .scrapers import SalarySource, ScraperInput, JobResponse, Country -from .scrapers.exceptions import ( - LinkedInException, - IndeedException, - ZipRecruiterException, - GlassdoorException, - GoogleJobsException, +from jobs import ( + Enum, + JobType, + JobResponse, + Country, + JobPost, ) +from model.User import User +from .glassdoor import GlassdoorScraper +from .google import GoogleJobsScraper +from .goozali import GoozaliScraper +from .indeed import IndeedScraper +from .linkedin import LinkedInScraper +from .scraper_input import ScraperInput +from .site import Site +from .utils import set_logger_level, create_logger +from .ziprecruiter import ZipRecruiterScraper + + +class SalarySource(Enum): + DIRECT_DATA = "direct_data" + DESCRIPTION = "description" def scrape_jobs( site_name: str | list[str] | Site | list[Site] | None = None, + user: User = None, search_term: str | None = None, google_search_term: str | None = None, location: str | None = None, @@ -55,7 +57,7 @@ ) -> (list[JobPost], list[JobPost]): """ Simultaneously scrapes job data from multiple job sites.
- :return: pandas dataframe containing job data + :return: list of jobPost, list of new jobPost """ SCRAPER_MAPPING = { Site.LINKEDIN: LinkedInScraper, @@ -93,6 +95,7 @@ def scrape_jobs( country_enum = Country.from_string(country_indeed) scraper_input = ScraperInput( + user=user, site_type=get_site_type(), country=country_enum, search_term=search_term, @@ -111,7 +114,7 @@ def scrape_jobs( hours_old=hours_old ) - def scrape_site(site: Site) -> Tuple[str, JobResponse]: + def scrape_site(site: Site) -> tuple[str, JobResponse]: scraper_class = SCRAPER_MAPPING[site] scraper = scraper_class(proxies=proxies, ca_cert=ca_cert) scraped_data: JobResponse = scraper.scrape(scraper_input) @@ -166,6 +169,10 @@ def scrape_jobs( """ filtered_jobs = [] remaining_jobs = [] + + if not filter_by_title: + return filtered_jobs, remaining_jobs + for job in jobs: for filter_title in filter_by_title: if re.search(filter_title, job.title, re.IGNORECASE): diff --git a/src/jobspy/scrapers/exceptions.py b/src/scrapers/exceptions.py similarity index 96% rename from src/jobspy/scrapers/exceptions.py rename to src/scrapers/exceptions.py index eba0479..dcfb3d2 100644 --- a/src/jobspy/scrapers/exceptions.py +++ b/src/scrapers/exceptions.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.exceptions +scrapers.exceptions ~~~~~~~~~~~~~~~~~~~ This module contains the set of Scrapers' exceptions. diff --git a/src/jobspy/scrapers/glassdoor/GlassDoorLocation.py b/src/scrapers/glassdoor/GlassDoorLocation.py similarity index 100% rename from src/jobspy/scrapers/glassdoor/GlassDoorLocation.py rename to src/scrapers/glassdoor/GlassDoorLocation.py diff --git a/src/jobspy/scrapers/glassdoor/__init__.py b/src/scrapers/glassdoor/__init__.py similarity index 99% rename from src/jobspy/scrapers/glassdoor/__init__.py rename to src/scrapers/glassdoor/__init__.py index 6266501..d0bf582 100644 --- a/src/jobspy/scrapers/glassdoor/__init__.py +++ b/src/scrapers/glassdoor/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.glassdoor +scrapers.glassdoor ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape Glassdoor. @@ -7,7 +7,6 @@ This module contains routines to scrape Glassdoor. from __future__ import annotations -from dataclasses import dataclass import re import json import requests @@ -18,14 +17,16 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from .GlassDoorLocation import GlassDoorLocationResponse, get_location_id, get_location_type from .constants import fallback_token, query_template, headers -from .. import Scraper, ScraperInput, Site +from ..scraper import Scraper +from ..scraper_input import ScraperInput +from ..site import Site from ..utils import extract_emails_from_text, create_logger from ..exceptions import GlassdoorException from ..utils import ( create_session, markdown_converter, ) -from ...jobs import ( +from jobs import ( JobPost, Compensation, CompensationInterval, diff --git a/src/jobspy/scrapers/glassdoor/constants.py b/src/scrapers/glassdoor/constants.py similarity index 100% rename from src/jobspy/scrapers/glassdoor/constants.py rename to src/scrapers/glassdoor/constants.py diff --git a/src/jobspy/scrapers/google/__init__.py b/src/scrapers/google/__init__.py similarity index 98% rename from src/jobspy/scrapers/google/__init__.py rename to src/scrapers/google/__init__.py index 523e6f5..05cab69 100644 --- a/src/jobspy/scrapers/google/__init__.py +++ b/src/scrapers/google/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.google +scrapers.google ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape Google. 
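The refactored `scrape_jobs` above now returns a `(filtered_out_jobs, jobs)` tuple rather than a DataFrame, and threads the Telegram `User` into every `ScraperInput`. A minimal call sketch under this diff's layout; the `User` values are illustrative, and the `locations` parameter is assumed from the handler call later in this diff:

```python
from model.Position import Position
from model.User import User
from scrapers import Site, scrape_jobs

# Illustrative user; the bot normally loads this via user_repository.find_by_username(...)
user = User(full_name="Jane Doe", username="jane", chat_id=123,
            experience=3, job_age=48, position=Position.BACKEND_DEVELOPER,
            cities=["Tel Aviv"], title_filters=["QA", "Architect"])

filtered_out_jobs, jobs = scrape_jobs(
    site_name=[Site.LINKEDIN, Site.GLASSDOOR],
    user=user,
    search_term=user.position.value,
    locations=user.cities,               # assumed, mirroring telegram_default_handler
    results_wanted=200,
    hours_old=int(user.job_age),
    filter_by_title=user.title_filters,  # title regexes; an empty list now skips filtering
    country_indeed="israel",
)
print(f"{len(jobs)} jobs kept, {len(filtered_out_jobs)} filtered out by title")
```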
@@ -14,12 +14,14 @@ from typing import Tuple from datetime import datetime, timedelta from .constants import headers_jobs, headers_initial, async_param -from .. import Scraper, ScraperInput, Site +from ..scraper import Scraper +from ..scraper_input import ScraperInput +from ..site import Site from ..utils import extract_emails_from_text, create_logger, extract_job_type from ..utils import ( create_session, ) -from ...jobs import ( +from jobs import ( JobPost, JobResponse, Location, diff --git a/src/jobspy/scrapers/google/constants.py b/src/scrapers/google/constants.py similarity index 100% rename from src/jobspy/scrapers/google/constants.py rename to src/scrapers/google/constants.py diff --git a/src/jobspy/scrapers/goozali/GoozaliMapper.py b/src/scrapers/goozali/GoozaliMapper.py similarity index 99% rename from src/jobspy/scrapers/goozali/GoozaliMapper.py rename to src/scrapers/goozali/GoozaliMapper.py index a68e6ba..0b37e43 100644 --- a/src/jobspy/scrapers/goozali/GoozaliMapper.py +++ b/src/scrapers/goozali/GoozaliMapper.py @@ -1,7 +1,7 @@ from datetime import datetime import json -from jobspy.jobs import JobPost, Location +from jobs import JobPost, Location from .model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow, GoozaliColumn, GoozaliColumnChoice, GoozaliResponseData from .constants import job_post_column_to_goozali_column, job_post_column_names diff --git a/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py b/src/scrapers/goozali/GoozaliScrapperComponent.py similarity index 57% rename from src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py rename to src/scrapers/goozali/GoozaliScrapperComponent.py index bcab0d3..5025713 100644 --- a/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py +++ b/src/scrapers/goozali/GoozaliScrapperComponent.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta -from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice +from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice, GoozaliFieldChoice from ..utils import create_logger # Mapping function to convert parsed dictionary into GoozaliResponseData @@ -13,12 +13,20 @@ class GoozaliScrapperComponent: pass # Function to filter GoozaliRows based on hours old - def filter_rows_by_column_choice(self, rows: list[GoozaliRow], column: GoozaliColumn, column_choice: GoozaliColumnChoice) -> list[GoozaliRow]: + def filter_rows_by_column_choice(self, rows: list[GoozaliRow], column: GoozaliColumn, + column_choices: list[GoozaliColumnChoice]) -> list[GoozaliRow]: return [ - row for row in rows - if row.cellValuesByColumnId[column.id] == column_choice.id + row + for row in rows + if row.cellValuesByColumnId.get(column.id) + and any(choice.id == row.cellValuesByColumnId[column.id] for choice in column_choices) ] + # return [ + # row for row in rows + # if row.cellValuesByColumnId[column.id] == column_choice.id + # ] + def filter_rows_by_hours(self, rows: list[GoozaliRow], hours: int) -> list[GoozaliRow]: # Current time now = datetime.now() @@ -39,14 +47,20 @@ class GoozaliScrapperComponent: if (column.name == column_name): return column - def find_choice_from_column(self, column: GoozaliColumn, choice_name: str) -> GoozaliColumnChoice: + def find_choices_from_column(self, column: GoozaliColumn, choices: list[GoozaliFieldChoice]) -> list[ + GoozaliColumnChoice]: if not column.typeOptions.choices: logger.exception(f"Choices for column {column.name} doesn't exist") raise Exception(f"Choices for column {column.name} doesn't exist") + chosen_values = [c.value for c in 
choices] + goozali_column_choices = [] for key, choice in column.typeOptions.choices.items(): - if (choice.name == choice_name): - return choice + if choice.name in chosen_values: + goozali_column_choices.append(choice) - logger.exception(f"Can't find {choice_name} for column {column.name}") - raise Exception(f"Can't find {choice_name} for column {column.name}") + if len(goozali_column_choices) == 0: + logger.exception(f"Can't find {choices} for column {column.name}") + raise Exception(f"Can't find {choices} for column {column.name}") + + return goozali_column_choices diff --git a/src/jobspy/scrapers/goozali/__init__.py b/src/scrapers/goozali/__init__.py similarity index 85% rename from src/jobspy/scrapers/goozali/__init__.py rename to src/scrapers/goozali/__init__.py index 90f18eb..4396bed 100644 --- a/src/jobspy/scrapers/goozali/__init__.py +++ b/src/scrapers/goozali/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.Goozali +scrapers.Goozali ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape Goozali. @@ -7,20 +7,20 @@ This module contains routines to scrape Goozali. from __future__ import annotations - -from .. import Scraper, ScraperInput -from .GoozaliMapper import GoozaliMapper -from .GoozaliScrapperComponent import GoozaliScrapperComponent -from .constants import extract_goozali_column_name, job_post_column_to_goozali_column -from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest -from ..site import Site - -from ..utils import create_dict_by_key_and_value, create_session, create_logger -from ...jobs import ( +from jobs import ( JobPost, JobResponse, ) -logger = create_logger("Goozali") +from .GoozaliMapper import GoozaliMapper +from .GoozaliScrapperComponent import GoozaliScrapperComponent +from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map +from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest +from ..scraper import Scraper +from ..scraper_input import ScraperInput +from ..site import Site +from ..utils import create_dict_by_key_and_value, create_session, create_logger + +logger = create_logger("GoozaliScraper") class GoozaliScraper(Scraper): @@ -67,27 +67,24 @@ class GoozaliScraper(Scraper): logger.info(f"response: {str(response)}") if (response.status_code != 200): logger.error(f"Status code: {response.status_code}, Error: { - str(response.text)}") + str(response.text)}") return JobResponse(jobs=job_list) except Exception as e: logger.error(f"Exception: {str(e)}") return JobResponse(jobs=job_list) - # model the response with models goozali_response = self.mapper.map_response_to_goozali_response( response=response) - # suggestL create groupby field and then filter by hours - # filter result by Field column = self.component.find_column( goozali_response.data.columns, job_post_column_to_goozali_column["field"]) - column_choice = self.component.find_choice_from_column( - column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value) + user_goozali_fields = position_to_goozali_field_map[scraper_input.user.position] + column_choices = self.component.find_choices_from_column( + column, user_goozali_fields) filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice( - goozali_response.data.rows, column, column_choice) + goozali_response.data.rows, column, column_choices) filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours( filtered_rows_by_column_choice, scraper_input.hours_old) dict_column_name_to_column: 
dict[str, GoozaliColumn] = create_dict_by_key_and_value( goozali_response.data.columns, extract_goozali_column_name) - # map to JobResponse Object for row in filtered_rows_by_age_and_column_choice: job_post = self.mapper.map_goozali_response_to_job_post( row, dict_column_name_to_column) diff --git a/src/scrapers/goozali/constants.py b/src/scrapers/goozali/constants.py new file mode 100644 index 0000000..a598b3d --- /dev/null +++ b/src/scrapers/goozali/constants.py @@ -0,0 +1,92 @@ +from model.Position import Position +from .model import GoozaliColumn, GoozaliFieldChoice + +job_post_column_to_goozali_column = { + "date_posted": "Discovered", + "field": "Field", + "title": "Job Title", + "job_url": "Position Link", + "company_name": "Company", + "description": "Requirements", + "location": "Location", + "company_industry": "Company Industry", + "id": "Job ID" +} + +job_post_column_names = ["id", + "date_posted", + "field", + "title", + "job_url", + "company_name", + "description", + "location", + "company_industry"] + +fields = ["Product Management", + "Data Analyst", + "Data Science, ML & Algorithms", + "Software Engineering", + "QA", + "Cybersecurity", + "IT and System Administration", + "Frontend Development", + "DevOps", + "UI/UX, Design & Content", + "HR & Recruitment", + "Mobile Development", + "Hardware Engineering", + "Embedded, Low Level & Firmware Engineering", + "Customer Success", + "Project Management", + "Operations", + "Finance", + "Systems Engineering", + "Marketing", + "Sales", + "Compliance, Legal & Policy", + "C-Level", + "Business Development", + "Mechanical Engineering", + "Natural Science", + "Other"] + +def create_position_to_goozali_field_map(): + """ + Creates a map with Position as keys and a list of relevant GoozaliFieldChoice as values. + + Returns: + dict: A dictionary mapping Position to a list of GoozaliFieldChoice. 
+ """ + position_to_goozali_map = { + Position.BACKEND_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING], + Position.FULLSTACK_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING], + Position.FRONTEND_DEVELOPER: [GoozaliFieldChoice.FRONTEND_DEVELOPMENT, GoozaliFieldChoice.SOFTWARE_ENGINEERING], + Position.DATA_SCIENTIST: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS], + Position.DATA_ANALYST: [GoozaliFieldChoice.DATA_ANALYST], + Position.PROJECT_MANAGER: [GoozaliFieldChoice.PROJECT_MANAGEMENT], + Position.CLOUD_ENGINEER: [GoozaliFieldChoice.DEVOPS, GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.CLOUD_ARCHITECT: [GoozaliFieldChoice.DEVOPS, GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.UX_UI_DESIGNER: [GoozaliFieldChoice.UI_UX_DESIGN_CONTENT], + Position.PRODUCT_MANAGER: [GoozaliFieldChoice.PRODUCT_MANAGEMENT], + Position.DEV_OPS_ENGINEER: [GoozaliFieldChoice.DEVOPS], + Position.BUSINESS_ANALYST: [GoozaliFieldChoice.BUSINESS_DEVELOPMENT], + Position.CYBERSECURITY_ENGINEER: [GoozaliFieldChoice.CYBERSECURITY], + Position.MACHINE_LEARNING_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS], + Position.ARTIFICIAL_INTELLIGENCE_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS], + Position.DATABASE_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.SYSTEMS_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.NETWORK_ENGINEER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.TECHNICAL_SUPPORT_SPECIALIST: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.SALES_ENGINEER: [GoozaliFieldChoice.SALES], + Position.SCRUM_MASTER: [GoozaliFieldChoice.PROJECT_MANAGEMENT], + Position.IT_MANAGER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + } + return position_to_goozali_map + +# Get the map +position_to_goozali_field_map = create_position_to_goozali_field_map() + +# Key mapper: Extract 'name' as the key +def extract_goozali_column_name(column): return column.name if isinstance( + column, GoozaliColumn) else None diff --git a/src/jobspy/scrapers/goozali/model/GoozaliColumn.py b/src/scrapers/goozali/model/GoozaliColumn.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliColumn.py rename to src/scrapers/goozali/model/GoozaliColumn.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliColumnChoice.py b/src/scrapers/goozali/model/GoozaliColumnChoice.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliColumnChoice.py rename to src/scrapers/goozali/model/GoozaliColumnChoice.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliColumnTypeOptions.py b/src/scrapers/goozali/model/GoozaliColumnTypeOptions.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliColumnTypeOptions.py rename to src/scrapers/goozali/model/GoozaliColumnTypeOptions.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliFieldChoice.py b/src/scrapers/goozali/model/GoozaliFieldChoice.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliFieldChoice.py rename to src/scrapers/goozali/model/GoozaliFieldChoice.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliFullRequest.py b/src/scrapers/goozali/model/GoozaliFullRequest.py similarity index 91% rename from src/jobspy/scrapers/goozali/model/GoozaliFullRequest.py rename to src/scrapers/goozali/model/GoozaliFullRequest.py index 3387ed8..8c90193 100644 --- a/src/jobspy/scrapers/goozali/model/GoozaliFullRequest.py +++ b/src/scrapers/goozali/model/GoozaliFullRequest.py 
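The `position_to_goozali_field_map` above fans a single bot `Position` out to one or more Goozali field choices, which `find_choices_from_column` then resolves against the live column metadata. A small lookup sketch, assuming the `GoozaliFieldChoice` values mirror the `fields` list above:

```python
from model.Position import Position
from scrapers.goozali.constants import position_to_goozali_field_map

# A frontend candidate is matched against two Goozali fields
field_choices = position_to_goozali_field_map[Position.FRONTEND_DEVELOPER]
print([choice.value for choice in field_choices])
# expected: ['Frontend Development', 'Software Engineering']
```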
@@ -6,13 +6,13 @@ class GoozaliFullRequest(): self.view_id: str = "viwIOzPYaUGxlA0Jd" self.url = base_url.format(view_id=self.view_id) self.application_id: str = "appwewqLk7iUY4azc" - self.air_table_page_load_id: str = "pglqAAzFDZEWCEC7s" + self.air_table_page_load_id: str = "pglke45UFwdvQgBNJ" self.stringifiedObjectParams = { "shouldUseNestedResponseFormat": "true"} self.cookies: dict[str, str] = {} - self.request_id: str = "req4q4tKw3woEEWxw&" + self.request_id: str = "reqGjlEjOQFyRssam" self.share_id: str = "shrQBuWjXd0YgPqV6" - self.signature: str = "be8bd40c133f051f929ebab311c416013f5af0d5acae4264575b88ccf051ee59" + self.signature: str = "7a1402a3f7f6f9a23c8db3849878812f2d3141da60f3b3d6e14dd4a910b91b74" self.headers = self._generate_headers() self.params = self._generate_params() self.cookies = {} @@ -66,7 +66,7 @@ class GoozaliFullRequest(): "shareId": self.share_id, "applicationId": self.application_id, "generationNumber": 0, - "expires": "2025-01-30T00:00:00.000Z", # rotated Airtable share token; expires and must be refreshed periodically + "expires": "2025-01-30T00:00:00.000Z", "signature": self.signature } # Convert to a JSON string diff --git a/src/jobspy/scrapers/goozali/model/GoozaliPartRequest.py b/src/scrapers/goozali/model/GoozaliPartRequest.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliPartRequest.py rename to src/scrapers/goozali/model/GoozaliPartRequest.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliRequest.py b/src/scrapers/goozali/model/GoozaliRequest.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliRequest.py rename to src/scrapers/goozali/model/GoozaliRequest.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliResponse.py b/src/scrapers/goozali/model/GoozaliResponse.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliResponse.py rename to src/scrapers/goozali/model/GoozaliResponse.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliResponseData.py b/src/scrapers/goozali/model/GoozaliResponseData.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliResponseData.py rename to src/scrapers/goozali/model/GoozaliResponseData.py diff --git a/src/jobspy/scrapers/goozali/model/GoozaliRow.py b/src/scrapers/goozali/model/GoozaliRow.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/GoozaliRow.py rename to src/scrapers/goozali/model/GoozaliRow.py diff --git a/src/jobspy/scrapers/goozali/model/__init__.py b/src/scrapers/goozali/model/__init__.py similarity index 100% rename from src/jobspy/scrapers/goozali/model/__init__.py rename to src/scrapers/goozali/model/__init__.py diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/scrapers/indeed/__init__.py similarity index 94% rename from src/jobspy/scrapers/indeed/__init__.py rename to src/scrapers/indeed/__init__.py index 05ae16c..80ef94b 100644 --- a/src/jobspy/scrapers/indeed/__init__.py +++ b/src/scrapers/indeed/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.indeed +scrapers.indeed ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape Indeed. @@ -12,7 +12,9 @@ from typing import Tuple from datetime import datetime from .constants import job_search_query, api_headers -from ..
import Scraper, ScraperInput, Site +from ..scraper import Scraper +from ..scraper_input import ScraperInput +from ..site import Site from ..utils import ( extract_emails_from_text, get_enum_from_job_type, @@ -20,7 +22,7 @@ from ..utils import ( create_session, create_logger, ) -from ...jobs import ( +from jobs import ( JobPost, Compensation, CompensationInterval, @@ -35,7 +37,7 @@ logger = create_logger("Indeed") class IndeedScraper(Scraper): def __init__( - self, proxies: list[str] | str | None = None, ca_cert: str | None = None + self, proxies: list[str] | str | None = None, ca_cert: str | None = None ): """ Initializes IndeedScraper with the Indeed API url @@ -74,7 +76,7 @@ class IndeedScraper(Scraper): while len(self.seen_urls) < scraper_input.results_wanted + scraper_input.offset: logger.info( f"search page: { - page} / {math.ceil(scraper_input.results_wanted / self.jobs_per_page)}" + page} / {math.ceil(scraper_input.results_wanted / self.jobs_per_page)}" ) jobs, cursor = self._scrape_page(cursor, location) if not jobs: @@ -85,9 +87,9 @@ class IndeedScraper(Scraper): return JobResponse( jobs=job_list[ - scraper_input.offset: scraper_input.offset - + scraper_input.results_wanted - ] + scraper_input.offset: scraper_input.offset + + scraper_input.results_wanted + ] ) def _scrape_page(self, cursor: str | None, location: str) -> Tuple[list[JobPost], str | None]: @@ -108,7 +110,7 @@ class IndeedScraper(Scraper): what=(f'what: "{search_term}"' if search_term else ""), location=( f'location: {{where: "{location}", radius: { - self.scraper_input.distance}, radiusUnit: MILES}}' + self.scraper_input.distance}, radiusUnit: MILES}}' if location else "" ), @@ -130,7 +132,7 @@ class IndeedScraper(Scraper): if not response.ok: logger.info( f"responded with status code: { - response.status_code} (submit GitHub issue if this appears to be a bug)" + response.status_code} (submit GitHub issue if this appears to be a bug)" ) return jobs, new_cursor data = response.json() @@ -232,7 +234,7 @@ class IndeedScraper(Scraper): company_name=job["employer"].get( "name") if job.get("employer") else None, company_url=(f"{self.base_url}{ - rel_url}" if job["employer"] else None), + rel_url}" if job["employer"] else None), company_url_direct=( employer["links"]["corporateWebsite"] if employer else None ), @@ -345,7 +347,7 @@ class IndeedScraper(Scraper): for keyword in remote_keywords ) return ( - is_remote_in_attributes or is_remote_in_description or is_remote_in_location + is_remote_in_attributes or is_remote_in_description or is_remote_in_location ) @staticmethod diff --git a/src/jobspy/scrapers/indeed/constants.py b/src/scrapers/indeed/constants.py similarity index 100% rename from src/jobspy/scrapers/indeed/constants.py rename to src/scrapers/indeed/constants.py diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/scrapers/linkedin/__init__.py similarity index 98% rename from src/jobspy/scrapers/linkedin/__init__.py rename to src/scrapers/linkedin/__init__.py index 4519610..8e04d3f 100644 --- a/src/jobspy/scrapers/linkedin/__init__.py +++ b/src/scrapers/linkedin/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.linkedin +scrapers.linkedin ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape LinkedIn. 
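The Indeed hunks above are mostly re-indentation around the scraper's cursor-paginated loop; condensed, the pattern looks like this. A simplified sketch, not the scraper's exact code; `scrape_page` stands in for `IndeedScraper._scrape_page`:

```python
def collect_jobs(scrape_page, location: str, results_wanted: int, offset: int) -> list:
    """Cursor pagination as used by IndeedScraper.scrape, simplified."""
    cursor, job_list = None, []
    while len(job_list) < results_wanted + offset:
        jobs, cursor = scrape_page(cursor, location)  # returns (page of jobs, next cursor)
        if not jobs:  # results exhausted
            break
        job_list += jobs
    # trim to the requested window, like the slice in IndeedScraper.scrape
    return job_list[offset : offset + results_wanted]
```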
@@ -17,13 +17,15 @@ from datetime import datetime from bs4.element import Tag from bs4 import BeautifulSoup from urllib.parse import urlparse, urlunparse, unquote -from requests.exceptions import RetryError, RequestException +from requests.exceptions import RetryError from urllib3.exceptions import MaxRetryError from .constants import headers -from .. import Scraper, ScraperInput, Site +from ..scraper import Scraper +from ..scraper_input import ScraperInput +from ..site import Site from ..exceptions import LinkedInException from ..utils import create_session, remove_attributes, create_logger -from ...jobs import ( +from jobs import ( JobPost, Location, JobResponse, diff --git a/src/jobspy/scrapers/linkedin/constants.py b/src/scrapers/linkedin/constants.py similarity index 100% rename from src/jobspy/scrapers/linkedin/constants.py rename to src/scrapers/linkedin/constants.py diff --git a/src/scrapers/scraper.py b/src/scrapers/scraper.py new file mode 100644 index 0000000..c5ba529 --- /dev/null +++ b/src/scrapers/scraper.py @@ -0,0 +1,17 @@ +from abc import ABC, abstractmethod + +from jobs import JobResponse +from scrapers.site import Site +from scrapers.scraper_input import ScraperInput + + +class Scraper(ABC): + def __init__( + self, site: Site, proxies: list[str] | None = None, ca_cert: str | None = None + ): + self.site = site + self.proxies = proxies + self.ca_cert = ca_cert + + @abstractmethod + def scrape(self, scraper_input: ScraperInput) -> JobResponse: ... \ No newline at end of file diff --git a/src/jobspy/scrapers/__init__.py b/src/scrapers/scraper_input.py similarity index 50% rename from src/jobspy/scrapers/__init__.py rename to src/scrapers/scraper_input.py index c3f2756..381eec2 100644 --- a/src/jobspy/scrapers/__init__.py +++ b/src/scrapers/scraper_input.py @@ -1,25 +1,13 @@ -from __future__ import annotations +from pydantic import BaseModel -from abc import ABC, abstractmethod - -from .site import Site -from ..jobs import ( - Enum, - BaseModel, - JobType, - JobResponse, - Country, - DescriptionFormat, -) - - -class SalarySource(Enum): - DIRECT_DATA = "direct_data" - DESCRIPTION = "description" +from jobs import Country, JobType, DescriptionFormat +from model.User import User +from scrapers.site import Site class ScraperInput(BaseModel): site_type: list[Site] + user: User search_term: str | None = None google_search_term: str | None = None @@ -37,15 +25,3 @@ class ScraperInput(BaseModel): results_wanted: int = 15 hours_old: int | None = None - - -class Scraper(ABC): - def __init__( - self, site: Site, proxies: list[str] | None = None, ca_cert: str | None = None - ): - self.site = site - self.proxies = proxies - self.ca_cert = ca_cert - - @abstractmethod - def scrape(self, scraper_input: ScraperInput) -> JobResponse: ... 
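With the `Scraper` ABC and `ScraperInput` moved out of the `scrapers` package root into their own modules, a site scraper now needs only the three small imports shown in the hunks above. A minimal skeleton under those assumptions; `DummyScraper` is hypothetical and reuses an existing `Site` member, since the enum is unchanged by this diff:

```python
from jobs import JobResponse
from scrapers.scraper import Scraper
from scrapers.scraper_input import ScraperInput
from scrapers.site import Site


class DummyScraper(Scraper):
    """Hypothetical scraper illustrating the refactored contract."""

    def __init__(self, proxies: list[str] | None = None, ca_cert: str | None = None):
        super().__init__(Site.LINKEDIN, proxies=proxies, ca_cert=ca_cert)

    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
        # scraper_input.user carries the Telegram user introduced in this diff
        return JobResponse(jobs=[])
```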
diff --git a/src/jobspy/scrapers/site.py b/src/scrapers/site.py similarity index 100% rename from src/jobspy/scrapers/site.py rename to src/scrapers/site.py diff --git a/src/jobspy/scrapers/utils.py b/src/scrapers/utils.py similarity index 98% rename from src/jobspy/scrapers/utils.py rename to src/scrapers/utils.py index 6947650..ac77352 100644 --- a/src/jobspy/scrapers/utils.py +++ b/src/scrapers/utils.py @@ -11,11 +11,11 @@ import numpy as np from markdownify import markdownify as md from requests.adapters import HTTPAdapter, Retry -from ..jobs import CompensationInterval, JobType +from jobs import CompensationInterval, JobType def create_logger(name: str): - logger = logging.getLogger(f"JobSpy:{name}") + logger = logging.getLogger(f"JobSeekerTG:{name}") logger.propagate = False if not logger.handlers: logger.setLevel(logging.INFO) @@ -143,7 +143,7 @@ def set_logger_level(verbose: int = 2): level = getattr(logging, level_name.upper(), None) if level is not None: for logger_name in logging.root.manager.loggerDict: - if logger_name.startswith("JobSpy:"): + if logger_name.startswith("JobSeekerTG:"): logging.getLogger(logger_name).setLevel(level) else: raise ValueError(f"Invalid log level: {level_name}") diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/scrapers/ziprecruiter/__init__.py similarity index 98% rename from src/jobspy/scrapers/ziprecruiter/__init__.py rename to src/scrapers/ziprecruiter/__init__.py index 294ca8c..90dab76 100644 --- a/src/jobspy/scrapers/ziprecruiter/__init__.py +++ b/src/scrapers/ziprecruiter/__init__.py @@ -1,5 +1,5 @@ """ -jobspy.scrapers.ziprecruiter +scrapers.ziprecruiter ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape ZipRecruiter. @@ -19,7 +19,9 @@ from concurrent.futures import ThreadPoolExecutor from bs4 import BeautifulSoup from .constants import headers -from .. 
import Scraper, ScraperInput, Site +from ..site import Site +from ..scraper import Scraper +from ..scraper_input import ScraperInput from ..utils import ( extract_emails_from_text, create_session, @@ -27,7 +29,7 @@ from ..utils import ( remove_attributes, create_logger, ) -from ...jobs import ( +from jobs import ( JobPost, Compensation, Location, diff --git a/src/jobspy/scrapers/ziprecruiter/constants.py b/src/scrapers/ziprecruiter/constants.py similarity index 100% rename from src/jobspy/scrapers/ziprecruiter/constants.py rename to src/scrapers/ziprecruiter/constants.py diff --git a/src/telegram_bot.py b/src/telegram_bot.py index 1bfaff9..6f7c1d7 100644 --- a/src/telegram_bot.py +++ b/src/telegram_bot.py @@ -1,4 +1,3 @@ -import os from typing import Union from dotenv import load_dotenv @@ -6,8 +5,8 @@ from telegram import Bot, InlineKeyboardButton, InlineKeyboardMarkup from telegram.constants import ReactionEmoji from config.settings import settings -from jobspy.jobs import JobPost -from jobspy.scrapers.utils import create_logger +from jobs import JobPost +from scrapers.utils import create_logger load_dotenv() diff --git a/src/telegram_handler/button_callback/button_callback_context.py b/src/telegram_handler/button_callback/button_callback_context.py index 111c88d..52c5fe9 100644 --- a/src/telegram_handler/button_callback/button_callback_context.py +++ b/src/telegram_handler/button_callback/button_callback_context.py @@ -3,8 +3,8 @@ from __future__ import annotations from telegram import MaybeInaccessibleMessage from telegram.constants import ReactionEmoji -from db.job_repository import JobRepository -from jobspy import create_logger +from scrapers import create_logger +from model.job_repository import job_repository from telegram_handler.button_callback.button_fire_strategy import FireStrategy from telegram_handler.button_callback.button_job_title_strategy import JobTitleStrategy from telegram_handler.button_callback.button_poo_strategy import PooStrategy @@ -22,7 +22,6 @@ class ButtonCallBackContext: self._data = data self._job_id = job_id self._strategy = None - self._job_repository = JobRepository() @property def strategy(self) -> ButtonStrategy: @@ -49,10 +48,10 @@ class ButtonCallBackContext: elif ReactionEmoji.PILE_OF_POO.name == self._data: self._strategy = PooStrategy(self._message) elif self._data: - job = self._job_repository.find_by_id(self._data) + job = job_repository.find_by_id(self._data) if job: chat_id = self._message.chat.id - self._strategy = JobTitleStrategy(chat_id,job) + self._strategy = JobTitleStrategy(chat_id, job) else: self._logger.error("Invalid enum value") return diff --git a/src/telegram_handler/button_callback/button_fire_strategy.py b/src/telegram_handler/button_callback/button_fire_strategy.py index 70a4f76..44af48b 100644 --- a/src/telegram_handler/button_callback/button_fire_strategy.py +++ b/src/telegram_handler/button_callback/button_fire_strategy.py @@ -1,8 +1,8 @@ from telegram import MaybeInaccessibleMessage from telegram.constants import ReactionEmoji -from db.job_repository import JobRepository -from jobspy import create_logger +from scrapers import create_logger +from model.job_repository import job_repository from telegram_bot import TelegramBot from telegram_handler.button_callback.button_strategy import ButtonStrategy @@ -16,16 +16,15 @@ class FireStrategy(ButtonStrategy): self._message = message self._emoji = ReactionEmoji.FIRE self._telegram_bot = TelegramBot() - self._job_repository = JobRepository() self._job_id = job_id self._logger = 
create_logger("FireStrategy") async def execute(self): - job = self._job_repository.find_by_id(self._job_id) + job = job_repository.find_by_id(self._job_id) if not job: self._logger.error(f"Job with ID {self._job_id} not found.") return job.applied = True - self._job_repository.update(job) + job_repository.update(job) chat_id = self._message.chat.id await self._telegram_bot.set_message_reaction(chat_id, self._message.message_id, self._emoji) diff --git a/src/telegram_handler/button_callback/button_job_title_strategy.py b/src/telegram_handler/button_callback/button_job_title_strategy.py index a96bbf7..bec2535 100644 --- a/src/telegram_handler/button_callback/button_job_title_strategy.py +++ b/src/telegram_handler/button_callback/button_job_title_strategy.py @@ -1,6 +1,6 @@ from typing import Union -from jobspy import JobPost +from scrapers import JobPost from telegram_bot import TelegramBot from telegram_handler.button_callback.button_strategy import ButtonStrategy diff --git a/src/telegram_handler/button_callback/telegram_callback_handler.py b/src/telegram_handler/button_callback/telegram_callback_handler.py index b43f4cc..051bd8b 100644 --- a/src/telegram_handler/button_callback/telegram_callback_handler.py +++ b/src/telegram_handler/button_callback/telegram_callback_handler.py @@ -3,7 +3,7 @@ from telegram.ext import ( ContextTypes, ) -from jobspy import create_logger +from scrapers import create_logger from telegram_bot import TelegramBot from telegram_handler.button_callback.button_callback_context import ButtonCallBackContext diff --git a/src/telegram_handler/start_handler_constats.py b/src/telegram_handler/start_handler_constats.py new file mode 100644 index 0000000..7a32775 --- /dev/null +++ b/src/telegram_handler/start_handler_constats.py @@ -0,0 +1,46 @@ +START_MESSAGE: str = "Hi there! I'm JobSeeker Bot, your friendly job search assistant.😊\n" \ + "I'm here to help you find the perfect position.\n\n" \ + "To stop chatting with me at any time, just send '/cancel'.\n\n" + +POSITION_MESSAGE: str = "What kind of position are you looking for? ✨\n" \ + "(e.g., Software Engineer, Data Scientist, Marketing Manager)" + +POSITION_NOT_FOUND: str = "I couldn't find any positions matching your request. 😕\n" \ + "Please try again." +multi_value_message: str = "Enter multiple values separated by commas (e.g., value1, value2, value3) ✍️" + +LOCATION_MESSAGE: str = "Where are you hoping to find a position? 🌎\n" \ + "(e.g., Rishon Lezion, New York City, San Francisco)\n\n" + multi_value_message + +EXPERIENCE_MESSAGE: str = "How many years of professional experience do you have in this field? 💼\n" + +EXPERIENCE_INVALID: str = "Oops! Please enter your experience in years as a number. 😕\n" \ + "For example, 2, 5, or 10." + +JOB_AGE_MESSAGE: str = "How recent should the jobs be? ⏰\n" \ + "(Enter the number of hours, e.g., 24 for last 24 hours, 168 for last week)" + +# JOB_AGE_MESSAGE: str = "Within how many hours do you want to see jobs posted? ⏰\n" \ +# "(Enter a number, e.g., 48 for the last 48 hours)" + +JOB_AGE_INVALID: str = "Oops! Please enter a number for the number of hours. 😕\n" \ "For example, 24, 48, or 168." + +FILTER_TILE_MESSAGE: str = "To help me narrow down your search, tell me which job-title tags or keywords are NOT relevant to you.\n" \ + "For example: 'remote', 'BI', 'python', 'machine learning', 'QA'.\n\n" + multi_value_message + +THANK_YOU_MESSAGE: str = "Thank you for chatting with JobSeeker Bot!\n\n" \ + "I can help you find jobs on LinkedIn, Glassdoor, and more."
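The `VERIFY_MESSAGE` constant defined just below is rendered with %-formatting against whole lists in `address` and `filters_flow` later in this diff; a list fills the single `%s` with its repr, so users see the Python list literal:

```python
VERIFY_MESSAGE = "Did you choose: %s ? 🧐"  # as defined in start_handler_constats.py
cities = ["Tel Aviv", "Ramat Gan"]
print(VERIFY_MESSAGE % cities)  # Did you choose: ['Tel Aviv', 'Ramat Gan'] ? 🧐
# Note: a tuple would be unpacked into separate placeholders instead, so
# VERIFY_MESSAGE % tuple(cities) raises TypeError when there is more than one item.
```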
+ +SEARCH_MESSAGE: str = "To search for jobs on a specific site, simply send the site name:\n" \ + "/linkedin\n" \ + "/indeed\n" \ + "/glassdoor\n" \ + "/goozali\n\n" \ + "Or, use the command /find to search across all supported job boards for a broader search.\n\n" \ + "Let me know how I can assist you further! 😊" + +BYE_MESSAGE: str = "Have a great day!✨\n" \ + "I hope to assist you with your job search in the future.😊" + +VERIFY_MESSAGE: str = "Did you choose: %s ? 🧐" diff --git a/src/telegram_handler/telegram_default_handler.py b/src/telegram_handler/telegram_default_handler.py index c0d5208..3980f81 100644 --- a/src/telegram_handler/telegram_default_handler.py +++ b/src/telegram_handler/telegram_default_handler.py @@ -4,9 +4,10 @@ from telegram.ext import ( ContextTypes, ) -from db.job_repository import JobRepository -from jobspy import Site, scrape_jobs, JobPost -from jobspy.scrapers.utils import create_logger +from scrapers import Site, scrape_jobs, JobPost +from scrapers.utils import create_logger +from model.job_repository import JobRepository +from model.user_repository import user_repository from telegram_bot import TelegramBot from telegram_handler.telegram_handler import TelegramHandler @@ -33,11 +34,8 @@ def map_jobs_to_keyboard(jobs: list[JobPost]) -> InlineKeyboardMarkup: class TelegramDefaultHandler(TelegramHandler): - def __init__(self, sites: list[Site], locations: list[str], title_filters: list[str], search_term: str): + def __init__(self, sites: list[Site]): self.sites_to_scrap = sites - self.locations = locations - self.search_term = search_term - self.title_filters = title_filters self.telegram_bot = TelegramBot() self.jobRepository = JobRepository() if len(sites) == 1: @@ -51,17 +49,21 @@ chat_id = update.message.chat.id await self.telegram_bot.set_message_reaction(chat_id, update.message.message_id, ReactionEmoji.FIRE) + user = user_repository.find_by_username(update.message.from_user.username) + site_names = [site.name for site in self.sites_to_scrap] site_names_print = ", ".join(site_names) + # locations = [location + ", Israel" for location in user.cities] await self.telegram_bot.send_text(chat_id, f"Start scraping: {site_names_print}") filtered_out_jobs, jobs = scrape_jobs( site_name=self.sites_to_scrap, - search_term=self.search_term, - locations=self.locations, + user=user, + search_term=user.position.value, + locations=user.cities, results_wanted=200, - hours_old=48, - filter_by_title=self.title_filters, + hours_old=int(user.job_age), + filter_by_title=user.title_filters, country_indeed='israel' ) self.logger.info(f"Found {len(jobs)} jobs") diff --git a/src/telegram_handler/telegram_myinfo_handler.py b/src/telegram_handler/telegram_myinfo_handler.py new file mode 100644 index 0000000..4097833 --- /dev/null +++ b/src/telegram_handler/telegram_myinfo_handler.py @@ -0,0 +1,29 @@ +from telegram import Update +from telegram.constants import ReactionEmoji +from telegram.ext import ( + ContextTypes, +) + +from scrapers.utils import create_logger +from model.user_repository import user_repository +from telegram_bot import TelegramBot +from telegram_handler.telegram_handler import TelegramHandler + + +class MyInfoTelegramHandler(TelegramHandler): + def __init__(self): + self.telegram_bot = TelegramBot() + self._logger = create_logger("MyInfoTelegramHandler") + + async def handle(self, update: Update, context: ContextTypes.DEFAULT_TYPE): + self._logger.info("start handling") + chat_id = update.message.chat.id + await
+                                                     update.message.message_id, ReactionEmoji.FIRE)
+        user = user_repository.find_by_username(update.message.from_user.username)
+        await self.telegram_bot.send_text(chat_id, user.get_myinfo_message())
+
+        self._logger.info("finished handling")
+
+
+my_info_handler = MyInfoTelegramHandler()
diff --git a/src/telegram_handler/telegram_start_handler.py b/src/telegram_handler/telegram_start_handler.py
new file mode 100644
index 0000000..b00218f
--- /dev/null
+++ b/src/telegram_handler/telegram_start_handler.py
@@ -0,0 +1,216 @@
+from enum import Enum
+
+from telegram import Update, Chat, KeyboardButton, ReplyKeyboardMarkup, ReplyKeyboardRemove
+from telegram.constants import ReactionEmoji
+from telegram.ext import (
+    ContextTypes, ConversationHandler, CommandHandler, MessageHandler, filters,
+)
+
+from config.cache_manager import cache_manager
+from model.Position import Position
+from model.User import User
+from model.user_repository import user_repository
+from scrapers.utils import create_logger
+from telegram_bot import TelegramBot
+from telegram_handler.start_handler_constants import START_MESSAGE, POSITION_MESSAGE, POSITION_NOT_FOUND, \
+    LOCATION_MESSAGE, EXPERIENCE_MESSAGE, FILTER_TITLE_MESSAGE, THANK_YOU_MESSAGE, BYE_MESSAGE, VERIFY_MESSAGE, \
+    SEARCH_MESSAGE, EXPERIENCE_INVALID, JOB_AGE_INVALID, JOB_AGE_MESSAGE
+
+
+class Flow(Enum):
+    POSITION = 0
+    ADDRESS = 1
+    FILTERS = 2
+    EXPERIENCE = 3
+    VERIFY_ADDRESS = 4
+    VERIFY_FILTERS = 5
+    SKIP_FILTERS = 6
+    JOB_AGE = 7
+
+
+class TelegramStartHandler:
+
+    def __init__(self):
+        self.telegram_bot = TelegramBot()
+        self.logger = create_logger("TelegramStartHandler")
+
+    async def start(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
+        """Starts the conversation and asks the user about their position."""
+        chat: Chat = update.message.chat
+        user = user_repository.find_by_username(chat.username)
+        if not user:
+            user = User(full_name=chat.full_name, username=chat.username, chat_id=chat.id)
+            user_repository.insert_user(user)
+        cache_manager.save(chat.username, user)  # prime the cache so the later steps find this user
+
+        await update.message.reply_text(START_MESSAGE)
+
+        buttons = [[KeyboardButton(position.value)] for position in Position]
+        reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True,
+                                           input_field_placeholder=Flow.POSITION.name)
+        await update.message.reply_text(
+            POSITION_MESSAGE,
+            reply_markup=reply_markup,
+        )
+
+        return Flow.POSITION.value
+
+    async def position(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
+        """Stores the selected position and asks for locations."""
+        user = update.message.from_user
+        self.logger.info("Position of %s: %s", user.first_name, update.message.text)
+        position = next((p for p in Position if p.value == update.message.text), None)
+        if not position:
+            await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
+            await update.message.reply_text(POSITION_NOT_FOUND)
+            buttons = [[KeyboardButton(position.value)] for position in Position]
+            reply_markup = ReplyKeyboardMarkup(buttons, one_time_keyboard=True,
+                                               input_field_placeholder=Flow.POSITION.name)
+            await update.message.reply_text(
+                POSITION_MESSAGE,
+                reply_markup=reply_markup,
+            )
+            return Flow.POSITION.value
+
+        await update.message.set_reaction(ReactionEmoji.FIRE)
+        cached_user: User = cache_manager.find(user.username)
+        cached_user.position = position
+        cache_manager.save(cached_user.username, cached_user)
+        await update.message.reply_text(LOCATION_MESSAGE)
+
+        return Flow.ADDRESS.value
+
+    async def address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
+        """Stores the locations and asks for confirmation."""
+        cities = update.message.text.split(",")
+        # Remove leading/trailing spaces from each city name
+        cities = [city.strip() for city in cities]
+        await update.message.set_reaction(ReactionEmoji.FIRE)
+        reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True,
+                                           input_field_placeholder=Flow.VERIFY_ADDRESS.name)
+        await update.message.reply_text(VERIFY_MESSAGE % cities, reply_markup=reply_markup)
+
+        cached_user: User = cache_manager.find(update.message.from_user.username)
+        cached_user.cities = cities
+        cache_manager.save(cached_user.username, cached_user)
+
+        return Flow.VERIFY_ADDRESS.value
+
+    async def verify_address(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
+        """Verifies the locations the user entered."""
+        if update.message.text == "No":
+            await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
+            await update.message.reply_text(LOCATION_MESSAGE)
+            return Flow.ADDRESS.value
+
+        await update.message.set_reaction(ReactionEmoji.FIRE)
+        await update.message.reply_text(EXPERIENCE_MESSAGE)
+
+        return Flow.EXPERIENCE.value
+
+    async def experience(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
+        """Validates and stores the years of experience."""
+        user = update.message.from_user
+        self.logger.info("Experience of %s: %s", user.first_name, update.message.text)
+
+        if not update.message.text.isnumeric():
+            await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
+            await update.message.reply_text(EXPERIENCE_INVALID)
+            await update.message.reply_text(EXPERIENCE_MESSAGE)
+
+            return Flow.EXPERIENCE.value
+
+        await update.message.set_reaction(ReactionEmoji.FIRE)
+        cached_user: User = cache_manager.find(update.message.from_user.username)
+        cached_user.experience = update.message.text
+        cache_manager.save(cached_user.username, cached_user)
+        await update.message.reply_text(JOB_AGE_MESSAGE)
+        return Flow.JOB_AGE.value
+
+    async def job_age(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
+        """Validates and stores the job age in hours."""
+        user = update.message.from_user
+        self.logger.info("Job age of %s: %s", user.first_name, update.message.text)
+
+        if not update.message.text.isnumeric():
+            await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
+            await update.message.reply_text(JOB_AGE_INVALID)
+            await update.message.reply_text(JOB_AGE_MESSAGE)
+
+            return Flow.JOB_AGE.value
+        await update.message.set_reaction(ReactionEmoji.FIRE)
+        cached_user: User = cache_manager.find(update.message.from_user.username)
+        cached_user.job_age = update.message.text
+        cache_manager.save(cached_user.username, cached_user)
+        await update.message.reply_text(
+            FILTER_TITLE_MESSAGE)
+
+        return Flow.FILTERS.value
+
+    async def filters_flow(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
+        """Stores the title filters and asks for confirmation."""
+        await update.message.set_reaction(ReactionEmoji.FIRE)
+        title_filters = update.message.text.split(",")
+        # Remove leading/trailing spaces from each filter
+        title_filters = [title_filter.strip() for title_filter in title_filters]
+        reply_markup = ReplyKeyboardMarkup([[KeyboardButton("Yes"), KeyboardButton("No")]], one_time_keyboard=True,
+                                           input_field_placeholder=Flow.VERIFY_FILTERS.name)
+        await update.message.reply_text(VERIFY_MESSAGE % title_filters, reply_markup=reply_markup)
+
+        cached_user: User = cache_manager.find(update.message.from_user.username)
+        cached_user.title_filters = title_filters
+        cache_manager.save(cached_user.username, cached_user)
+
+        return Flow.VERIFY_FILTERS.value
+
+    async def verify_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
+        """Verifies the title filters and saves the user."""
+        if update.message.text == "No":
+            await update.message.set_reaction(ReactionEmoji.PILE_OF_POO)
+            await update.message.reply_text(FILTER_TITLE_MESSAGE)
+            return Flow.FILTERS.value
+
+        await update.message.set_reaction(ReactionEmoji.FIRE)
+        await update.message.reply_text(THANK_YOU_MESSAGE)
+        await update.message.reply_text(SEARCH_MESSAGE)
+        cached_user: User = cache_manager.find(update.message.from_user.username)
+        user_repository.update(cached_user)
+        return ConversationHandler.END
+
+    async def skip_filter(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
+        """Skips the title filters and ends the conversation."""
+        await update.message.set_reaction(ReactionEmoji.FIRE)
+        user = update.message.from_user
+        self.logger.info("User %s did not send any filters.", user.first_name)
+        await update.message.reply_text(THANK_YOU_MESSAGE)
+        await update.message.reply_text(SEARCH_MESSAGE)
+
+        return ConversationHandler.END
+
+    async def cancel(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
+        """Cancels and ends the conversation."""
+        await update.message.set_reaction(ReactionEmoji.FIRE)
+        user = update.message.from_user
+        self.logger.info("User %s canceled the conversation.", user.first_name)
+        await update.message.reply_text(
+            BYE_MESSAGE, reply_markup=ReplyKeyboardRemove()
+        )
+        cached_user: User = cache_manager.find(user.username)
+        user_repository.update(cached_user)
+        return ConversationHandler.END
+
+
+start_handler = TelegramStartHandler()
+start_conv_handler = ConversationHandler(
+    entry_points=[CommandHandler("start", start_handler.start)],
+    states={
+        Flow.POSITION.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.position)],
+        Flow.ADDRESS.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.address)],
+        Flow.VERIFY_ADDRESS.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.verify_address)],
+        Flow.EXPERIENCE.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.experience)],
+        Flow.JOB_AGE.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.job_age)],
+        Flow.FILTERS.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.filters_flow)],
+        Flow.VERIFY_FILTERS.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, start_handler.verify_filter)],
+    },
+    fallbacks=[CommandHandler("cancel", start_handler.cancel)],
+)
diff --git a/tests/test_all.py b/tests/test_all.py
index 3285611..6a6ff60 100644
--- a/tests/test_all.py
+++ b/tests/test_all.py
@@ -1,4 +1,4 @@
-from jobspy import scrape_jobs
+from scrapers import scrape_jobs
 import pandas as pd
diff --git a/tests/test_db.py b/tests/test_db.py
index bbe0e02..e136074 100644
--- a/tests/test_db.py
+++ b/tests/test_db.py
@@ -1,6 +1,6 @@
 from dotenv import load_dotenv

-from db.job_repository import JobRepository
+from model.job_repository import JobRepository
 from tests.test_util import createMockJob

 load_dotenv()
diff --git a/tests/test_glassdoor.py b/tests/test_glassdoor.py
index 267a3e6..16676ba 100644
--- a/tests/test_glassdoor.py
+++ b/tests/test_glassdoor.py
@@ -1,4 +1,4 @@
-from jobspy import scrape_jobs
+from scrapers import scrape_jobs
 import pandas as pd
diff --git a/tests/test_google.py b/tests/test_google.py
index 9f30ffe..5fa10f3 100644
--- a/tests/test_google.py
+++ b/tests/test_google.py
@@ -1,4 +1,4 @@
-from jobspy import scrape_jobs
+from scrapers import scrape_jobs
 import pandas as pd
diff --git a/tests/test_goozali.py b/tests/test_goozali.py
index 2f59956..9f68ab5 100644
--- a/tests/test_goozali.py
+++ b/tests/test_goozali.py
@@ -1,12 +1,12 @@
 import json
 import os

-from jobspy.jobs import JobPost
-from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
-from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
-from jobspy.scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column
-from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliResponseData
-from jobspy.scrapers.utils import create_dict_by_key_and_value
+from jobs import JobPost
+from scrapers.goozali.GoozaliMapper import GoozaliMapper
+from scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
+from scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column
+from scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliResponseData
+from scrapers.utils import create_dict_by_key_and_value
 # URL Example
 # https://airtable.com/v0.3/view/viwagEIbkfz2iMsLU/readSharedViewData?stringifiedObjectParams=%7B%22shouldUseNestedResponseFormat%22%3Atrue%7D&requestId=reqXyRSHWlXyiRgY9&accessPolicy=%7B%22allowedActions%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSignedAttachmentUrls%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows%20*%5BdisplayedInView%3DviwagEIbkfz2iMsLU%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%5D%2C%22shareId%22%3A%22shr97tl6luEk4Ca9R%22%2C%22applicationId%22%3A%22app5sYJyDgcRbJWYU%22%2C%22generationNumber%22%3A0%2C%22expires%22%3A%222025-01-02T00%3A00%3A00.000Z%22%2C%22signature%22%3A%223aa292ee44d15aa75d9506200329e413653471f89e000fa370ef9fa38393070a%22%7D
diff --git a/tests/test_indeed.py b/tests/test_indeed.py
index 714fc53..0468afb 100644
--- a/tests/test_indeed.py
+++ b/tests/test_indeed.py
@@ -1,4 +1,4 @@
-from jobspy import scrape_jobs
+from scrapers import scrape_jobs
 import pandas as pd
diff --git a/tests/test_linkedin.py b/tests/test_linkedin.py
index 0cb5ec4..29d0bf8 100644
--- a/tests/test_linkedin.py
+++ b/tests/test_linkedin.py
@@ -1,4 +1,4 @@
-from jobspy import scrape_jobs
+from scrapers import scrape_jobs
 import pandas as pd
diff --git a/tests/test_util.py b/tests/test_util.py
index 5ad8751..bfc3f8c 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -1,7 +1,7 @@
 from datetime import datetime, date
 from typing import List

-from jobspy import JobPost, Location, Country
+from scrapers import JobPost, Location, Country

 # Creating some test job posts
diff --git a/tests/test_ziprecruiter.py b/tests/test_ziprecruiter.py
index 61de491..a023590 100644
--- a/tests/test_ziprecruiter.py
+++ b/tests/test_ziprecruiter.py
@@ -1,4 +1,4 @@
-from jobspy import scrape_jobs
+from scrapers import scrape_jobs
 import pandas as pd
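
Reviewer note: the new /start flow above is a python-telegram-bot (v20+) ConversationHandler state machine. Each async handler returns the next Flow value, and filters.TEXT & ~filters.COMMAND on every state keeps commands such as /cancel routed to the fallback instead of being swallowed as free-text input. Below is a minimal, self-contained sketch of that pattern; the handler names, messages, and the "YOUR_TOKEN" placeholder are illustrative and not part of this change set.

    from enum import Enum

    from telegram import Update
    from telegram.ext import (
        Application, CommandHandler, ContextTypes, ConversationHandler,
        MessageHandler, filters,
    )


    class Flow(Enum):
        POSITION = 0


    async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        # Entry point: ask the first question, then hand control to the POSITION state.
        await update.message.reply_text("What position are you looking for?")
        return Flow.POSITION.value


    async def position(update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        # State handler: store the answer and finish the conversation.
        context.user_data["position"] = update.message.text
        await update.message.reply_text(f"Got it: {update.message.text}")
        return ConversationHandler.END


    async def cancel(update: Update, context: ContextTypes.DEFAULT_TYPE) -> int:
        # Fallback: reachable mid-conversation because the states exclude commands.
        await update.message.reply_text("Cancelled.")
        return ConversationHandler.END


    conv = ConversationHandler(
        entry_points=[CommandHandler("start", start)],
        states={
            # TEXT & ~COMMAND: plain text advances the flow, commands fall through.
            Flow.POSITION.value: [MessageHandler(filters.TEXT & ~filters.COMMAND, position)],
        },
        fallbacks=[CommandHandler("cancel", cancel)],
    )

    if __name__ == "__main__":
        app = Application.builder().token("YOUR_TOKEN").build()  # placeholder token
        app.add_handler(conv)
        app.run_polling(allowed_updates=Update.ALL_TYPES)

PTB only requires that state keys be hashable and that each handler return the next key (or ConversationHandler.END), so the numeric Enum used here is a readability choice rather than an API requirement.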
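
A second pattern worth noting for review: every answer mutates a cached User (cache_manager.find / cache_manager.save), and the database is written only once, when the conversation ends. A rough sketch of that write-back idea under assumed interfaces (the real cache manager and user_repository may differ):

    from dataclasses import dataclass, field


    @dataclass
    class DraftUser:
        # Hypothetical stand-in for the PR's User model.
        username: str
        cities: list[str] = field(default_factory=list)
        title_filters: list[str] = field(default_factory=list)


    class DraftCache:
        # Stand-in for an LRU cache manager: find() returns None on a miss.
        def __init__(self):
            self._store: dict[str, DraftUser] = {}

        def find(self, key: str) -> DraftUser | None:
            return self._store.get(key)

        def save(self, key: str, value: DraftUser) -> None:
            self._store[key] = value


    cache = DraftCache()


    def record_answer(username: str, **answers) -> None:
        # Each conversation step only mutates the in-memory draft.
        draft = cache.find(username) or DraftUser(username=username)
        for name, value in answers.items():
            setattr(draft, name, value)
        cache.save(username, draft)


    def finish(username: str, repository) -> None:
        # Single persistent write at the end of the flow.
        repository.update(cache.find(username))

The upside is one database write per completed conversation; the trade-off is that a cache miss (eviction or a process restart mid-conversation) leaves nothing to persist, so the end-of-flow handlers should tolerate find() returning None.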