From fced92f8713d136ad8b73ae58fbd53ad956b95e1 Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Mon, 6 Jan 2025 14:24:51 +0200 Subject: [PATCH] created mapper for position to goozali field type; next: fix circular dependency --- .../goozali/GoozaliScrapperComponent.py | 5 +- src/jobspy/scrapers/goozali/__init__.py | 12 ++-- src/jobspy/scrapers/goozali/constants.py | 66 ++++++++++++++++++- src/model/Position.py | 38 +++++------ src/model/user_repository.py | 1 - 5 files changed, 95 insertions(+), 27 deletions(-) diff --git a/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py b/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py index bcab0d3..06e9a5c 100644 --- a/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py +++ b/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py @@ -1,5 +1,6 @@ from datetime import datetime, timedelta +from . import GoozaliFieldChoice from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice from ..utils import create_logger @@ -39,13 +40,13 @@ class GoozaliScrapperComponent: if (column.name == column_name): return column - def find_choice_from_column(self, column: GoozaliColumn, choice_name: str) -> GoozaliColumnChoice: + def find_choice_from_column(self, column: GoozaliColumn, choice_name: GoozaliFieldChoice) -> GoozaliColumnChoice: if not column.typeOptions.choices: logger.exception(f"Choices for column {column.name} doesn't exist") raise Exception(f"Choices for column {column.name} doesn't exist") for key, choice in column.typeOptions.choices.items(): - if (choice.name == choice_name): + if choice.name == choice_name.value: return choice logger.exception(f"Can't find {choice_name} for column {column.name}") diff --git a/src/jobspy/scrapers/goozali/__init__.py b/src/jobspy/scrapers/goozali/__init__.py index 90f18eb..59e334e 100644 --- a/src/jobspy/scrapers/goozali/__init__.py +++ b/src/jobspy/scrapers/goozali/__init__.py @@ -7,11 +7,12 @@ This module contains routines to scrape Goozali. 
from __future__ import annotations - +from model.User import User +from model.user_repository import user_repository from .. import Scraper, ScraperInput from .GoozaliMapper import GoozaliMapper from .GoozaliScrapperComponent import GoozaliScrapperComponent -from .constants import extract_goozali_column_name, job_post_column_to_goozali_column +from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest from ..site import Site @@ -20,6 +21,7 @@ from ...jobs import ( JobPost, JobResponse, ) + logger = create_logger("Goozali") @@ -67,7 +69,7 @@ class GoozaliScraper(Scraper): logger.info(f"response: {str(response)}") if (response.status_code != 200): logger.error(f"Status code: {response.status_code}, Error: { - str(response.text)}") + str(response.text)}") return JobResponse(jobs=job_list) except Exception as e: logger.error(f"Exception: {str(e)}") @@ -79,8 +81,10 @@ class GoozaliScraper(Scraper): # filter result by Field column = self.component.find_column( goozali_response.data.columns, job_post_column_to_goozali_column["field"]) + user: User = user_repository.find_by_username() + user_goozali_field = position_to_goozali_field_map[user.position] column_choice = self.component.find_choice_from_column( - column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value) + column, user_goozali_field) filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice( goozali_response.data.rows, column, column_choice) filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours( diff --git a/src/jobspy/scrapers/goozali/constants.py b/src/jobspy/scrapers/goozali/constants.py index 458320f..2f719c5 100644 --- a/src/jobspy/scrapers/goozali/constants.py +++ b/src/jobspy/scrapers/goozali/constants.py @@ -1,6 +1,7 @@ +from model.Position import Position +from . 
import GoozaliFieldChoice from .model import GoozaliColumn - job_post_column_to_goozali_column = { "date_posted": "Discovered", "field": "Field", @@ -23,6 +24,69 @@ job_post_column_names = ["id", "location", "company_industry"] +fields = ["Product Management", + "Data Analyst", + "Data Science, ML & Algorithms", + "Software Engineering", + "QA", + "Cybersecurity", + "IT and System Administration", + "Frontend Development", + "DevOps", + "UI/UX, Design & Content", + "HR & Recruitment", + "Mobile Development", + "Hardware Engineering", + "Embedded, Low Level & Firmware Engineering", + "Customer Success", + "Project Management", + "Operations", + "Finance", + "Systems Engineering", + "Marketing", + "Sales", + "Compliance, Legal & Policy", + "C-Level", + "Business Development", + "Mechanical Engineering", + "Natural Science", + "Other"] + +def create_position_to_goozali_field_map(): + """ + Creates a map with Position as keys and a list of relevant GoozaliFieldChoice as values. + + Returns: + dict: A dictionary mapping Position to a list of GoozaliFieldChoice. 
+ """ + position_to_goozali_map = { + Position.BACKEND_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING], + Position.FULLSTACK_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING], + Position.FRONTEND_DEVELOPER: [GoozaliFieldChoice.FRONTEND_DEVELOPMENT, GoozaliFieldChoice.SOFTWARE_ENGINEERING], + Position.DATA_SCIENTIST: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS], + Position.DATA_ANALYST: [GoozaliFieldChoice.DATA_ANALYST], + Position.PROJECT_MANAGER: [GoozaliFieldChoice.PROJECT_MANAGEMENT], + Position.CLOUD_ENGINEER: [GoozaliFieldChoice.DEVOPS, GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.CLOUD_ARCHITECT: [GoozaliFieldChoice.DEVOPS, GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.UX_UI_DESIGNER: [GoozaliFieldChoice.UI_UX_DESIGN_CONTENT], + Position.PRODUCT_MANAGER: [GoozaliFieldChoice.PRODUCT_MANAGEMENT], + Position.DEV_OPS_ENGINEER: [GoozaliFieldChoice.DEVOPS], + Position.BUSINESS_ANALYST: [GoozaliFieldChoice.BUSINESS_DEVELOPMENT], + Position.CYBERSECURITY_ENGINEER: [GoozaliFieldChoice.CYBERSECURITY], + Position.MACHINE_LEARNING_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS], + Position.ARTIFICIAL_INTELLIGENCE_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS], + Position.DATABASE_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.SYSTEMS_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.NETWORK_ENGINEER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.TECHNICAL_SUPPORT_SPECIALIST: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + Position.SALES_ENGINEER: [GoozaliFieldChoice.SALES], + Position.SCRUM_MASTER: [GoozaliFieldChoice.PROJECT_MANAGEMENT], + Position.IT_MANAGER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION], + } + return position_to_goozali_map + +# Get the map +position_to_goozali_field_map = create_position_to_goozali_field_map() # Key mapper: Extract 'name' as the key def extract_goozali_column_name(column): return column.name 
if isinstance( diff --git a/src/model/Position.py b/src/model/Position.py index 9bdf360..7717206 100644 --- a/src/model/Position.py +++ b/src/model/Position.py @@ -5,22 +5,22 @@ class Position(str, Enum): BACKEND_DEVELOPER = "Backend Developer" FULLSTACK_DEVELOPER = "Fullstack Developer" FRONTEND_DEVELOPER = "Frontend Developer" - DATA_SCIENTIST="Data Scientist" - DATA_ANALYST="Data Analyst" - PROJECT_MANAGER="Project Manager" - CLOUD_ENGINEER="Cloud Engineer" - CLOUD_ARCHITECT="Cloud Architect" - UX_UI_DESIGNER="UX/UI Designer" - PRODUCT_MANAGER="Product Manager" - DEV_OPS_ENGINEER="DevOps Engineer" - BUSINESS_ANALYST="Business Analyst" - CYBERSECURITY_ENGINEER="Cybersecurity Engineer" - MACHINE_LEARNING_ENGINEER="Machine Learning Engineer" - ARTIFICIAL_INTELLIGENCE_ENGINEER="Artificial Intelligence Engineer" - DATABASE_ADMINISTRATOR="Database Administrator" - SYSTEMS_ADMINISTRATOR="Systems Administrator" - NETWORK_ENGINEER="Network Engineer" - TECHNICAL_SUPPORT_SPECIALIST="Technical Support Specialist" - SALES_ENGINEER="Sales Engineer" - SCRUM_MASTER="Scrum Master" - IT_MANAGER="IT Manager" + DATA_SCIENTIST = "Data Scientist" + DATA_ANALYST = "Data Analyst" + PROJECT_MANAGER = "Project Manager" + CLOUD_ENGINEER = "Cloud Engineer" + CLOUD_ARCHITECT = "Cloud Architect" + UX_UI_DESIGNER = "UX/UI Designer" + PRODUCT_MANAGER = "Product Manager" + DEV_OPS_ENGINEER = "DevOps Engineer" + BUSINESS_ANALYST = "Business Analyst" + CYBERSECURITY_ENGINEER = "Cybersecurity Engineer" + MACHINE_LEARNING_ENGINEER = "Machine Learning Engineer" + ARTIFICIAL_INTELLIGENCE_ENGINEER = "Artificial Intelligence Engineer" + DATABASE_ADMINISTRATOR = "Database Administrator" + SYSTEMS_ADMINISTRATOR = "Systems Administrator" + NETWORK_ENGINEER = "Network Engineer" + TECHNICAL_SUPPORT_SPECIALIST = "Technical Support Specialist" + SALES_ENGINEER = "Sales Engineer" + SCRUM_MASTER = "Scrum Master" + IT_MANAGER = "IT Manager" diff --git a/src/model/user_repository.py b/src/model/user_repository.py 
index f7ca798..9df7edc 100644 --- a/src/model/user_repository.py +++ b/src/model/user_repository.py @@ -1,6 +1,5 @@ from typing import Optional -from cachebox import LRUCache from dotenv import load_dotenv from pymongo import UpdateOne