created mapper for position to goozali field type

Next: fix the circular dependency.
pull/231/head
Yariv Menachem 2025-01-06 14:24:51 +02:00
parent c570f53e5b
commit fced92f871
5 changed files with 95 additions and 27 deletions

View File

@ -1,5 +1,6 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from . import GoozaliFieldChoice
from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice
from ..utils import create_logger from ..utils import create_logger
@ -39,13 +40,13 @@ class GoozaliScrapperComponent:
if (column.name == column_name): if (column.name == column_name):
return column return column
def find_choice_from_column(self, column: GoozaliColumn, choice_name: str) -> GoozaliColumnChoice: def find_choice_from_column(self, column: GoozaliColumn, choice_name: GoozaliFieldChoice) -> GoozaliColumnChoice:
if not column.typeOptions.choices: if not column.typeOptions.choices:
logger.exception(f"Choices for column {column.name} doesn't exist") logger.exception(f"Choices for column {column.name} doesn't exist")
raise Exception(f"Choices for column {column.name} doesn't exist") raise Exception(f"Choices for column {column.name} doesn't exist")
for key, choice in column.typeOptions.choices.items(): for key, choice in column.typeOptions.choices.items():
if (choice.name == choice_name): if choice.name == choice_name.value:
return choice return choice
logger.exception(f"Can't find {choice_name} for column {column.name}") logger.exception(f"Can't find {choice_name} for column {column.name}")

View File

@ -7,11 +7,12 @@ This module contains routines to scrape Goozali.
from __future__ import annotations from __future__ import annotations
from model.User import User
from model.user_repository import user_repository
from .. import Scraper, ScraperInput from .. import Scraper, ScraperInput
from .GoozaliMapper import GoozaliMapper from .GoozaliMapper import GoozaliMapper
from .GoozaliScrapperComponent import GoozaliScrapperComponent from .GoozaliScrapperComponent import GoozaliScrapperComponent
from .constants import extract_goozali_column_name, job_post_column_to_goozali_column from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map
from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
from ..site import Site from ..site import Site
@ -20,6 +21,7 @@ from ...jobs import (
JobPost, JobPost,
JobResponse, JobResponse,
) )
logger = create_logger("Goozali") logger = create_logger("Goozali")
@ -79,8 +81,10 @@ class GoozaliScraper(Scraper):
# filter result by Field # filter result by Field
column = self.component.find_column( column = self.component.find_column(
goozali_response.data.columns, job_post_column_to_goozali_column["field"]) goozali_response.data.columns, job_post_column_to_goozali_column["field"])
user: User = user_repository.find_by_username()
user_goozali_field = position_to_goozali_field_map[user.position]
column_choice = self.component.find_choice_from_column( column_choice = self.component.find_choice_from_column(
column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value) column, user_goozali_field)
filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice( filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice(
goozali_response.data.rows, column, column_choice) goozali_response.data.rows, column, column_choice)
filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours( filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours(

View File

@ -1,6 +1,7 @@
from model.Position import Position
from . import GoozaliFieldChoice
from .model import GoozaliColumn from .model import GoozaliColumn
job_post_column_to_goozali_column = { job_post_column_to_goozali_column = {
"date_posted": "Discovered", "date_posted": "Discovered",
"field": "Field", "field": "Field",
@ -23,6 +24,69 @@ job_post_column_names = ["id",
"location", "location",
"company_industry"] "company_industry"]
# Field names, listed one per line in their original order.
fields = [
    "Product Management",
    "Data Analyst",
    "Data Science, ML & Algorithms",
    "Software Engineering",
    "QA",
    "Cybersecurity",
    "IT and System Administration",
    "Frontend Development",
    "DevOps",
    "UI/UX, Design & Content",
    "HR & Recruitment",
    "Mobile Development",
    "Hardware Engineering",
    "Embedded, Low Level & Firmware Engineering",
    "Customer Success",
    "Project Management",
    "Operations",
    "Finance",
    "Systems Engineering",
    "Marketing",
    "Sales",
    "Compliance, Legal & Policy",
    "C-Level",
    "Business Development",
    "Mechanical Engineering",
    "Natural Science",
    "Other",
]
def create_position_to_goozali_field_map():
    """
    Build the lookup from a user's Position to the Goozali field choices for it.

    Every value is a list (a position may map to more than one field), so
    callers receive a list of GoozaliFieldChoice members, never a single member.

    Returns:
        dict: Position members as keys, each mapped to its own new list of
        GoozaliFieldChoice members.
    """
    choice = GoozaliFieldChoice
    # Ordered (position, field choices) pairs; the order fixes the dict's key order.
    pairs = (
        (Position.BACKEND_DEVELOPER, (choice.SOFTWARE_ENGINEERING,)),
        (Position.FULLSTACK_DEVELOPER, (choice.SOFTWARE_ENGINEERING,)),
        (Position.FRONTEND_DEVELOPER, (choice.FRONTEND_DEVELOPMENT, choice.SOFTWARE_ENGINEERING)),
        (Position.DATA_SCIENTIST, (choice.DATA_SCIENCE_ML_ALGORITHMS,)),
        (Position.DATA_ANALYST, (choice.DATA_ANALYST,)),
        (Position.PROJECT_MANAGER, (choice.PROJECT_MANAGEMENT,)),
        (Position.CLOUD_ENGINEER, (choice.DEVOPS, choice.IT_AND_SYSTEM_ADMINISTRATION)),
        (Position.CLOUD_ARCHITECT, (choice.DEVOPS, choice.IT_AND_SYSTEM_ADMINISTRATION)),
        (Position.UX_UI_DESIGNER, (choice.UI_UX_DESIGN_CONTENT,)),
        (Position.PRODUCT_MANAGER, (choice.PRODUCT_MANAGEMENT,)),
        (Position.DEV_OPS_ENGINEER, (choice.DEVOPS,)),
        (Position.BUSINESS_ANALYST, (choice.BUSINESS_DEVELOPMENT,)),
        (Position.CYBERSECURITY_ENGINEER, (choice.CYBERSECURITY,)),
        (Position.MACHINE_LEARNING_ENGINEER, (choice.DATA_SCIENCE_ML_ALGORITHMS,)),
        (Position.ARTIFICIAL_INTELLIGENCE_ENGINEER, (choice.DATA_SCIENCE_ML_ALGORITHMS,)),
        (Position.DATABASE_ADMINISTRATOR, (choice.IT_AND_SYSTEM_ADMINISTRATION,)),
        (Position.SYSTEMS_ADMINISTRATOR, (choice.IT_AND_SYSTEM_ADMINISTRATION,)),
        (Position.NETWORK_ENGINEER, (choice.IT_AND_SYSTEM_ADMINISTRATION,)),
        (Position.TECHNICAL_SUPPORT_SPECIALIST, (choice.IT_AND_SYSTEM_ADMINISTRATION,)),
        (Position.SALES_ENGINEER, (choice.SALES,)),
        (Position.SCRUM_MASTER, (choice.PROJECT_MANAGEMENT,)),
        (Position.IT_MANAGER, (choice.IT_AND_SYSTEM_ADMINISTRATION,)),
    )
    # list(...) gives every key its own list object, so no two keys share one.
    return {position: list(field_choices) for position, field_choices in pairs}
# Build the Position -> list-of-GoozaliFieldChoice lookup once, when this
# module is imported, so importers can use it as a plain dict.
position_to_goozali_field_map = create_position_to_goozali_field_map()
# Key mapper: Extract 'name' as the key # Key mapper: Extract 'name' as the key
def extract_goozali_column_name(column): return column.name if isinstance( def extract_goozali_column_name(column): return column.name if isinstance(

View File

@ -1,6 +1,5 @@
from typing import Optional from typing import Optional
from cachebox import LRUCache
from dotenv import load_dotenv from dotenv import load_dotenv
from pymongo import UpdateOne from pymongo import UpdateOne