created mapper for position to goozali field type

Next step: fix the circular dependency.
pull/231/head
Yariv Menachem 2025-01-06 14:24:51 +02:00
parent c570f53e5b
commit fced92f871
5 changed files with 95 additions and 27 deletions

View File

@ -1,5 +1,6 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from . import GoozaliFieldChoice
from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice
from ..utils import create_logger from ..utils import create_logger
@ -39,13 +40,13 @@ class GoozaliScrapperComponent:
if (column.name == column_name): if (column.name == column_name):
return column return column
def find_choice_from_column(self, column: GoozaliColumn, choice_name: str) -> GoozaliColumnChoice: def find_choice_from_column(self, column: GoozaliColumn, choice_name: GoozaliFieldChoice) -> GoozaliColumnChoice:
if not column.typeOptions.choices: if not column.typeOptions.choices:
logger.exception(f"Choices for column {column.name} doesn't exist") logger.exception(f"Choices for column {column.name} doesn't exist")
raise Exception(f"Choices for column {column.name} doesn't exist") raise Exception(f"Choices for column {column.name} doesn't exist")
for key, choice in column.typeOptions.choices.items(): for key, choice in column.typeOptions.choices.items():
if (choice.name == choice_name): if choice.name == choice_name.value:
return choice return choice
logger.exception(f"Can't find {choice_name} for column {column.name}") logger.exception(f"Can't find {choice_name} for column {column.name}")

View File

@ -7,11 +7,12 @@ This module contains routines to scrape Goozali.
from __future__ import annotations from __future__ import annotations
from model.User import User
from model.user_repository import user_repository
from .. import Scraper, ScraperInput from .. import Scraper, ScraperInput
from .GoozaliMapper import GoozaliMapper from .GoozaliMapper import GoozaliMapper
from .GoozaliScrapperComponent import GoozaliScrapperComponent from .GoozaliScrapperComponent import GoozaliScrapperComponent
from .constants import extract_goozali_column_name, job_post_column_to_goozali_column from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map
from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
from ..site import Site from ..site import Site
@ -20,6 +21,7 @@ from ...jobs import (
JobPost, JobPost,
JobResponse, JobResponse,
) )
logger = create_logger("Goozali") logger = create_logger("Goozali")
@ -67,7 +69,7 @@ class GoozaliScraper(Scraper):
logger.info(f"response: {str(response)}") logger.info(f"response: {str(response)}")
if (response.status_code != 200): if (response.status_code != 200):
logger.error(f"Status code: {response.status_code}, Error: { logger.error(f"Status code: {response.status_code}, Error: {
str(response.text)}") str(response.text)}")
return JobResponse(jobs=job_list) return JobResponse(jobs=job_list)
except Exception as e: except Exception as e:
logger.error(f"Exception: {str(e)}") logger.error(f"Exception: {str(e)}")
@ -79,8 +81,10 @@ class GoozaliScraper(Scraper):
# filter result by Field # filter result by Field
column = self.component.find_column( column = self.component.find_column(
goozali_response.data.columns, job_post_column_to_goozali_column["field"]) goozali_response.data.columns, job_post_column_to_goozali_column["field"])
user: User = user_repository.find_by_username()
user_goozali_field = position_to_goozali_field_map[user.position]
column_choice = self.component.find_choice_from_column( column_choice = self.component.find_choice_from_column(
column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value) column, user_goozali_field)
filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice( filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice(
goozali_response.data.rows, column, column_choice) goozali_response.data.rows, column, column_choice)
filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours( filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours(

View File

@ -1,6 +1,7 @@
from model.Position import Position
from . import GoozaliFieldChoice
from .model import GoozaliColumn from .model import GoozaliColumn
job_post_column_to_goozali_column = { job_post_column_to_goozali_column = {
"date_posted": "Discovered", "date_posted": "Discovered",
"field": "Field", "field": "Field",
@ -23,6 +24,69 @@ job_post_column_names = ["id",
"location", "location",
"company_industry"] "company_industry"]
# Human-readable field names.
# NOTE(review): these presumably mirror the choices of Goozali's "Field"
# column (and the GoozaliFieldChoice values) — confirm against the model.
fields = [
    "Product Management",
    "Data Analyst",
    "Data Science, ML & Algorithms",
    "Software Engineering",
    "QA",
    "Cybersecurity",
    "IT and System Administration",
    "Frontend Development",
    "DevOps",
    "UI/UX, Design & Content",
    "HR & Recruitment",
    "Mobile Development",
    "Hardware Engineering",
    "Embedded, Low Level & Firmware Engineering",
    "Customer Success",
    "Project Management",
    "Operations",
    "Finance",
    "Systems Engineering",
    "Marketing",
    "Sales",
    "Compliance, Legal & Policy",
    "C-Level",
    "Business Development",
    "Mechanical Engineering",
    "Natural Science",
    "Other",
]
def create_position_to_goozali_field_map():
    """
    Build the lookup from a Position to the Goozali fields relevant to it.

    Each call returns a newly built dictionary whose values are newly built
    lists, so callers may mutate the result without affecting other callers.

    NOTE(review): every value is a *list* of GoozaliFieldChoice members, not a
    single member. A caller that needs one choice (for example to read
    ``.value``) has to pick an element from the list — verify the call sites.

    Returns:
        dict: Position members as keys, each mapped to a list of
        GoozaliFieldChoice members.
    """
    # Short local aliases for the choices that are reused by several positions.
    software = GoozaliFieldChoice.SOFTWARE_ENGINEERING
    data_science = GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS
    project_mgmt = GoozaliFieldChoice.PROJECT_MANAGEMENT
    devops = GoozaliFieldChoice.DEVOPS
    it_admin = GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION

    return {
        Position.BACKEND_DEVELOPER: [software],
        Position.FULLSTACK_DEVELOPER: [software],
        Position.FRONTEND_DEVELOPER: [
            GoozaliFieldChoice.FRONTEND_DEVELOPMENT,
            software,
        ],
        Position.DATA_SCIENTIST: [data_science],
        Position.DATA_ANALYST: [GoozaliFieldChoice.DATA_ANALYST],
        Position.PROJECT_MANAGER: [project_mgmt],
        Position.CLOUD_ENGINEER: [devops, it_admin],
        Position.CLOUD_ARCHITECT: [devops, it_admin],
        Position.UX_UI_DESIGNER: [GoozaliFieldChoice.UI_UX_DESIGN_CONTENT],
        Position.PRODUCT_MANAGER: [GoozaliFieldChoice.PRODUCT_MANAGEMENT],
        Position.DEV_OPS_ENGINEER: [devops],
        Position.BUSINESS_ANALYST: [GoozaliFieldChoice.BUSINESS_DEVELOPMENT],
        Position.CYBERSECURITY_ENGINEER: [GoozaliFieldChoice.CYBERSECURITY],
        Position.MACHINE_LEARNING_ENGINEER: [data_science],
        Position.ARTIFICIAL_INTELLIGENCE_ENGINEER: [data_science],
        Position.DATABASE_ADMINISTRATOR: [it_admin],
        Position.SYSTEMS_ADMINISTRATOR: [it_admin],
        Position.NETWORK_ENGINEER: [it_admin],
        Position.TECHNICAL_SUPPORT_SPECIALIST: [it_admin],
        Position.SALES_ENGINEER: [GoozaliFieldChoice.SALES],
        Position.SCRUM_MASTER: [project_mgmt],
        Position.IT_MANAGER: [it_admin],
    }
# Build the Position -> list-of-GoozaliFieldChoice mapping once and expose it
# under a module-level name so other modules can import it directly.
position_to_goozali_field_map = create_position_to_goozali_field_map()
# Key mapper: Extract 'name' as the key # Key mapper: Extract 'name' as the key
def extract_goozali_column_name(column): return column.name if isinstance( def extract_goozali_column_name(column): return column.name if isinstance(

View File

@ -5,22 +5,22 @@ class Position(str, Enum):
BACKEND_DEVELOPER = "Backend Developer" BACKEND_DEVELOPER = "Backend Developer"
FULLSTACK_DEVELOPER = "Fullstack Developer" FULLSTACK_DEVELOPER = "Fullstack Developer"
FRONTEND_DEVELOPER = "Frontend Developer" FRONTEND_DEVELOPER = "Frontend Developer"
DATA_SCIENTIST="Data Scientist" DATA_SCIENTIST = "Data Scientist"
DATA_ANALYST="Data Analyst" DATA_ANALYST = "Data Analyst"
PROJECT_MANAGER="Project Manager" PROJECT_MANAGER = "Project Manager"
CLOUD_ENGINEER="Cloud Engineer" CLOUD_ENGINEER = "Cloud Engineer"
CLOUD_ARCHITECT="Cloud Architect" CLOUD_ARCHITECT = "Cloud Architect"
UX_UI_DESIGNER="UX/UI Designer" UX_UI_DESIGNER = "UX/UI Designer"
PRODUCT_MANAGER="Product Manager" PRODUCT_MANAGER = "Product Manager"
DEV_OPS_ENGINEER="DevOps Engineer" DEV_OPS_ENGINEER = "DevOps Engineer"
BUSINESS_ANALYST="Business Analyst" BUSINESS_ANALYST = "Business Analyst"
CYBERSECURITY_ENGINEER="Cybersecurity Engineer" CYBERSECURITY_ENGINEER = "Cybersecurity Engineer"
MACHINE_LEARNING_ENGINEER="Machine Learning Engineer" MACHINE_LEARNING_ENGINEER = "Machine Learning Engineer"
ARTIFICIAL_INTELLIGENCE_ENGINEER="Artificial Intelligence Engineer" ARTIFICIAL_INTELLIGENCE_ENGINEER = "Artificial Intelligence Engineer"
DATABASE_ADMINISTRATOR="Database Administrator" DATABASE_ADMINISTRATOR = "Database Administrator"
SYSTEMS_ADMINISTRATOR="Systems Administrator" SYSTEMS_ADMINISTRATOR = "Systems Administrator"
NETWORK_ENGINEER="Network Engineer" NETWORK_ENGINEER = "Network Engineer"
TECHNICAL_SUPPORT_SPECIALIST="Technical Support Specialist" TECHNICAL_SUPPORT_SPECIALIST = "Technical Support Specialist"
SALES_ENGINEER="Sales Engineer" SALES_ENGINEER = "Sales Engineer"
SCRUM_MASTER="Scrum Master" SCRUM_MASTER = "Scrum Master"
IT_MANAGER="IT Manager" IT_MANAGER = "IT Manager"

View File

@ -1,6 +1,5 @@
from typing import Optional from typing import Optional
from cachebox import LRUCache
from dotenv import load_dotenv from dotenv import load_dotenv
from pymongo import UpdateOne from pymongo import UpdateOne