created mapper for position to goozali field type

Next: fix the circular dependency.
pull/231/head
Yariv Menachem 2025-01-06 14:24:51 +02:00
parent c570f53e5b
commit fced92f871
5 changed files with 95 additions and 27 deletions

View File

@ -1,5 +1,6 @@
from datetime import datetime, timedelta
from . import GoozaliFieldChoice
from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice
from ..utils import create_logger
@ -39,13 +40,13 @@ class GoozaliScrapperComponent:
if (column.name == column_name):
return column
def find_choice_from_column(self, column: GoozaliColumn, choice_name: str) -> GoozaliColumnChoice:
def find_choice_from_column(self, column: GoozaliColumn, choice_name: GoozaliFieldChoice) -> GoozaliColumnChoice:
if not column.typeOptions.choices:
logger.exception(f"Choices for column {column.name} doesn't exist")
raise Exception(f"Choices for column {column.name} doesn't exist")
for key, choice in column.typeOptions.choices.items():
if (choice.name == choice_name):
if choice.name == choice_name.value:
return choice
logger.exception(f"Can't find {choice_name} for column {column.name}")

View File

@ -7,11 +7,12 @@ This module contains routines to scrape Goozali.
from __future__ import annotations
from model.User import User
from model.user_repository import user_repository
from .. import Scraper, ScraperInput
from .GoozaliMapper import GoozaliMapper
from .GoozaliScrapperComponent import GoozaliScrapperComponent
from .constants import extract_goozali_column_name, job_post_column_to_goozali_column
from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map
from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
from ..site import Site
@ -20,6 +21,7 @@ from ...jobs import (
JobPost,
JobResponse,
)
logger = create_logger("Goozali")
@ -67,7 +69,7 @@ class GoozaliScraper(Scraper):
logger.info(f"response: {str(response)}")
if (response.status_code != 200):
logger.error(f"Status code: {response.status_code}, Error: {
str(response.text)}")
str(response.text)}")
return JobResponse(jobs=job_list)
except Exception as e:
logger.error(f"Exception: {str(e)}")
@ -79,8 +81,10 @@ class GoozaliScraper(Scraper):
# filter result by Field
column = self.component.find_column(
goozali_response.data.columns, job_post_column_to_goozali_column["field"])
user: User = user_repository.find_by_username()
user_goozali_field = position_to_goozali_field_map[user.position]
column_choice = self.component.find_choice_from_column(
column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value)
column, user_goozali_field)
filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice(
goozali_response.data.rows, column, column_choice)
filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours(

View File

@ -1,6 +1,7 @@
from model.Position import Position
from . import GoozaliFieldChoice
from .model import GoozaliColumn
job_post_column_to_goozali_column = {
"date_posted": "Discovered",
"field": "Field",
@ -23,6 +24,69 @@ job_post_column_names = ["id",
"location",
"company_industry"]
# Display names of the Goozali job fields, as plain strings.
# NOTE(review): these look like the string values behind the
# GoozaliFieldChoice members (e.g. "Software Engineering") — confirm against
# the enum definition; nothing visible in this module reads this list.
fields = ["Product Management",
"Data Analyst",
"Data Science, ML & Algorithms",
"Software Engineering",
"QA",
"Cybersecurity",
"IT and System Administration",
"Frontend Development",
"DevOps",
"UI/UX, Design & Content",
"HR & Recruitment",
"Mobile Development",
"Hardware Engineering",
"Embedded, Low Level & Firmware Engineering",
"Customer Success",
"Project Management",
"Operations",
"Finance",
"Systems Engineering",
"Marketing",
"Sales",
"Compliance, Legal & Policy",
"C-Level",
"Business Development",
"Mechanical Engineering",
"Natural Science",
"Other"]
def create_position_to_goozali_field_map():
    """
    Build the lookup table from a user's Position to Goozali field choices.

    Each Position key is paired with its own newly created list of
    GoozaliFieldChoice members; some positions map to more than one field.

    Returns:
        dict: Position members as keys, lists of GoozaliFieldChoice as values.
    """
    field = GoozaliFieldChoice
    # Pairs are listed in the order they should appear in the resulting dict.
    pairs = (
        (Position.BACKEND_DEVELOPER, [field.SOFTWARE_ENGINEERING]),
        (Position.FULLSTACK_DEVELOPER, [field.SOFTWARE_ENGINEERING]),
        (Position.FRONTEND_DEVELOPER,
         [field.FRONTEND_DEVELOPMENT, field.SOFTWARE_ENGINEERING]),
        (Position.DATA_SCIENTIST, [field.DATA_SCIENCE_ML_ALGORITHMS]),
        (Position.DATA_ANALYST, [field.DATA_ANALYST]),
        (Position.PROJECT_MANAGER, [field.PROJECT_MANAGEMENT]),
        (Position.CLOUD_ENGINEER,
         [field.DEVOPS, field.IT_AND_SYSTEM_ADMINISTRATION]),
        (Position.CLOUD_ARCHITECT,
         [field.DEVOPS, field.IT_AND_SYSTEM_ADMINISTRATION]),
        (Position.UX_UI_DESIGNER, [field.UI_UX_DESIGN_CONTENT]),
        (Position.PRODUCT_MANAGER, [field.PRODUCT_MANAGEMENT]),
        (Position.DEV_OPS_ENGINEER, [field.DEVOPS]),
        (Position.BUSINESS_ANALYST, [field.BUSINESS_DEVELOPMENT]),
        (Position.CYBERSECURITY_ENGINEER, [field.CYBERSECURITY]),
        (Position.MACHINE_LEARNING_ENGINEER,
         [field.DATA_SCIENCE_ML_ALGORITHMS]),
        (Position.ARTIFICIAL_INTELLIGENCE_ENGINEER,
         [field.DATA_SCIENCE_ML_ALGORITHMS]),
        (Position.DATABASE_ADMINISTRATOR,
         [field.IT_AND_SYSTEM_ADMINISTRATION]),
        (Position.SYSTEMS_ADMINISTRATOR,
         [field.IT_AND_SYSTEM_ADMINISTRATION]),
        (Position.NETWORK_ENGINEER, [field.IT_AND_SYSTEM_ADMINISTRATION]),
        (Position.TECHNICAL_SUPPORT_SPECIALIST,
         [field.IT_AND_SYSTEM_ADMINISTRATION]),
        (Position.SALES_ENGINEER, [field.SALES]),
        (Position.SCRUM_MASTER, [field.PROJECT_MANAGEMENT]),
        (Position.IT_MANAGER, [field.IT_AND_SYSTEM_ADMINISTRATION]),
    )
    return dict(pairs)
# Build the Position -> Goozali field map once, at import time, and expose it
# as a module-level name for other modules to import.
# NOTE(review): every value in this map is a list of GoozaliFieldChoice; a
# caller that needs a single choice (e.g. one that reads `.value`) has to pick
# an element from the list — confirm the call sites handle this.
position_to_goozali_field_map = create_position_to_goozali_field_map()
# Key mapper: Extract 'name' as the key
def extract_goozali_column_name(column): return column.name if isinstance(

View File

@ -5,22 +5,22 @@ class Position(str, Enum):
BACKEND_DEVELOPER = "Backend Developer"
FULLSTACK_DEVELOPER = "Fullstack Developer"
FRONTEND_DEVELOPER = "Frontend Developer"
DATA_SCIENTIST="Data Scientist"
DATA_ANALYST="Data Analyst"
PROJECT_MANAGER="Project Manager"
CLOUD_ENGINEER="Cloud Engineer"
CLOUD_ARCHITECT="Cloud Architect"
UX_UI_DESIGNER="UX/UI Designer"
PRODUCT_MANAGER="Product Manager"
DEV_OPS_ENGINEER="DevOps Engineer"
BUSINESS_ANALYST="Business Analyst"
CYBERSECURITY_ENGINEER="Cybersecurity Engineer"
MACHINE_LEARNING_ENGINEER="Machine Learning Engineer"
ARTIFICIAL_INTELLIGENCE_ENGINEER="Artificial Intelligence Engineer"
DATABASE_ADMINISTRATOR="Database Administrator"
SYSTEMS_ADMINISTRATOR="Systems Administrator"
NETWORK_ENGINEER="Network Engineer"
TECHNICAL_SUPPORT_SPECIALIST="Technical Support Specialist"
SALES_ENGINEER="Sales Engineer"
SCRUM_MASTER="Scrum Master"
IT_MANAGER="IT Manager"
DATA_SCIENTIST = "Data Scientist"
DATA_ANALYST = "Data Analyst"
PROJECT_MANAGER = "Project Manager"
CLOUD_ENGINEER = "Cloud Engineer"
CLOUD_ARCHITECT = "Cloud Architect"
UX_UI_DESIGNER = "UX/UI Designer"
PRODUCT_MANAGER = "Product Manager"
DEV_OPS_ENGINEER = "DevOps Engineer"
BUSINESS_ANALYST = "Business Analyst"
CYBERSECURITY_ENGINEER = "Cybersecurity Engineer"
MACHINE_LEARNING_ENGINEER = "Machine Learning Engineer"
ARTIFICIAL_INTELLIGENCE_ENGINEER = "Artificial Intelligence Engineer"
DATABASE_ADMINISTRATOR = "Database Administrator"
SYSTEMS_ADMINISTRATOR = "Systems Administrator"
NETWORK_ENGINEER = "Network Engineer"
TECHNICAL_SUPPORT_SPECIALIST = "Technical Support Specialist"
SALES_ENGINEER = "Sales Engineer"
SCRUM_MASTER = "Scrum Master"
IT_MANAGER = "IT Manager"

View File

@ -1,6 +1,5 @@
from typing import Optional
from cachebox import LRUCache
from dotenv import load_dotenv
from pymongo import UpdateOne