mirror of https://github.com/Bunsly/JobSpy
created mapper for position to goozali field type
next to fix circular dependency
pull/231/head
parent
c570f53e5b
commit
fced92f871
|
@ -1,5 +1,6 @@
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
from . import GoozaliFieldChoice
|
||||||
from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice
|
from .model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice
|
||||||
from ..utils import create_logger
|
from ..utils import create_logger
|
||||||
|
|
||||||
|
@ -39,13 +40,13 @@ class GoozaliScrapperComponent:
|
||||||
if (column.name == column_name):
|
if (column.name == column_name):
|
||||||
return column
|
return column
|
||||||
|
|
||||||
def find_choice_from_column(self, column: GoozaliColumn, choice_name: str) -> GoozaliColumnChoice:
|
def find_choice_from_column(self, column: GoozaliColumn, choice_name: GoozaliFieldChoice) -> GoozaliColumnChoice:
|
||||||
if not column.typeOptions.choices:
|
if not column.typeOptions.choices:
|
||||||
logger.exception(f"Choices for column {column.name} doesn't exist")
|
logger.exception(f"Choices for column {column.name} doesn't exist")
|
||||||
raise Exception(f"Choices for column {column.name} doesn't exist")
|
raise Exception(f"Choices for column {column.name} doesn't exist")
|
||||||
|
|
||||||
for key, choice in column.typeOptions.choices.items():
|
for key, choice in column.typeOptions.choices.items():
|
||||||
if (choice.name == choice_name):
|
if choice.name == choice_name.value:
|
||||||
return choice
|
return choice
|
||||||
|
|
||||||
logger.exception(f"Can't find {choice_name} for column {column.name}")
|
logger.exception(f"Can't find {choice_name} for column {column.name}")
|
||||||
|
|
|
@ -7,11 +7,12 @@ This module contains routines to scrape Goozali.
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from model.User import User
|
||||||
|
from model.user_repository import user_repository
|
||||||
from .. import Scraper, ScraperInput
|
from .. import Scraper, ScraperInput
|
||||||
from .GoozaliMapper import GoozaliMapper
|
from .GoozaliMapper import GoozaliMapper
|
||||||
from .GoozaliScrapperComponent import GoozaliScrapperComponent
|
from .GoozaliScrapperComponent import GoozaliScrapperComponent
|
||||||
from .constants import extract_goozali_column_name, job_post_column_to_goozali_column
|
from .constants import extract_goozali_column_name, job_post_column_to_goozali_column, position_to_goozali_field_map
|
||||||
from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
|
from .model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
|
||||||
from ..site import Site
|
from ..site import Site
|
||||||
|
|
||||||
|
@ -20,6 +21,7 @@ from ...jobs import (
|
||||||
JobPost,
|
JobPost,
|
||||||
JobResponse,
|
JobResponse,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = create_logger("Goozali")
|
logger = create_logger("Goozali")
|
||||||
|
|
||||||
|
|
||||||
|
@ -79,8 +81,10 @@ class GoozaliScraper(Scraper):
|
||||||
# filter result by Field
|
# filter result by Field
|
||||||
column = self.component.find_column(
|
column = self.component.find_column(
|
||||||
goozali_response.data.columns, job_post_column_to_goozali_column["field"])
|
goozali_response.data.columns, job_post_column_to_goozali_column["field"])
|
||||||
|
user: User = user_repository.find_by_username()
|
||||||
|
user_goozali_field = position_to_goozali_field_map[user.position]
|
||||||
column_choice = self.component.find_choice_from_column(
|
column_choice = self.component.find_choice_from_column(
|
||||||
column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value)
|
column, user_goozali_field)
|
||||||
filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice(
|
filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice(
|
||||||
goozali_response.data.rows, column, column_choice)
|
goozali_response.data.rows, column, column_choice)
|
||||||
filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours(
|
filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours(
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
|
from model.Position import Position
|
||||||
|
from . import GoozaliFieldChoice
|
||||||
from .model import GoozaliColumn
|
from .model import GoozaliColumn
|
||||||
|
|
||||||
|
|
||||||
job_post_column_to_goozali_column = {
|
job_post_column_to_goozali_column = {
|
||||||
"date_posted": "Discovered",
|
"date_posted": "Discovered",
|
||||||
"field": "Field",
|
"field": "Field",
|
||||||
|
@ -23,6 +24,69 @@ job_post_column_names = ["id",
|
||||||
"location",
|
"location",
|
||||||
"company_industry"]
|
"company_industry"]
|
||||||
|
|
||||||
|
# Goozali "Field" labels as plain strings, in the order they were listed.
# NOTE(review): presumably mirrors the values of GoozaliFieldChoice - confirm,
# and consider deriving this list from the enum so the two cannot drift apart.
fields = [
    "Product Management",
    "Data Analyst",
    "Data Science, ML & Algorithms",
    "Software Engineering",
    "QA",
    "Cybersecurity",
    "IT and System Administration",
    "Frontend Development",
    "DevOps",
    "UI/UX, Design & Content",
    "HR & Recruitment",
    "Mobile Development",
    "Hardware Engineering",
    "Embedded, Low Level & Firmware Engineering",
    "Customer Success",
    "Project Management",
    "Operations",
    "Finance",
    "Systems Engineering",
    "Marketing",
    "Sales",
    "Compliance, Legal & Policy",
    "C-Level",
    "Business Development",
    "Mechanical Engineering",
    "Natural Science",
    "Other",
]
|
||||||
|
|
||||||
|
def create_position_to_goozali_field_map():
    """
    Creates a map with Position as keys and a list of relevant GoozaliFieldChoice as values.

    Every value is a list, even when only one field applies to a position,
    so a lookup result must be iterated (or indexed) by the caller; it is
    not itself a GoozaliFieldChoice and has no ``.value`` attribute.

    Returns:
        dict: A dictionary mapping Position to a list of GoozaliFieldChoice.
    """
    # Each entry gets its own list literal on purpose: no list object is
    # shared between positions, so mutating one entry cannot affect another.
    return {
        Position.BACKEND_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING],
        Position.FULLSTACK_DEVELOPER: [GoozaliFieldChoice.SOFTWARE_ENGINEERING],
        Position.FRONTEND_DEVELOPER: [
            GoozaliFieldChoice.FRONTEND_DEVELOPMENT,
            GoozaliFieldChoice.SOFTWARE_ENGINEERING,
        ],
        Position.DATA_SCIENTIST: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS],
        Position.DATA_ANALYST: [GoozaliFieldChoice.DATA_ANALYST],
        Position.PROJECT_MANAGER: [GoozaliFieldChoice.PROJECT_MANAGEMENT],
        Position.CLOUD_ENGINEER: [
            GoozaliFieldChoice.DEVOPS,
            GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION,
        ],
        Position.CLOUD_ARCHITECT: [
            GoozaliFieldChoice.DEVOPS,
            GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION,
        ],
        Position.UX_UI_DESIGNER: [GoozaliFieldChoice.UI_UX_DESIGN_CONTENT],
        Position.PRODUCT_MANAGER: [GoozaliFieldChoice.PRODUCT_MANAGEMENT],
        Position.DEV_OPS_ENGINEER: [GoozaliFieldChoice.DEVOPS],
        Position.BUSINESS_ANALYST: [GoozaliFieldChoice.BUSINESS_DEVELOPMENT],
        Position.CYBERSECURITY_ENGINEER: [GoozaliFieldChoice.CYBERSECURITY],
        Position.MACHINE_LEARNING_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS],
        Position.ARTIFICIAL_INTELLIGENCE_ENGINEER: [GoozaliFieldChoice.DATA_SCIENCE_ML_ALGORITHMS],
        Position.DATABASE_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
        Position.SYSTEMS_ADMINISTRATOR: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
        Position.NETWORK_ENGINEER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
        Position.TECHNICAL_SUPPORT_SPECIALIST: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
        Position.SALES_ENGINEER: [GoozaliFieldChoice.SALES],
        Position.SCRUM_MASTER: [GoozaliFieldChoice.PROJECT_MANAGEMENT],
        Position.IT_MANAGER: [GoozaliFieldChoice.IT_AND_SYSTEM_ADMINISTRATION],
    }
|
||||||
|
|
||||||
|
# Get the map
|
||||||
|
position_to_goozali_field_map = create_position_to_goozali_field_map()
|
||||||
|
|
||||||
# Key mapper: Extract 'name' as the key
|
# Key mapper: Extract 'name' as the key
|
||||||
def extract_goozali_column_name(column): return column.name if isinstance(
|
def extract_goozali_column_name(column): return column.name if isinstance(
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from cachebox import LRUCache
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from pymongo import UpdateOne
|
from pymongo import UpdateOne
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue