From ead8eb126dd073f08a09461fb102c02d788fb3dc Mon Sep 17 00:00:00 2001 From: Yariv Menachem Date: Wed, 18 Dec 2024 14:30:21 +0200 Subject: [PATCH] fixed datetime import, new function to create dicts --- src/jobspy/scrapers/goozali/GoozaliMapper.py | 5 +++ .../goozali/GoozaliScrapperComponent.py | 5 +-- src/jobspy/scrapers/utils.py | 38 +++++++++++++++++++ src/tests/test_goozali.py | 10 ++++- 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/jobspy/scrapers/goozali/GoozaliMapper.py b/src/jobspy/scrapers/goozali/GoozaliMapper.py index f204296..fbcf57b 100644 --- a/src/jobspy/scrapers/goozali/GoozaliMapper.py +++ b/src/jobspy/scrapers/goozali/GoozaliMapper.py @@ -1,5 +1,6 @@ import json +from jobspy.jobs import JobPost from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice @@ -75,3 +76,7 @@ class GoozaliMapper: # Return a new GoozaliResponse with msg and the converted data return GoozaliResponse(msg=data['msg'], data=data_obj) + + def map_goozali_response_to_job_post(self, row: GoozaliRow, columns: dict[str, GoozaliColumn]) -> JobPost: + + return JobPost() diff --git a/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py b/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py index dc0788d..149560a 100644 --- a/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py +++ b/src/jobspy/scrapers/goozali/GoozaliScrapperComponent.py @@ -1,5 +1,4 @@ -from datetime import datetime -import json +from datetime import datetime, timedelta from jobspy.scrapers.goozali.model import GoozaliRow from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn @@ -27,7 +26,7 @@ class GoozaliScrapperComponent: now = datetime.now() # Calculate the time delta for the given hours - time_delta = datetime.timedelta(hours=hours) + time_delta = timedelta(hours=hours) # Filter rows filtered_rows = [ diff --git a/src/jobspy/scrapers/utils.py b/src/jobspy/scrapers/utils.py index 7c032d7..6947650 100644 --- a/src/jobspy/scrapers/utils.py +++ b/src/jobspy/scrapers/utils.py @@ -1,4 +1,5 @@ from __future__ import annotations +from typing import Callable, TypeVar, List, Dict, Optional import re import logging @@ -283,3 +284,40 @@ def extract_job_type(description: str): listing_types.append(key) return listing_types if listing_types else None + + +K = TypeVar('K') # Key type +V = TypeVar('V') # Value type + + +def create_dict_by_key_and_value( + values: List[V], + key_mapper: Callable[[V], K], + value_mapper: Optional[Callable[[V], V]] = None +) -> Dict[K, V]: + """ + Create a dictionary by mapping keys and optionally mapping values. + + :param values: List of input values + :param key_mapper: Function to map a value to a key + :param value_mapper: Optional function to map a value to a transformed value + :return: A dictionary with mapped keys and values + """ + result = {} + for value in values: + key = key_mapper(value) + result[key] = value_mapper(value) if value_mapper else value + return result + +# Example usage: +# values = [ +# {"id": 1, "name": "Alice"}, +# {"id": 2, "name": "Bob"}, +# {"id": 3, "name": "Charlie"} +# ] + +# Key mapper: Extract 'id' as the key +# key_mapper = lambda x: x["id"] + +# Value mapper: Extract 'name' as the value +# value_mapper = lambda x: x["name"] diff --git a/src/tests/test_goozali.py b/src/tests/test_goozali.py index 91f8da8..a0e0af3 100644 --- a/src/tests/test_goozali.py +++ b/src/tests/test_goozali.py @@ -5,7 +5,9 @@ import pandas as pd from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent +from jobspy.scrapers.goozali.model import GoozaliColumn from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData +from jobspy.scrapers.utils import create_dict_by_key_and_value # URL Example # https://airtable.com/v0.3/view/viwagEIbkfz2iMsLU/readSharedViewData?stringifiedObjectParams=%7B%22shouldUseNestedResponseFormat%22%3Atrue%7D&requestId=reqXyRSHWlXyiRgY9&accessPolicy=%7B%22allowedActions%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSignedAttachmentUrls%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows%20*%5BdisplayedInView%3DviwagEIbkfz2iMsLU%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%5D%2C%22shareId%22%3A%22shr97tl6luEk4Ca9R%22%2C%22applicationId%22%3A%22app5sYJyDgcRbJWYU%22%2C%22generationNumber%22%3A0%2C%22expires%22%3A%222025-01-02T00%3A00%3A00.000Z%22%2C%22signature%22%3A%223aa292ee44d15aa75d9506200329e413653471f89e000fa370ef9fa38393070a%22%7D @@ -33,7 +35,13 @@ try: filtered_rows_by_age_and_column_choice = component.filter_rows_by_hours( filtered_rows_by_column_choice, hours_old) - filtered_rows_by_age_and_column_choice + # Key mapper: Extract 'id' as the key + def extract_goozali_column_id(column): return column.id if isinstance( + column, GoozaliColumn) else None + dict_column_id_to_column = create_dict_by_key_and_value( + response_data.columns, extract_goozali_column_id) + + print("hello heloo") except FileNotFoundError: print("The file was not found.") except json.JSONDecodeError: