fixed datetime import, new function to create dicts

pull/231/head
Yariv Menachem 2024-12-18 14:30:21 +02:00
parent 58f0793181
commit ead8eb126d
4 changed files with 54 additions and 4 deletions

View File

@ -1,5 +1,6 @@
import json import json
from jobspy.jobs import JobPost
from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
@ -75,3 +76,7 @@ class GoozaliMapper:
# Return a new GoozaliResponse with msg and the converted data # Return a new GoozaliResponse with msg and the converted data
return GoozaliResponse(msg=data['msg'], data=data_obj) return GoozaliResponse(msg=data['msg'], data=data_obj)
def map_goozali_response_to_job_post(self, row: GoozaliRow, columns: dict[str, GoozaliColumn]) -> JobPost:
    """
    Map a single Goozali row to a JobPost.

    :param row: The Goozali row to convert
    :param columns: Mapping of str keys to GoozaliColumn objects
    :return: A JobPost instance

    NOTE(review): the body currently ignores both ``row`` and ``columns`` and
    returns a ``JobPost`` constructed with no arguments - this looks like a
    placeholder; confirm the intended field mapping.
    """
    return JobPost()

View File

@ -1,5 +1,4 @@
from datetime import datetime from datetime import datetime, timedelta
import json
from jobspy.scrapers.goozali.model import GoozaliRow from jobspy.scrapers.goozali.model import GoozaliRow
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
@ -27,7 +26,7 @@ class GoozaliScrapperComponent:
now = datetime.now() now = datetime.now()
# Calculate the time delta for the given hours # Calculate the time delta for the given hours
time_delta = datetime.timedelta(hours=hours) time_delta = timedelta(hours=hours)
# Filter rows # Filter rows
filtered_rows = [ filtered_rows = [

View File

@ -1,4 +1,5 @@
from __future__ import annotations from __future__ import annotations
from typing import Callable, Dict, List, Optional, TypeVar, Union
import re import re
import logging import logging
@ -283,3 +284,40 @@ def extract_job_type(description: str):
listing_types.append(key) listing_types.append(key)
return listing_types if listing_types else None return listing_types if listing_types else None
K = TypeVar('K')  # Key type
V = TypeVar('V')  # Input value type
R = TypeVar('R')  # Mapped (output) value type


def create_dict_by_key_and_value(
    values: List[V],
    key_mapper: Callable[[V], K],
    value_mapper: Optional[Callable[[V], R]] = None
) -> Dict[K, Union[V, R]]:
    """
    Create a dictionary by mapping keys and optionally mapping values.

    Items are processed in order; when two items map to the same key, the
    later item overwrites the earlier one.

    :param values: List of input values
    :param key_mapper: Function to map a value to a key
    :param value_mapper: Optional function to map a value to a transformed
        value (which may be of a different type than the input). When it is
        None, the input values themselves are stored.
    :return: A dictionary with mapped keys and values
    """
    # Compare against None explicitly: a callable object may define
    # __bool__/__len__ and evaluate as falsy, and must still be applied.
    if value_mapper is None:
        return {key_mapper(value): value for value in values}
    # The key expression is evaluated before the value expression, so
    # key_mapper still runs before value_mapper for every item.
    return {key_mapper(value): value_mapper(value) for value in values}
# Example usage:
# values = [
# {"id": 1, "name": "Alice"},
# {"id": 2, "name": "Bob"},
# {"id": 3, "name": "Charlie"}
# ]
# Key mapper: Extract 'id' as the key
# key_mapper = lambda x: x["id"]
# Value mapper: Extract 'name' as the value
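# value_mapper = lambda x: x["name"]
# result = create_dict_by_key_and_value(values, key_mapper, value_mapper)
# result == {1: "Alice", 2: "Bob", 3: "Charlie"}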

View File

@ -5,7 +5,9 @@ import pandas as pd
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
from jobspy.scrapers.goozali.model import GoozaliColumn
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData
from jobspy.scrapers.utils import create_dict_by_key_and_value
# URL Example # URL Example
# https://airtable.com/v0.3/view/viwagEIbkfz2iMsLU/readSharedViewData?stringifiedObjectParams=%7B%22shouldUseNestedResponseFormat%22%3Atrue%7D&requestId=reqXyRSHWlXyiRgY9&accessPolicy=%7B%22allowedActions%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSignedAttachmentUrls%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows%20*%5BdisplayedInView%3DviwagEIbkfz2iMsLU%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%5D%2C%22shareId%22%3A%22shr97tl6luEk4Ca9R%22%2C%22applicationId%22%3A%22app5sYJyDgcRbJWYU%22%2C%22generationNumber%22%3A0%2C%22expires%22%3A%222025-01-02T00%3A00%3A00.000Z%22%2C%22signature%22%3A%223aa292ee44d15aa75d9506200329e413653471f89e000fa370ef9fa38393070a%22%7D # 
https://airtable.com/v0.3/view/viwagEIbkfz2iMsLU/readSharedViewData?stringifiedObjectParams=%7B%22shouldUseNestedResponseFormat%22%3Atrue%7D&requestId=reqXyRSHWlXyiRgY9&accessPolicy=%7B%22allowedActions%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSignedAttachmentUrls%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows%20*%5BdisplayedInView%3DviwagEIbkfz2iMsLU%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%5D%2C%22shareId%22%3A%22shr97tl6luEk4Ca9R%22%2C%22applicationId%22%3A%22app5sYJyDgcRbJWYU%22%2C%22generationNumber%22%3A0%2C%22expires%22%3A%222025-01-02T00%3A00%3A00.000Z%22%2C%22signature%22%3A%223aa292ee44d15aa75d9506200329e413653471f89e000fa370ef9fa38393070a%22%7D
@ -33,7 +35,13 @@ try:
filtered_rows_by_age_and_column_choice = component.filter_rows_by_hours( filtered_rows_by_age_and_column_choice = component.filter_rows_by_hours(
filtered_rows_by_column_choice, hours_old) filtered_rows_by_column_choice, hours_old)
filtered_rows_by_age_and_column_choice # Key mapper: Extract 'id' as the key
def extract_goozali_column_id(column):
    """Return ``column.id`` when ``column`` is a GoozaliColumn, otherwise None."""
    if not isinstance(column, GoozaliColumn):
        return None
    return column.id
dict_column_id_to_column = create_dict_by_key_and_value(
response_data.columns, extract_goozali_column_id)
print("hello heloo")
except FileNotFoundError: except FileNotFoundError:
print("The file was not found.") print("The file was not found.")
except json.JSONDecodeError: except json.JSONDecodeError: