diff --git a/README.md b/README.md index 63d284e..049e9db 100644 --- a/README.md +++ b/README.md @@ -83,8 +83,8 @@ properties = scrape_property( properties = scrape_property( location="Dallas, TX", listing_type="for_sale", - datetime_from="2025-01-20T09:00:00", - datetime_to="2025-01-20T17:00:00" + date_from="2025-01-20T09:00:00", # Hour precision automatically detected + date_to="2025-01-20T17:00:00" ) ``` @@ -230,8 +230,8 @@ properties = scrape_property( properties = scrape_property( location="Phoenix, AZ", listing_type="for_sale", - datetime_from=datetime.now() - timedelta(days=7), - datetime_to=datetime.now(), + date_from=datetime.now() - timedelta(days=7), # datetime object - hour precision + date_to=datetime.now(), limit=100 ) ``` @@ -313,13 +313,14 @@ Optional │ ├── date_from, date_to (string): Start and end dates to filter properties listed or sold, both dates are required. | (use this to get properties in chunks as there's a 10k result limit) -│ Format for both must be "YYYY-MM-DD". -│ Example: "2023-05-01", "2023-05-15" (fetches properties listed/sold between these dates) -│ -├── datetime_from, datetime_to (string): ISO 8601 datetime strings for hour-precise filtering. Uses client-side filtering. -│ Format: "YYYY-MM-DDTHH:MM:SS" or "YYYY-MM-DD" -│ Example: "2025-01-20T09:00:00", "2025-01-20T17:00:00" (fetches properties between 9 AM and 5 PM) -│ Note: Cannot be used together with date_from/date_to +│ Accepts multiple formats with automatic precision detection: +│ - Date strings: "YYYY-MM-DD" (day precision) +│ - Datetime strings: "YYYY-MM-DDTHH:MM:SS" (hour precision, uses client-side filtering) +│ - date objects: date(2025, 1, 20) (day precision) +│ - datetime objects: datetime(2025, 1, 20, 9, 0) (hour precision) +│ Examples: +│ Day precision: "2023-05-01", "2023-05-15" +│ Hour precision: "2025-01-20T09:00:00", "2025-01-20T17:00:00" │ ├── beds_min, beds_max (integer): Filter by number of bedrooms │ Example: beds_min=2, beds_max=4 (2-4 bedrooms) diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index 45721c6..88cba18 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -1,11 +1,11 @@ import warnings import pandas as pd -from datetime import datetime, timedelta +from datetime import datetime, timedelta, date from .core.scrapers import ScraperInput from .utils import ( process_result, ordered_properties, validate_input, validate_dates, validate_limit, validate_offset, validate_datetime, validate_filters, validate_sort, validate_last_update_filters, - convert_to_datetime_string, extract_timedelta_hours, extract_timedelta_days + convert_to_datetime_string, extract_timedelta_hours, extract_timedelta_days, detect_precision_and_convert ) from .core.scrapers.realtor import RealtorScraper from .core.scrapers.models import ListingType, SearchPropertyType, ReturnType, Property @@ -20,8 +20,8 @@ def scrape_property( mls_only: bool = False, past_days: int | timedelta = None, proxy: str = None, - date_from: str = None, - date_to: str = None, + date_from: datetime | date | str = None, + date_to: datetime | date | str = None, foreclosure: bool = None, extra_property_data: bool = True, exclude_pending: bool = False, @@ -29,8 +29,6 @@ def scrape_property( offset: int = 0, # New date/time filtering parameters past_hours: int | timedelta = None, - datetime_from: datetime | str = None, - datetime_to: datetime | str = None, # New last_update_date filtering parameters updated_since: datetime | str = None, updated_in_past_hours: int | timedelta = None, @@ -67,7 +65,13 @@ def scrape_property( - PENDING: Filters by pending_date. Contingent properties without pending_date are included. - SOLD: Filters by sold_date (when property was sold) - FOR_SALE/FOR_RENT: Filters by list_date (when property was listed) - :param date_from, date_to: Get properties sold or listed (dependent on your listing_type) between these dates. format: 2021-01-28 + :param date_from, date_to: Get properties sold or listed (dependent on your listing_type) between these dates. + Accepts multiple formats for flexible precision: + - Date strings: "2025-01-20" (day-level precision) + - Datetime strings: "2025-01-20T14:30:00" (hour-level precision) + - date objects: date(2025, 1, 20) (day-level precision) + - datetime objects: datetime(2025, 1, 20, 14, 30) (hour-level precision) + The precision is automatically detected based on the input format. :param foreclosure: If set, fetches only foreclosure listings. :param extra_property_data: Increases requests by O(n). If set, this fetches additional property data (e.g. agent, broker, property evaluations etc.) :param exclude_pending: If true, this excludes pending or contingent properties from the results, unless listing type is pending. @@ -76,7 +80,6 @@ def scrape_property( New parameters: :param past_hours: Get properties in the last _ hours (requires client-side filtering). Accepts int or timedelta. - :param datetime_from, datetime_to: Precise time filtering. Accepts datetime objects or ISO 8601 strings (e.g. "2025-01-20T14:30:00") :param updated_since: Filter by last_update_date (when property was last updated). Accepts datetime object or ISO 8601 string (client-side filtering) :param updated_in_past_hours: Filter by properties updated in the last _ hours. Accepts int or timedelta (client-side filtering) :param beds_min, beds_max: Filter by number of bedrooms @@ -91,11 +94,8 @@ def scrape_property( Note: past_days and past_hours also accept timedelta objects for more Pythonic usage. """ validate_input(listing_type) - validate_dates(date_from, date_to) validate_limit(limit) validate_offset(offset, limit) - validate_datetime(datetime_from) - validate_datetime(datetime_to) validate_filters( beds_min, beds_max, baths_min, baths_max, sqft_min, sqft_max, price_min, price_max, lot_sqft_min, lot_sqft_max, year_built_min, year_built_max @@ -116,11 +116,16 @@ def scrape_property( else: converted_listing_type = ListingType(listing_type.upper()) + # Convert date_from/date_to with precision detection + converted_date_from, date_from_precision = detect_precision_and_convert(date_from) + converted_date_to, date_to_precision = detect_precision_and_convert(date_to) + + # Validate converted dates + validate_dates(converted_date_from, converted_date_to) + # Convert datetime/timedelta objects to appropriate formats converted_past_days = extract_timedelta_days(past_days) converted_past_hours = extract_timedelta_hours(past_hours) - converted_datetime_from = convert_to_datetime_string(datetime_from) - converted_datetime_to = convert_to_datetime_string(datetime_to) converted_updated_since = convert_to_datetime_string(updated_since) converted_updated_in_past_hours = extract_timedelta_hours(updated_in_past_hours) @@ -133,8 +138,10 @@ def scrape_property( radius=radius, mls_only=mls_only, last_x_days=converted_past_days, - date_from=date_from, - date_to=date_to, + date_from=converted_date_from, + date_to=converted_date_to, + date_from_precision=date_from_precision, + date_to_precision=date_to_precision, foreclosure=foreclosure, extra_property_data=extra_property_data, exclude_pending=exclude_pending, @@ -142,8 +149,6 @@ def scrape_property( offset=offset, # New date/time filtering past_hours=converted_past_hours, - datetime_from=converted_datetime_from, - datetime_to=converted_datetime_to, # New last_update_date filtering updated_since=converted_updated_since, updated_in_past_hours=converted_updated_in_past_hours, diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index 0a0b539..8ad2051 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -21,6 +21,8 @@ class ScraperInput(BaseModel): last_x_days: int | None = None date_from: str | None = None date_to: str | None = None + date_from_precision: str | None = None # "day" or "hour" + date_to_precision: str | None = None # "day" or "hour" foreclosure: bool | None = False extra_property_data: bool | None = True exclude_pending: bool | None = False @@ -30,8 +32,6 @@ class ScraperInput(BaseModel): # New date/time filtering parameters past_hours: int | None = None - datetime_from: str | None = None - datetime_to: str | None = None # New last_update_date filtering parameters updated_since: str | None = None @@ -107,6 +107,8 @@ class Scraper: self.mls_only = scraper_input.mls_only self.date_from = scraper_input.date_from self.date_to = scraper_input.date_to + self.date_from_precision = scraper_input.date_from_precision + self.date_to_precision = scraper_input.date_to_precision self.foreclosure = scraper_input.foreclosure self.extra_property_data = scraper_input.extra_property_data self.exclude_pending = scraper_input.exclude_pending @@ -116,8 +118,6 @@ class Scraper: # New date/time filtering self.past_hours = scraper_input.past_hours - self.datetime_from = scraper_input.datetime_from - self.datetime_to = scraper_input.datetime_to # New last_update_date filtering self.updated_since = scraper_input.updated_since diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index 5a5dee3..76fffe5 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -164,23 +164,26 @@ class RealtorScraper(Scraper): # Build date parameter (expand to full days if hour-based filtering is used) if date_field: - if self.datetime_from or self.datetime_to: + # Check if we have hour precision (need to extract date part for API, then filter client-side) + has_hour_precision = (self.date_from_precision == "hour" or self.date_to_precision == "hour") + + if has_hour_precision and (self.date_from or self.date_to): # Hour-based datetime filtering: extract date parts for API, client-side filter by hours from datetime import datetime min_date = None max_date = None - if self.datetime_from: + if self.date_from: try: - dt_from = datetime.fromisoformat(self.datetime_from.replace('Z', '+00:00')) + dt_from = datetime.fromisoformat(self.date_from.replace('Z', '+00:00')) min_date = dt_from.strftime("%Y-%m-%d") except (ValueError, AttributeError): pass - if self.datetime_to: + if self.date_to: try: - dt_to = datetime.fromisoformat(self.datetime_to.replace('Z', '+00:00')) + dt_to = datetime.fromisoformat(self.date_to.replace('Z', '+00:00')) max_date = dt_to.strftime("%Y-%m-%d") except (ValueError, AttributeError): pass @@ -551,7 +554,8 @@ class RealtorScraper(Scraper): # Apply client-side hour-based filtering if needed # (API only supports day-level filtering, so we post-filter for hour precision) - if self.past_hours or self.datetime_from or self.datetime_to: + has_hour_precision = (self.date_from_precision == "hour" or self.date_to_precision == "hour") + if self.past_hours or has_hour_precision: homes = self._apply_hour_based_date_filter(homes) # Apply client-side date filtering for PENDING properties # (server-side filters are broken in the API) @@ -577,7 +581,7 @@ class RealtorScraper(Scraper): def _apply_hour_based_date_filter(self, homes): """Apply client-side hour-based date filtering for all listing types. - This is used when past_hours, datetime_from, or datetime_to are specified, + This is used when past_hours or date_from/date_to have hour precision, since the API only supports day-level filtering. """ if not homes: @@ -591,17 +595,17 @@ class RealtorScraper(Scraper): if self.past_hours: cutoff_datetime = datetime.now() - timedelta(hours=self.past_hours) date_range = {'type': 'since', 'date': cutoff_datetime} - elif self.datetime_from or self.datetime_to: + elif self.date_from or self.date_to: try: from_datetime = None to_datetime = None - if self.datetime_from: - from_datetime_str = self.datetime_from.replace('Z', '+00:00') if self.datetime_from.endswith('Z') else self.datetime_from + if self.date_from: + from_datetime_str = self.date_from.replace('Z', '+00:00') if self.date_from.endswith('Z') else self.date_from from_datetime = datetime.fromisoformat(from_datetime_str).replace(tzinfo=None) - if self.datetime_to: - to_datetime_str = self.datetime_to.replace('Z', '+00:00') if self.datetime_to.endswith('Z') else self.datetime_to + if self.date_to: + to_datetime_str = self.date_to.replace('Z', '+00:00') if self.date_to.endswith('Z') else self.date_to to_datetime = datetime.fromisoformat(to_datetime_str).replace(tzinfo=None) if from_datetime and to_datetime: diff --git a/homeharvest/utils.py b/homeharvest/utils.py index 58d67bd..5c2b142 100644 --- a/homeharvest/utils.py +++ b/homeharvest/utils.py @@ -176,20 +176,22 @@ def validate_dates(date_from: str | None, date_to: str | None) -> None: if date_from and date_to: try: - date_from_obj = datetime.strptime(date_from, "%Y-%m-%d") - date_to_obj = datetime.strptime(date_to, "%Y-%m-%d") + # Use fromisoformat to accept both date and datetime strings + date_from_str = date_from.replace('Z', '+00:00') if date_from.endswith('Z') else date_from + date_to_str = date_to.replace('Z', '+00:00') if date_to.endswith('Z') else date_to + + date_from_obj = datetime.fromisoformat(date_from_str) + date_to_obj = datetime.fromisoformat(date_to_str) if date_to_obj < date_from_obj: raise InvalidDate(f"date_to ('{date_to}') must be after date_from ('{date_from}').") except ValueError as e: # Provide specific guidance on the expected format - if "does not match format" in str(e): - raise InvalidDate( - f"Invalid date format. Expected 'YYYY-MM-DD' format. " - f"Examples: '2025-01-20', '2024-12-31'. " - f"Got: date_from='{date_from}', date_to='{date_to}'" - ) - raise InvalidDate(f"Invalid date format or range: {e}") + raise InvalidDate( + f"Invalid date format. Expected ISO 8601 format. " + f"Examples: '2025-01-20' (date only) or '2025-01-20T14:30:00' (with time). " + f"Got: date_from='{date_from}', date_to='{date_to}'. Error: {e}" + ) def validate_limit(limit: int) -> None: @@ -413,3 +415,46 @@ def extract_timedelta_days(value) -> int | None: f"Invalid past_days value. Expected int or timedelta object. " f"Got: {type(value).__name__}" ) + + +def detect_precision_and_convert(value): + """ + Detect if input has time precision and convert to ISO string. + + Accepts: + - datetime.datetime objects → (ISO string, "hour") + - datetime.date objects → (ISO string at midnight, "day") + - ISO 8601 datetime strings with time → (string as-is, "hour") + - Date-only strings "YYYY-MM-DD" → (string as-is, "day") + - None → (None, None) + + Returns: + tuple: (iso_string, precision) where precision is "day" or "hour" + """ + if value is None: + return (None, None) + + from datetime import datetime as dt, date + + # datetime.datetime object - has time precision + if isinstance(value, dt): + return (value.isoformat(), "hour") + + # datetime.date object - day precision only + if isinstance(value, date): + # Convert to datetime at midnight + return (dt.combine(value, dt.min.time()).isoformat(), "day") + + # String - detect if it has time component + if isinstance(value, str): + # ISO 8601 datetime with time component (has 'T' and time) + if 'T' in value: + return (value, "hour") + # Date-only string + else: + return (value, "day") + + raise ValueError( + f"Invalid date value. Expected datetime object, date object, or ISO 8601 string. " + f"Got: {type(value).__name__}" + )