Consolidate date_from/date_to parameters - remove datetime_from/datetime_to

Simplified the time filtering interface by consolidating datetime_from/datetime_to
into date_from/date_to with automatic precision detection.

Changes:
- Remove datetime_from and datetime_to parameters (having them alongside date_from/date_to was confusing)
- Update date_from/date_to to accept multiple formats:
  - Date strings: "2025-01-20" (day precision)
  - Datetime strings: "2025-01-20T14:30:00" (hour precision)
  - date objects: date(2025, 1, 20) (day precision)
  - datetime objects: datetime(2025, 1, 20, 9, 0) (hour precision)
- Add detect_precision_and_convert() helper to automatically detect precision
- Add date_from_precision and date_to_precision fields to track precision level
- Update filtering logic to use precision fields instead of separate parameters
- Update README to remove datetime_from/datetime_to examples
- Update validation to accept ISO datetime strings

Benefits:
- Single, intuitive parameter name (date_from/date_to)
- Automatic precision detection based on input format
- Reduced API surface area and cognitive load
- More Pythonic - accept multiple input types

All changes are backward compatible for existing date_from/date_to string usage; callers that passed datetime_from/datetime_to must switch to date_from/date_to.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Zachary Hampton
2025-11-11 12:19:15 -08:00
parent 940b663011
commit c7a0d6d398
5 changed files with 108 additions and 53 deletions

View File

@@ -83,8 +83,8 @@ properties = scrape_property(
properties = scrape_property( properties = scrape_property(
location="Dallas, TX", location="Dallas, TX",
listing_type="for_sale", listing_type="for_sale",
datetime_from="2025-01-20T09:00:00", date_from="2025-01-20T09:00:00", # Hour precision automatically detected
datetime_to="2025-01-20T17:00:00" date_to="2025-01-20T17:00:00"
) )
``` ```
@@ -230,8 +230,8 @@ properties = scrape_property(
properties = scrape_property( properties = scrape_property(
location="Phoenix, AZ", location="Phoenix, AZ",
listing_type="for_sale", listing_type="for_sale",
datetime_from=datetime.now() - timedelta(days=7), date_from=datetime.now() - timedelta(days=7), # datetime object - hour precision
datetime_to=datetime.now(), date_to=datetime.now(),
limit=100 limit=100
) )
``` ```
@@ -313,13 +313,14 @@ Optional
├── date_from, date_to (string): Start and end dates to filter properties listed or sold, both dates are required. ├── date_from, date_to (string): Start and end dates to filter properties listed or sold, both dates are required.
| (use this to get properties in chunks as there's a 10k result limit) | (use this to get properties in chunks as there's a 10k result limit)
Format for both must be "YYYY-MM-DD". Accepts multiple formats with automatic precision detection:
Example: "2023-05-01", "2023-05-15" (fetches properties listed/sold between these dates) - Date strings: "YYYY-MM-DD" (day precision)
- Datetime strings: "YYYY-MM-DDTHH:MM:SS" (hour precision, uses client-side filtering)
├── datetime_from, datetime_to (string): ISO 8601 datetime strings for hour-precise filtering. Uses client-side filtering. │ - date objects: date(2025, 1, 20) (day precision)
Format: "YYYY-MM-DDTHH:MM:SS" or "YYYY-MM-DD" - datetime objects: datetime(2025, 1, 20, 9, 0) (hour precision)
│ Example: "2025-01-20T09:00:00", "2025-01-20T17:00:00" (fetches properties between 9 AM and 5 PM) │ Examples:
Note: Cannot be used together with date_from/date_to Day precision: "2023-05-01", "2023-05-15"
│ Hour precision: "2025-01-20T09:00:00", "2025-01-20T17:00:00"
├── beds_min, beds_max (integer): Filter by number of bedrooms ├── beds_min, beds_max (integer): Filter by number of bedrooms
│ Example: beds_min=2, beds_max=4 (2-4 bedrooms) │ Example: beds_min=2, beds_max=4 (2-4 bedrooms)

View File

@@ -1,11 +1,11 @@
import warnings import warnings
import pandas as pd import pandas as pd
from datetime import datetime, timedelta from datetime import datetime, timedelta, date
from .core.scrapers import ScraperInput from .core.scrapers import ScraperInput
from .utils import ( from .utils import (
process_result, ordered_properties, validate_input, validate_dates, validate_limit, process_result, ordered_properties, validate_input, validate_dates, validate_limit,
validate_offset, validate_datetime, validate_filters, validate_sort, validate_last_update_filters, validate_offset, validate_datetime, validate_filters, validate_sort, validate_last_update_filters,
convert_to_datetime_string, extract_timedelta_hours, extract_timedelta_days convert_to_datetime_string, extract_timedelta_hours, extract_timedelta_days, detect_precision_and_convert
) )
from .core.scrapers.realtor import RealtorScraper from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.models import ListingType, SearchPropertyType, ReturnType, Property from .core.scrapers.models import ListingType, SearchPropertyType, ReturnType, Property
@@ -20,8 +20,8 @@ def scrape_property(
mls_only: bool = False, mls_only: bool = False,
past_days: int | timedelta = None, past_days: int | timedelta = None,
proxy: str = None, proxy: str = None,
date_from: str = None, date_from: datetime | date | str = None,
date_to: str = None, date_to: datetime | date | str = None,
foreclosure: bool = None, foreclosure: bool = None,
extra_property_data: bool = True, extra_property_data: bool = True,
exclude_pending: bool = False, exclude_pending: bool = False,
@@ -29,8 +29,6 @@ def scrape_property(
offset: int = 0, offset: int = 0,
# New date/time filtering parameters # New date/time filtering parameters
past_hours: int | timedelta = None, past_hours: int | timedelta = None,
datetime_from: datetime | str = None,
datetime_to: datetime | str = None,
# New last_update_date filtering parameters # New last_update_date filtering parameters
updated_since: datetime | str = None, updated_since: datetime | str = None,
updated_in_past_hours: int | timedelta = None, updated_in_past_hours: int | timedelta = None,
@@ -67,7 +65,13 @@ def scrape_property(
- PENDING: Filters by pending_date. Contingent properties without pending_date are included. - PENDING: Filters by pending_date. Contingent properties without pending_date are included.
- SOLD: Filters by sold_date (when property was sold) - SOLD: Filters by sold_date (when property was sold)
- FOR_SALE/FOR_RENT: Filters by list_date (when property was listed) - FOR_SALE/FOR_RENT: Filters by list_date (when property was listed)
:param date_from, date_to: Get properties sold or listed (dependent on your listing_type) between these dates. format: 2021-01-28 :param date_from, date_to: Get properties sold or listed (dependent on your listing_type) between these dates.
Accepts multiple formats for flexible precision:
- Date strings: "2025-01-20" (day-level precision)
- Datetime strings: "2025-01-20T14:30:00" (hour-level precision)
- date objects: date(2025, 1, 20) (day-level precision)
- datetime objects: datetime(2025, 1, 20, 14, 30) (hour-level precision)
The precision is automatically detected based on the input format.
:param foreclosure: If set, fetches only foreclosure listings. :param foreclosure: If set, fetches only foreclosure listings.
:param extra_property_data: Increases requests by O(n). If set, this fetches additional property data (e.g. agent, broker, property evaluations etc.) :param extra_property_data: Increases requests by O(n). If set, this fetches additional property data (e.g. agent, broker, property evaluations etc.)
:param exclude_pending: If true, this excludes pending or contingent properties from the results, unless listing type is pending. :param exclude_pending: If true, this excludes pending or contingent properties from the results, unless listing type is pending.
@@ -76,7 +80,6 @@ def scrape_property(
New parameters: New parameters:
:param past_hours: Get properties in the last _ hours (requires client-side filtering). Accepts int or timedelta. :param past_hours: Get properties in the last _ hours (requires client-side filtering). Accepts int or timedelta.
:param datetime_from, datetime_to: Precise time filtering. Accepts datetime objects or ISO 8601 strings (e.g. "2025-01-20T14:30:00")
:param updated_since: Filter by last_update_date (when property was last updated). Accepts datetime object or ISO 8601 string (client-side filtering) :param updated_since: Filter by last_update_date (when property was last updated). Accepts datetime object or ISO 8601 string (client-side filtering)
:param updated_in_past_hours: Filter by properties updated in the last _ hours. Accepts int or timedelta (client-side filtering) :param updated_in_past_hours: Filter by properties updated in the last _ hours. Accepts int or timedelta (client-side filtering)
:param beds_min, beds_max: Filter by number of bedrooms :param beds_min, beds_max: Filter by number of bedrooms
@@ -91,11 +94,8 @@ def scrape_property(
Note: past_days and past_hours also accept timedelta objects for more Pythonic usage. Note: past_days and past_hours also accept timedelta objects for more Pythonic usage.
""" """
validate_input(listing_type) validate_input(listing_type)
validate_dates(date_from, date_to)
validate_limit(limit) validate_limit(limit)
validate_offset(offset, limit) validate_offset(offset, limit)
validate_datetime(datetime_from)
validate_datetime(datetime_to)
validate_filters( validate_filters(
beds_min, beds_max, baths_min, baths_max, sqft_min, sqft_max, beds_min, beds_max, baths_min, baths_max, sqft_min, sqft_max,
price_min, price_max, lot_sqft_min, lot_sqft_max, year_built_min, year_built_max price_min, price_max, lot_sqft_min, lot_sqft_max, year_built_min, year_built_max
@@ -116,11 +116,16 @@ def scrape_property(
else: else:
converted_listing_type = ListingType(listing_type.upper()) converted_listing_type = ListingType(listing_type.upper())
# Convert date_from/date_to with precision detection
converted_date_from, date_from_precision = detect_precision_and_convert(date_from)
converted_date_to, date_to_precision = detect_precision_and_convert(date_to)
# Validate converted dates
validate_dates(converted_date_from, converted_date_to)
# Convert datetime/timedelta objects to appropriate formats # Convert datetime/timedelta objects to appropriate formats
converted_past_days = extract_timedelta_days(past_days) converted_past_days = extract_timedelta_days(past_days)
converted_past_hours = extract_timedelta_hours(past_hours) converted_past_hours = extract_timedelta_hours(past_hours)
converted_datetime_from = convert_to_datetime_string(datetime_from)
converted_datetime_to = convert_to_datetime_string(datetime_to)
converted_updated_since = convert_to_datetime_string(updated_since) converted_updated_since = convert_to_datetime_string(updated_since)
converted_updated_in_past_hours = extract_timedelta_hours(updated_in_past_hours) converted_updated_in_past_hours = extract_timedelta_hours(updated_in_past_hours)
@@ -133,8 +138,10 @@ def scrape_property(
radius=radius, radius=radius,
mls_only=mls_only, mls_only=mls_only,
last_x_days=converted_past_days, last_x_days=converted_past_days,
date_from=date_from, date_from=converted_date_from,
date_to=date_to, date_to=converted_date_to,
date_from_precision=date_from_precision,
date_to_precision=date_to_precision,
foreclosure=foreclosure, foreclosure=foreclosure,
extra_property_data=extra_property_data, extra_property_data=extra_property_data,
exclude_pending=exclude_pending, exclude_pending=exclude_pending,
@@ -142,8 +149,6 @@ def scrape_property(
offset=offset, offset=offset,
# New date/time filtering # New date/time filtering
past_hours=converted_past_hours, past_hours=converted_past_hours,
datetime_from=converted_datetime_from,
datetime_to=converted_datetime_to,
# New last_update_date filtering # New last_update_date filtering
updated_since=converted_updated_since, updated_since=converted_updated_since,
updated_in_past_hours=converted_updated_in_past_hours, updated_in_past_hours=converted_updated_in_past_hours,

View File

@@ -21,6 +21,8 @@ class ScraperInput(BaseModel):
last_x_days: int | None = None last_x_days: int | None = None
date_from: str | None = None date_from: str | None = None
date_to: str | None = None date_to: str | None = None
date_from_precision: str | None = None # "day" or "hour"
date_to_precision: str | None = None # "day" or "hour"
foreclosure: bool | None = False foreclosure: bool | None = False
extra_property_data: bool | None = True extra_property_data: bool | None = True
exclude_pending: bool | None = False exclude_pending: bool | None = False
@@ -30,8 +32,6 @@ class ScraperInput(BaseModel):
# New date/time filtering parameters # New date/time filtering parameters
past_hours: int | None = None past_hours: int | None = None
datetime_from: str | None = None
datetime_to: str | None = None
# New last_update_date filtering parameters # New last_update_date filtering parameters
updated_since: str | None = None updated_since: str | None = None
@@ -107,6 +107,8 @@ class Scraper:
self.mls_only = scraper_input.mls_only self.mls_only = scraper_input.mls_only
self.date_from = scraper_input.date_from self.date_from = scraper_input.date_from
self.date_to = scraper_input.date_to self.date_to = scraper_input.date_to
self.date_from_precision = scraper_input.date_from_precision
self.date_to_precision = scraper_input.date_to_precision
self.foreclosure = scraper_input.foreclosure self.foreclosure = scraper_input.foreclosure
self.extra_property_data = scraper_input.extra_property_data self.extra_property_data = scraper_input.extra_property_data
self.exclude_pending = scraper_input.exclude_pending self.exclude_pending = scraper_input.exclude_pending
@@ -116,8 +118,6 @@ class Scraper:
# New date/time filtering # New date/time filtering
self.past_hours = scraper_input.past_hours self.past_hours = scraper_input.past_hours
self.datetime_from = scraper_input.datetime_from
self.datetime_to = scraper_input.datetime_to
# New last_update_date filtering # New last_update_date filtering
self.updated_since = scraper_input.updated_since self.updated_since = scraper_input.updated_since

View File

@@ -164,23 +164,26 @@ class RealtorScraper(Scraper):
# Build date parameter (expand to full days if hour-based filtering is used) # Build date parameter (expand to full days if hour-based filtering is used)
if date_field: if date_field:
if self.datetime_from or self.datetime_to: # Check if we have hour precision (need to extract date part for API, then filter client-side)
has_hour_precision = (self.date_from_precision == "hour" or self.date_to_precision == "hour")
if has_hour_precision and (self.date_from or self.date_to):
# Hour-based datetime filtering: extract date parts for API, client-side filter by hours # Hour-based datetime filtering: extract date parts for API, client-side filter by hours
from datetime import datetime from datetime import datetime
min_date = None min_date = None
max_date = None max_date = None
if self.datetime_from: if self.date_from:
try: try:
dt_from = datetime.fromisoformat(self.datetime_from.replace('Z', '+00:00')) dt_from = datetime.fromisoformat(self.date_from.replace('Z', '+00:00'))
min_date = dt_from.strftime("%Y-%m-%d") min_date = dt_from.strftime("%Y-%m-%d")
except (ValueError, AttributeError): except (ValueError, AttributeError):
pass pass
if self.datetime_to: if self.date_to:
try: try:
dt_to = datetime.fromisoformat(self.datetime_to.replace('Z', '+00:00')) dt_to = datetime.fromisoformat(self.date_to.replace('Z', '+00:00'))
max_date = dt_to.strftime("%Y-%m-%d") max_date = dt_to.strftime("%Y-%m-%d")
except (ValueError, AttributeError): except (ValueError, AttributeError):
pass pass
@@ -551,7 +554,8 @@ class RealtorScraper(Scraper):
# Apply client-side hour-based filtering if needed # Apply client-side hour-based filtering if needed
# (API only supports day-level filtering, so we post-filter for hour precision) # (API only supports day-level filtering, so we post-filter for hour precision)
if self.past_hours or self.datetime_from or self.datetime_to: has_hour_precision = (self.date_from_precision == "hour" or self.date_to_precision == "hour")
if self.past_hours or has_hour_precision:
homes = self._apply_hour_based_date_filter(homes) homes = self._apply_hour_based_date_filter(homes)
# Apply client-side date filtering for PENDING properties # Apply client-side date filtering for PENDING properties
# (server-side filters are broken in the API) # (server-side filters are broken in the API)
@@ -577,7 +581,7 @@ class RealtorScraper(Scraper):
def _apply_hour_based_date_filter(self, homes): def _apply_hour_based_date_filter(self, homes):
"""Apply client-side hour-based date filtering for all listing types. """Apply client-side hour-based date filtering for all listing types.
This is used when past_hours, datetime_from, or datetime_to are specified, This is used when past_hours or date_from/date_to have hour precision,
since the API only supports day-level filtering. since the API only supports day-level filtering.
""" """
if not homes: if not homes:
@@ -591,17 +595,17 @@ class RealtorScraper(Scraper):
if self.past_hours: if self.past_hours:
cutoff_datetime = datetime.now() - timedelta(hours=self.past_hours) cutoff_datetime = datetime.now() - timedelta(hours=self.past_hours)
date_range = {'type': 'since', 'date': cutoff_datetime} date_range = {'type': 'since', 'date': cutoff_datetime}
elif self.datetime_from or self.datetime_to: elif self.date_from or self.date_to:
try: try:
from_datetime = None from_datetime = None
to_datetime = None to_datetime = None
if self.datetime_from: if self.date_from:
from_datetime_str = self.datetime_from.replace('Z', '+00:00') if self.datetime_from.endswith('Z') else self.datetime_from from_datetime_str = self.date_from.replace('Z', '+00:00') if self.date_from.endswith('Z') else self.date_from
from_datetime = datetime.fromisoformat(from_datetime_str).replace(tzinfo=None) from_datetime = datetime.fromisoformat(from_datetime_str).replace(tzinfo=None)
if self.datetime_to: if self.date_to:
to_datetime_str = self.datetime_to.replace('Z', '+00:00') if self.datetime_to.endswith('Z') else self.datetime_to to_datetime_str = self.date_to.replace('Z', '+00:00') if self.date_to.endswith('Z') else self.date_to
to_datetime = datetime.fromisoformat(to_datetime_str).replace(tzinfo=None) to_datetime = datetime.fromisoformat(to_datetime_str).replace(tzinfo=None)
if from_datetime and to_datetime: if from_datetime and to_datetime:

View File

@@ -176,20 +176,22 @@ def validate_dates(date_from: str | None, date_to: str | None) -> None:
if date_from and date_to: if date_from and date_to:
try: try:
date_from_obj = datetime.strptime(date_from, "%Y-%m-%d") # Use fromisoformat to accept both date and datetime strings
date_to_obj = datetime.strptime(date_to, "%Y-%m-%d") date_from_str = date_from.replace('Z', '+00:00') if date_from.endswith('Z') else date_from
date_to_str = date_to.replace('Z', '+00:00') if date_to.endswith('Z') else date_to
date_from_obj = datetime.fromisoformat(date_from_str)
date_to_obj = datetime.fromisoformat(date_to_str)
if date_to_obj < date_from_obj: if date_to_obj < date_from_obj:
raise InvalidDate(f"date_to ('{date_to}') must be after date_from ('{date_from}').") raise InvalidDate(f"date_to ('{date_to}') must be after date_from ('{date_from}').")
except ValueError as e: except ValueError as e:
# Provide specific guidance on the expected format # Provide specific guidance on the expected format
if "does not match format" in str(e): raise InvalidDate(
raise InvalidDate( f"Invalid date format. Expected ISO 8601 format. "
f"Invalid date format. Expected 'YYYY-MM-DD' format. " f"Examples: '2025-01-20' (date only) or '2025-01-20T14:30:00' (with time). "
f"Examples: '2025-01-20', '2024-12-31'. " f"Got: date_from='{date_from}', date_to='{date_to}'. Error: {e}"
f"Got: date_from='{date_from}', date_to='{date_to}'" )
)
raise InvalidDate(f"Invalid date format or range: {e}")
def validate_limit(limit: int) -> None: def validate_limit(limit: int) -> None:
@@ -413,3 +415,46 @@ def extract_timedelta_days(value) -> int | None:
f"Invalid past_days value. Expected int or timedelta object. " f"Invalid past_days value. Expected int or timedelta object. "
f"Got: {type(value).__name__}" f"Got: {type(value).__name__}"
) )
def detect_precision_and_convert(value):
"""
Detect if input has time precision and convert to ISO string.
Accepts:
- datetime.datetime objects → (ISO string, "hour")
- datetime.date objects → (ISO string at midnight, "day")
- ISO 8601 datetime strings with time → (string as-is, "hour")
- Date-only strings "YYYY-MM-DD" → (string as-is, "day")
- None → (None, None)
Returns:
tuple: (iso_string, precision) where precision is "day" or "hour"
"""
if value is None:
return (None, None)
from datetime import datetime as dt, date
# datetime.datetime object - has time precision
if isinstance(value, dt):
return (value.isoformat(), "hour")
# datetime.date object - day precision only
if isinstance(value, date):
# Convert to datetime at midnight
return (dt.combine(value, dt.min.time()).isoformat(), "day")
# String - detect if it has time component
if isinstance(value, str):
# ISO 8601 datetime with time component (has 'T' and time)
if 'T' in value:
return (value, "hour")
# Date-only string
else:
return (value, "day")
raise ValueError(
f"Invalid date value. Expected datetime object, date object, or ISO 8601 string. "
f"Got: {type(value).__name__}"
)