mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-04 11:34:32 -08:00
Consolidate date_from/date_to parameters - remove datetime_from/datetime_to
Simplified the time-filtering interface by consolidating datetime_from/datetime_to into date_from/date_to with automatic precision detection.

Changes:
- Remove the datetime_from and datetime_to parameters (having both sets was confusing)
- Update date_from/date_to to accept multiple formats:
  - Date strings: "2025-01-20" (day precision)
  - Datetime strings: "2025-01-20T14:30:00" (hour precision)
  - date objects: date(2025, 1, 20) (day precision)
  - datetime objects: datetime(2025, 1, 20, 9, 0) (hour precision)
- Add detect_precision_and_convert() helper to automatically detect precision
- Add date_from_precision and date_to_precision fields to track precision level
- Update filtering logic to use precision fields instead of separate parameters
- Update README to remove datetime_from/datetime_to examples
- Update validation to accept ISO datetime strings

Benefits:
- Single, intuitive parameter name (date_from/date_to)
- Automatic precision detection based on input format
- Reduced API surface area and cognitive load
- More Pythonic: accepts multiple input types

All changes are backward compatible for existing date_from/date_to string usage.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
import warnings
|
||||
import pandas as pd
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, date
|
||||
from .core.scrapers import ScraperInput
|
||||
from .utils import (
|
||||
process_result, ordered_properties, validate_input, validate_dates, validate_limit,
|
||||
validate_offset, validate_datetime, validate_filters, validate_sort, validate_last_update_filters,
|
||||
convert_to_datetime_string, extract_timedelta_hours, extract_timedelta_days
|
||||
convert_to_datetime_string, extract_timedelta_hours, extract_timedelta_days, detect_precision_and_convert
|
||||
)
|
||||
from .core.scrapers.realtor import RealtorScraper
|
||||
from .core.scrapers.models import ListingType, SearchPropertyType, ReturnType, Property
|
||||
@@ -20,8 +20,8 @@ def scrape_property(
|
||||
mls_only: bool = False,
|
||||
past_days: int | timedelta = None,
|
||||
proxy: str = None,
|
||||
date_from: str = None,
|
||||
date_to: str = None,
|
||||
date_from: datetime | date | str = None,
|
||||
date_to: datetime | date | str = None,
|
||||
foreclosure: bool = None,
|
||||
extra_property_data: bool = True,
|
||||
exclude_pending: bool = False,
|
||||
@@ -29,8 +29,6 @@ def scrape_property(
|
||||
offset: int = 0,
|
||||
# New date/time filtering parameters
|
||||
past_hours: int | timedelta = None,
|
||||
datetime_from: datetime | str = None,
|
||||
datetime_to: datetime | str = None,
|
||||
# New last_update_date filtering parameters
|
||||
updated_since: datetime | str = None,
|
||||
updated_in_past_hours: int | timedelta = None,
|
||||
@@ -67,7 +65,13 @@ def scrape_property(
|
||||
- PENDING: Filters by pending_date. Contingent properties without pending_date are included.
|
||||
- SOLD: Filters by sold_date (when property was sold)
|
||||
- FOR_SALE/FOR_RENT: Filters by list_date (when property was listed)
|
||||
:param date_from, date_to: Get properties sold or listed (dependent on your listing_type) between these dates. format: 2021-01-28
|
||||
:param date_from, date_to: Get properties sold or listed (dependent on your listing_type) between these dates.
|
||||
Accepts multiple formats for flexible precision:
|
||||
- Date strings: "2025-01-20" (day-level precision)
|
||||
- Datetime strings: "2025-01-20T14:30:00" (hour-level precision)
|
||||
- date objects: date(2025, 1, 20) (day-level precision)
|
||||
- datetime objects: datetime(2025, 1, 20, 14, 30) (hour-level precision)
|
||||
The precision is automatically detected based on the input format.
|
||||
:param foreclosure: If set, fetches only foreclosure listings.
|
||||
:param extra_property_data: Increases requests by O(n). If set, this fetches additional property data (e.g. agent, broker, property evaluations etc.)
|
||||
:param exclude_pending: If true, this excludes pending or contingent properties from the results, unless listing type is pending.
|
||||
@@ -76,7 +80,6 @@ def scrape_property(
|
||||
|
||||
New parameters:
|
||||
:param past_hours: Get properties in the last _ hours (requires client-side filtering). Accepts int or timedelta.
|
||||
:param datetime_from, datetime_to: Precise time filtering. Accepts datetime objects or ISO 8601 strings (e.g. "2025-01-20T14:30:00")
|
||||
:param updated_since: Filter by last_update_date (when property was last updated). Accepts datetime object or ISO 8601 string (client-side filtering)
|
||||
:param updated_in_past_hours: Filter by properties updated in the last _ hours. Accepts int or timedelta (client-side filtering)
|
||||
:param beds_min, beds_max: Filter by number of bedrooms
|
||||
@@ -91,11 +94,8 @@ def scrape_property(
|
||||
Note: past_days and past_hours also accept timedelta objects for more Pythonic usage.
|
||||
"""
|
||||
validate_input(listing_type)
|
||||
validate_dates(date_from, date_to)
|
||||
validate_limit(limit)
|
||||
validate_offset(offset, limit)
|
||||
validate_datetime(datetime_from)
|
||||
validate_datetime(datetime_to)
|
||||
validate_filters(
|
||||
beds_min, beds_max, baths_min, baths_max, sqft_min, sqft_max,
|
||||
price_min, price_max, lot_sqft_min, lot_sqft_max, year_built_min, year_built_max
|
||||
@@ -116,11 +116,16 @@ def scrape_property(
|
||||
else:
|
||||
converted_listing_type = ListingType(listing_type.upper())
|
||||
|
||||
# Convert date_from/date_to with precision detection
|
||||
converted_date_from, date_from_precision = detect_precision_and_convert(date_from)
|
||||
converted_date_to, date_to_precision = detect_precision_and_convert(date_to)
|
||||
|
||||
# Validate converted dates
|
||||
validate_dates(converted_date_from, converted_date_to)
|
||||
|
||||
# Convert datetime/timedelta objects to appropriate formats
|
||||
converted_past_days = extract_timedelta_days(past_days)
|
||||
converted_past_hours = extract_timedelta_hours(past_hours)
|
||||
converted_datetime_from = convert_to_datetime_string(datetime_from)
|
||||
converted_datetime_to = convert_to_datetime_string(datetime_to)
|
||||
converted_updated_since = convert_to_datetime_string(updated_since)
|
||||
converted_updated_in_past_hours = extract_timedelta_hours(updated_in_past_hours)
|
||||
|
||||
@@ -133,8 +138,10 @@ def scrape_property(
|
||||
radius=radius,
|
||||
mls_only=mls_only,
|
||||
last_x_days=converted_past_days,
|
||||
date_from=date_from,
|
||||
date_to=date_to,
|
||||
date_from=converted_date_from,
|
||||
date_to=converted_date_to,
|
||||
date_from_precision=date_from_precision,
|
||||
date_to_precision=date_to_precision,
|
||||
foreclosure=foreclosure,
|
||||
extra_property_data=extra_property_data,
|
||||
exclude_pending=exclude_pending,
|
||||
@@ -142,8 +149,6 @@ def scrape_property(
|
||||
offset=offset,
|
||||
# New date/time filtering
|
||||
past_hours=converted_past_hours,
|
||||
datetime_from=converted_datetime_from,
|
||||
datetime_to=converted_datetime_to,
|
||||
# New last_update_date filtering
|
||||
updated_since=converted_updated_since,
|
||||
updated_in_past_hours=converted_updated_in_past_hours,
|
||||
|
||||
@@ -21,6 +21,8 @@ class ScraperInput(BaseModel):
|
||||
last_x_days: int | None = None
|
||||
date_from: str | None = None
|
||||
date_to: str | None = None
|
||||
date_from_precision: str | None = None # "day" or "hour"
|
||||
date_to_precision: str | None = None # "day" or "hour"
|
||||
foreclosure: bool | None = False
|
||||
extra_property_data: bool | None = True
|
||||
exclude_pending: bool | None = False
|
||||
@@ -30,8 +32,6 @@ class ScraperInput(BaseModel):
|
||||
|
||||
# New date/time filtering parameters
|
||||
past_hours: int | None = None
|
||||
datetime_from: str | None = None
|
||||
datetime_to: str | None = None
|
||||
|
||||
# New last_update_date filtering parameters
|
||||
updated_since: str | None = None
|
||||
@@ -107,6 +107,8 @@ class Scraper:
|
||||
self.mls_only = scraper_input.mls_only
|
||||
self.date_from = scraper_input.date_from
|
||||
self.date_to = scraper_input.date_to
|
||||
self.date_from_precision = scraper_input.date_from_precision
|
||||
self.date_to_precision = scraper_input.date_to_precision
|
||||
self.foreclosure = scraper_input.foreclosure
|
||||
self.extra_property_data = scraper_input.extra_property_data
|
||||
self.exclude_pending = scraper_input.exclude_pending
|
||||
@@ -116,8 +118,6 @@ class Scraper:
|
||||
|
||||
# New date/time filtering
|
||||
self.past_hours = scraper_input.past_hours
|
||||
self.datetime_from = scraper_input.datetime_from
|
||||
self.datetime_to = scraper_input.datetime_to
|
||||
|
||||
# New last_update_date filtering
|
||||
self.updated_since = scraper_input.updated_since
|
||||
|
||||
@@ -164,23 +164,26 @@ class RealtorScraper(Scraper):
|
||||
|
||||
# Build date parameter (expand to full days if hour-based filtering is used)
|
||||
if date_field:
|
||||
if self.datetime_from or self.datetime_to:
|
||||
# Check if we have hour precision (need to extract date part for API, then filter client-side)
|
||||
has_hour_precision = (self.date_from_precision == "hour" or self.date_to_precision == "hour")
|
||||
|
||||
if has_hour_precision and (self.date_from or self.date_to):
|
||||
# Hour-based datetime filtering: extract date parts for API, client-side filter by hours
|
||||
from datetime import datetime
|
||||
|
||||
min_date = None
|
||||
max_date = None
|
||||
|
||||
if self.datetime_from:
|
||||
if self.date_from:
|
||||
try:
|
||||
dt_from = datetime.fromisoformat(self.datetime_from.replace('Z', '+00:00'))
|
||||
dt_from = datetime.fromisoformat(self.date_from.replace('Z', '+00:00'))
|
||||
min_date = dt_from.strftime("%Y-%m-%d")
|
||||
except (ValueError, AttributeError):
|
||||
pass
|
||||
|
||||
if self.datetime_to:
|
||||
if self.date_to:
|
||||
try:
|
||||
dt_to = datetime.fromisoformat(self.datetime_to.replace('Z', '+00:00'))
|
||||
dt_to = datetime.fromisoformat(self.date_to.replace('Z', '+00:00'))
|
||||
max_date = dt_to.strftime("%Y-%m-%d")
|
||||
except (ValueError, AttributeError):
|
||||
pass
|
||||
@@ -551,7 +554,8 @@ class RealtorScraper(Scraper):
|
||||
|
||||
# Apply client-side hour-based filtering if needed
|
||||
# (API only supports day-level filtering, so we post-filter for hour precision)
|
||||
if self.past_hours or self.datetime_from or self.datetime_to:
|
||||
has_hour_precision = (self.date_from_precision == "hour" or self.date_to_precision == "hour")
|
||||
if self.past_hours or has_hour_precision:
|
||||
homes = self._apply_hour_based_date_filter(homes)
|
||||
# Apply client-side date filtering for PENDING properties
|
||||
# (server-side filters are broken in the API)
|
||||
@@ -577,7 +581,7 @@ class RealtorScraper(Scraper):
|
||||
def _apply_hour_based_date_filter(self, homes):
|
||||
"""Apply client-side hour-based date filtering for all listing types.
|
||||
|
||||
This is used when past_hours, datetime_from, or datetime_to are specified,
|
||||
This is used when past_hours or date_from/date_to have hour precision,
|
||||
since the API only supports day-level filtering.
|
||||
"""
|
||||
if not homes:
|
||||
@@ -591,17 +595,17 @@ class RealtorScraper(Scraper):
|
||||
if self.past_hours:
|
||||
cutoff_datetime = datetime.now() - timedelta(hours=self.past_hours)
|
||||
date_range = {'type': 'since', 'date': cutoff_datetime}
|
||||
elif self.datetime_from or self.datetime_to:
|
||||
elif self.date_from or self.date_to:
|
||||
try:
|
||||
from_datetime = None
|
||||
to_datetime = None
|
||||
|
||||
if self.datetime_from:
|
||||
from_datetime_str = self.datetime_from.replace('Z', '+00:00') if self.datetime_from.endswith('Z') else self.datetime_from
|
||||
if self.date_from:
|
||||
from_datetime_str = self.date_from.replace('Z', '+00:00') if self.date_from.endswith('Z') else self.date_from
|
||||
from_datetime = datetime.fromisoformat(from_datetime_str).replace(tzinfo=None)
|
||||
|
||||
if self.datetime_to:
|
||||
to_datetime_str = self.datetime_to.replace('Z', '+00:00') if self.datetime_to.endswith('Z') else self.datetime_to
|
||||
if self.date_to:
|
||||
to_datetime_str = self.date_to.replace('Z', '+00:00') if self.date_to.endswith('Z') else self.date_to
|
||||
to_datetime = datetime.fromisoformat(to_datetime_str).replace(tzinfo=None)
|
||||
|
||||
if from_datetime and to_datetime:
|
||||
|
||||
@@ -176,20 +176,22 @@ def validate_dates(date_from: str | None, date_to: str | None) -> None:
|
||||
|
||||
if date_from and date_to:
|
||||
try:
|
||||
date_from_obj = datetime.strptime(date_from, "%Y-%m-%d")
|
||||
date_to_obj = datetime.strptime(date_to, "%Y-%m-%d")
|
||||
# Use fromisoformat to accept both date and datetime strings
|
||||
date_from_str = date_from.replace('Z', '+00:00') if date_from.endswith('Z') else date_from
|
||||
date_to_str = date_to.replace('Z', '+00:00') if date_to.endswith('Z') else date_to
|
||||
|
||||
date_from_obj = datetime.fromisoformat(date_from_str)
|
||||
date_to_obj = datetime.fromisoformat(date_to_str)
|
||||
|
||||
if date_to_obj < date_from_obj:
|
||||
raise InvalidDate(f"date_to ('{date_to}') must be after date_from ('{date_from}').")
|
||||
except ValueError as e:
|
||||
# Provide specific guidance on the expected format
|
||||
if "does not match format" in str(e):
|
||||
raise InvalidDate(
|
||||
f"Invalid date format. Expected 'YYYY-MM-DD' format. "
|
||||
f"Examples: '2025-01-20', '2024-12-31'. "
|
||||
f"Got: date_from='{date_from}', date_to='{date_to}'"
|
||||
)
|
||||
raise InvalidDate(f"Invalid date format or range: {e}")
|
||||
raise InvalidDate(
|
||||
f"Invalid date format. Expected ISO 8601 format. "
|
||||
f"Examples: '2025-01-20' (date only) or '2025-01-20T14:30:00' (with time). "
|
||||
f"Got: date_from='{date_from}', date_to='{date_to}'. Error: {e}"
|
||||
)
|
||||
|
||||
|
||||
def validate_limit(limit: int) -> None:
|
||||
@@ -413,3 +415,46 @@ def extract_timedelta_days(value) -> int | None:
|
||||
f"Invalid past_days value. Expected int or timedelta object. "
|
||||
f"Got: {type(value).__name__}"
|
||||
)
|
||||
|
||||
|
||||
def detect_precision_and_convert(value):
|
||||
"""
|
||||
Detect if input has time precision and convert to ISO string.
|
||||
|
||||
Accepts:
|
||||
- datetime.datetime objects → (ISO string, "hour")
|
||||
- datetime.date objects → (ISO string at midnight, "day")
|
||||
- ISO 8601 datetime strings with time → (string as-is, "hour")
|
||||
- Date-only strings "YYYY-MM-DD" → (string as-is, "day")
|
||||
- None → (None, None)
|
||||
|
||||
Returns:
|
||||
tuple: (iso_string, precision) where precision is "day" or "hour"
|
||||
"""
|
||||
if value is None:
|
||||
return (None, None)
|
||||
|
||||
from datetime import datetime as dt, date
|
||||
|
||||
# datetime.datetime object - has time precision
|
||||
if isinstance(value, dt):
|
||||
return (value.isoformat(), "hour")
|
||||
|
||||
# datetime.date object - day precision only
|
||||
if isinstance(value, date):
|
||||
# Convert to datetime at midnight
|
||||
return (dt.combine(value, dt.min.time()).isoformat(), "day")
|
||||
|
||||
# String - detect if it has time component
|
||||
if isinstance(value, str):
|
||||
# ISO 8601 datetime with time component (has 'T' and time)
|
||||
if 'T' in value:
|
||||
return (value, "hour")
|
||||
# Date-only string
|
||||
else:
|
||||
return (value, "day")
|
||||
|
||||
raise ValueError(
|
||||
f"Invalid date value. Expected datetime object, date object, or ISO 8601 string. "
|
||||
f"Got: {type(value).__name__}"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user