Add last_update_date filtering and improve time interface DX

Part A: Add last_update_date filtering (client-side)
- Add updated_since parameter (accepts datetime object or ISO string)
- Add updated_in_past_hours parameter (accepts int or timedelta)
- Implement _apply_last_update_date_filter() method for client-side filtering
- Add mutual exclusion validation for updated_* parameters

Part B: Improve time interface DX
- Accept datetime/timedelta objects for datetime_from, datetime_to
- Accept timedelta objects for past_hours, past_days
- Add type conversion helper functions in utils.py
- Improve validation error messages with specific examples
- Update validate_datetime to accept datetime objects

Helper functions added:
- convert_to_datetime_string() - Converts datetime objects to ISO strings
- extract_timedelta_hours() - Extracts hours from timedelta objects
- extract_timedelta_days() - Extracts days from timedelta objects
- validate_last_update_filters() - Validates last_update_date parameters

All changes are backward compatible - existing string/int parameters still work.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Zachary Hampton
2025-11-11 12:00:15 -08:00
parent 3a0e91b876
commit a6fe0d2675
4 changed files with 237 additions and 21 deletions

View File

@@ -558,6 +558,10 @@ class RealtorScraper(Scraper):
elif self.listing_type == ListingType.PENDING and (self.last_x_days or self.date_from):
homes = self._apply_pending_date_filter(homes)
# Apply client-side filtering by last_update_date if specified
if self.updated_since or self.updated_in_past_hours:
homes = self._apply_last_update_date_filter(homes)
# Apply client-side sort to ensure results are properly ordered
# This is necessary after filtering and to guarantee sort order across page boundaries
if self.sort_by:
@@ -729,7 +733,51 @@ class RealtorScraper(Scraper):
if hasattr(home, 'flags') and home.flags:
return getattr(home.flags, 'is_contingent', False)
return False
def _apply_last_update_date_filter(self, homes):
    """Apply client-side filtering by last_update_date.

    Used when ``updated_since`` or ``updated_in_past_hours`` is set on the
    instance. ``updated_in_past_hours`` takes precedence when both are set.

    Args:
        homes: Properties to filter.

    Returns:
        A new list holding only the homes whose ``last_update_date`` passes
        the ``since`` check. Homes with no ``last_update_date`` are dropped.
        ``homes`` is returned unchanged when it is empty, when neither
        filter is set, or when ``updated_since`` cannot be parsed.
    """
    if not homes:
        return homes

    from datetime import datetime, timedelta

    cutoff = None
    if self.updated_in_past_hours:
        window = self.updated_in_past_hours
        # Accept a ready-made timedelta as well as a plain number of hours;
        # timedelta(hours=<timedelta>) would raise TypeError.
        if not isinstance(window, timedelta):
            window = timedelta(hours=window)
        cutoff = datetime.now() - window
    elif self.updated_since:
        since = self.updated_since
        if isinstance(since, datetime):
            # A datetime object has no str-style replace()/endswith(), so
            # handle it directly instead of sending it through the parser.
            cutoff = since.replace(tzinfo=None)
        else:
            try:
                # A trailing 'Z' is rewritten to an explicit UTC offset
                # before the string is handed to fromisoformat().
                iso_text = since.replace('Z', '+00:00') if since.endswith('Z') else since
                cutoff = datetime.fromisoformat(iso_text).replace(tzinfo=None)
            except (ValueError, AttributeError):
                # Best effort: an unparsable value disables the filter.
                return homes
        # NOTE(review): tzinfo is discarded rather than converted, so a
        # non-UTC offset is compared by its wall-clock value - confirm this
        # matches the dates returned by _extract_date_from_home.

    if cutoff is None:
        return homes

    date_range = {'type': 'since', 'date': cutoff}

    filtered_homes = []
    for home in homes:
        property_date = self._extract_date_from_home(home, 'last_update_date')
        # Homes without a last_update_date cannot be compared; drop them.
        if property_date is not None and self._is_datetime_in_range(property_date, date_range):
            filtered_homes.append(home)
    return filtered_homes
def _get_date_range(self):
"""Get the date range for filtering based on instance parameters."""
from datetime import datetime, timedelta