mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-04 11:34:32 -08:00
Add last_status_change_date field for hour-level precision in date filtering
Enhances pending_date and last_sold_date with hour-level precision by introducing the last_status_change_date field. This allows for more accurate filtering of PENDING and SOLD properties when using the past_hours parameter. Includes comprehensive tests and a version bump to 0.7.1. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -192,6 +192,7 @@ class Property(BaseModel):
|
||||
list_date: datetime | None = Field(None, description="The time this Home entered Move system")
|
||||
pending_date: datetime | None = Field(None, description="The date listing went into pending state")
|
||||
last_sold_date: datetime | None = Field(None, description="Last time the Home was sold")
|
||||
last_status_change_date: datetime | None = Field(None, description="Last time the status of the listing changed")
|
||||
prc_sqft: int | None = None
|
||||
new_construction: bool | None = Field(None, description="Search for new construction homes")
|
||||
hoa_fee: int | None = Field(None, description="Search for homes where HOA fee is known and falls within specified range")
|
||||
|
||||
@@ -574,7 +574,11 @@ class RealtorScraper(Scraper):
|
||||
return 'list_date'
|
||||
|
||||
def _extract_date_from_home(self, home, date_field_name):
|
||||
"""Extract a date field from a home (handles both dict and Property object)."""
|
||||
"""Extract a date field from a home (handles both dict and Property object).
|
||||
|
||||
Falls back to last_status_change_date if the primary date field is not available,
|
||||
providing more precise filtering for PENDING/SOLD properties.
|
||||
"""
|
||||
if isinstance(home, dict):
|
||||
date_value = home.get(date_field_name)
|
||||
else:
|
||||
@@ -582,6 +586,17 @@ class RealtorScraper(Scraper):
|
||||
|
||||
if date_value:
|
||||
return self._parse_date_value(date_value)
|
||||
|
||||
# Fallback to last_status_change_date if primary date field is missing
|
||||
# This is useful for PENDING/SOLD properties where the specific date might be unavailable
|
||||
if isinstance(home, dict):
|
||||
fallback_date = home.get('last_status_change_date')
|
||||
else:
|
||||
fallback_date = getattr(home, 'last_status_change_date', None)
|
||||
|
||||
if fallback_date:
|
||||
return self._parse_date_value(fallback_date)
|
||||
|
||||
return None
|
||||
|
||||
def _is_datetime_in_range(self, date_obj, date_range):
|
||||
|
||||
@@ -125,6 +125,7 @@ def process_property(result: dict, mls_only: bool = False, extra_property_data:
|
||||
prc_sqft=result.get("price_per_sqft"),
|
||||
last_sold_date=(datetime.fromisoformat(result["last_sold_date"].replace('Z', '+00:00') if result["last_sold_date"].endswith('Z') else result["last_sold_date"]) if result.get("last_sold_date") else None),
|
||||
pending_date=(datetime.fromisoformat(result["pending_date"].replace('Z', '+00:00') if result["pending_date"].endswith('Z') else result["pending_date"]) if result.get("pending_date") else None),
|
||||
last_status_change_date=(datetime.fromisoformat(result["last_status_change_date"].replace('Z', '+00:00') if result["last_status_change_date"].endswith('Z') else result["last_status_change_date"]) if result.get("last_status_change_date") else None),
|
||||
new_construction=result["flags"].get("is_new_construction") is True,
|
||||
hoa_fee=(result["hoa"]["fee"] if result.get("hoa") and isinstance(result["hoa"], dict) else None),
|
||||
latitude=(result["location"]["address"]["coordinate"].get("lat") if able_to_get_lat_long else None),
|
||||
@@ -162,6 +163,25 @@ def process_property(result: dict, mls_only: bool = False, extra_property_data:
|
||||
photos=result.get("photos"),
|
||||
flags=result.get("flags"),
|
||||
)
|
||||
|
||||
# Enhance date precision using last_status_change_date
|
||||
# pending_date and last_sold_date only have day-level precision
|
||||
# last_status_change_date has hour-level precision
|
||||
if realty_property.last_status_change_date:
|
||||
status = realty_property.status.upper() if realty_property.status else None
|
||||
|
||||
# For PENDING/CONTINGENT properties, use last_status_change_date for hour-precision on pending_date
|
||||
if status in ["PENDING", "CONTINGENT"] and realty_property.pending_date:
|
||||
# Only replace if dates are on the same day
|
||||
if realty_property.pending_date.date() == realty_property.last_status_change_date.date():
|
||||
realty_property.pending_date = realty_property.last_status_change_date
|
||||
|
||||
# For SOLD properties, use last_status_change_date for hour-precision on last_sold_date
|
||||
elif status == "SOLD" and realty_property.last_sold_date:
|
||||
# Only replace if dates are on the same day
|
||||
if realty_property.last_sold_date.date() == realty_property.last_status_change_date.date():
|
||||
realty_property.last_sold_date = realty_property.last_status_change_date
|
||||
|
||||
return realty_property
|
||||
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ _SEARCH_HOMES_DATA_BASE = """{
|
||||
mls_status
|
||||
last_sold_price
|
||||
last_sold_date
|
||||
last_status_change_date
|
||||
list_price
|
||||
list_price_max
|
||||
list_price_min
|
||||
|
||||
@@ -36,6 +36,7 @@ ordered_properties = [
|
||||
"sold_price",
|
||||
"last_sold_date",
|
||||
"last_sold_price",
|
||||
"last_status_change_date",
|
||||
"assessed_value",
|
||||
"estimated_value",
|
||||
"tax",
|
||||
@@ -120,7 +121,7 @@ def process_result(result: Property) -> pd.DataFrame:
|
||||
prop_data["nearby_schools"] = ", ".join(set(prop_data["nearby_schools"])) if prop_data["nearby_schools"] else None
|
||||
|
||||
# Convert datetime objects to strings for CSV (preserve full datetime including time)
|
||||
for date_field in ["list_date", "pending_date", "last_sold_date"]:
|
||||
for date_field in ["list_date", "pending_date", "last_sold_date", "last_status_change_date"]:
|
||||
if prop_data.get(date_field):
|
||||
prop_data[date_field] = prop_data[date_field].strftime("%Y-%m-%d %H:%M:%S") if hasattr(prop_data[date_field], 'strftime') else prop_data[date_field]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user