mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-04 19:44:29 -08:00
Fix exclude_pending and mls_only filters not working with raw return type
When return_type="raw" was specified, the exclude_pending and mls_only parameters were ignored because these filters only existed in process_property(), which is bypassed for raw data returns.

Changes:
- Added _apply_raw_data_filters() method to handle client-side filtering for raw data
- Applied the filter in search() method after sorting but before returning
- Fixed exclude_pending to check flags.is_pending and flags.is_contingent
- Fixed mls_only to check source.id (not mls.id, which doesn't exist in raw data)
- Added comprehensive tests for both filters with raw data

Fixes #140

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -522,6 +522,11 @@ class RealtorScraper(Scraper):
|
||||
if self.sort_by:
|
||||
homes = self._apply_sort(homes)
|
||||
|
||||
# Apply raw data filters (exclude_pending and mls_only) for raw return type
|
||||
# These filters are normally applied in process_property() but are bypassed for raw data
|
||||
if self.return_type == ReturnType.raw:
|
||||
homes = self._apply_raw_data_filters(homes)
|
||||
|
||||
return homes
|
||||
|
||||
def _apply_hour_based_date_filter(self, homes):
|
||||
@@ -800,6 +805,47 @@ class RealtorScraper(Scraper):
|
||||
|
||||
return sorted_homes
|
||||
|
||||
def _apply_raw_data_filters(self, homes):
    """Apply the exclude_pending and mls_only filters to raw (dict) results.

    These filters are normally applied in process_property(), but that function
    is bypassed when return_type="raw", so they are applied here instead.

    Args:
        homes: List of properties (either raw dicts or Property objects).

    Returns:
        ``homes`` itself when it is empty or its first element is not a dict;
        otherwise a new list holding only the dicts that pass every enabled
        filter, in their original order.
    """
    # Only raw dicts are filtered here. Non-dict results are returned untouched
    # (they are expected to have been filtered in process_property()). Only the
    # first element is inspected, so the list is assumed to be homogeneous.
    if not homes or not isinstance(homes[0], dict):
        return homes

    # Loop-invariant: pending/contingent listings are dropped only when the
    # caller did not explicitly request the PENDING listing type.
    drop_pending = self.exclude_pending and self.listing_type != ListingType.PENDING

    filtered_homes = []
    for home in homes:
        if drop_pending:
            # `or {}` (rather than a .get default) also covers a "flags" key
            # that is present but None, which would otherwise raise
            # AttributeError on the lookups below.
            flags = home.get("flags") or {}
            if flags.get("is_pending") or flags.get("is_contingent"):
                continue  # Skip this property

        if self.mls_only:
            # A missing, None, or empty "source", or a falsy source id, means
            # the listing is not kept when mls_only is set.
            source = home.get("source") or {}
            if not source.get("id"):
                continue  # Skip this property

        filtered_homes.append(home)

    return filtered_homes
|
||||
|
||||
|
||||
@retry(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "homeharvest"
|
||||
version = "0.7.2"
|
||||
version = "0.7.3"
|
||||
description = "Real estate scraping library"
|
||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
|
||||
|
||||
@@ -1270,3 +1270,83 @@ def test_last_status_change_date_hour_filtering():
|
||||
f"PENDING property pending_date {pending_date} should be within 48 hours of {cutoff_time}"
|
||||
except (ValueError, TypeError):
|
||||
pass # Skip if parsing fails
|
||||
|
||||
|
||||
def test_exclude_pending_with_raw_data():
    """Test that exclude_pending parameter works correctly with return_type='raw'"""

    # Query for sale properties with exclude_pending=True and raw data
    result = scrape_property(
        location="Phoenix, AZ",
        listing_type="for_sale",
        exclude_pending=True,
        return_type="raw",
        limit=50,
    )

    assert result is not None and len(result) > 0

    # Verify that no pending or contingent properties are in the results
    for prop in result:
        # `or {}` treats a "flags" key that is present but None as "no flags
        # set", so the test reports an assertion result instead of dying with
        # AttributeError on the lookups below.
        flags = prop.get('flags') or {}
        is_pending = flags.get('is_pending', False)
        is_contingent = flags.get('is_contingent', False)

        assert not is_pending, f"Property {prop.get('property_id')} should not be pending when exclude_pending=True"
        assert not is_contingent, f"Property {prop.get('property_id')} should not be contingent when exclude_pending=True"
|
||||
|
||||
|
||||
def test_mls_only_with_raw_data():
    """Check that mls_only=True is honored when return_type='raw' is requested."""

    # Search parameters: for-sale listings, MLS-only, returned as raw dicts.
    query = dict(
        location="Dallas, TX",
        listing_type="for_sale",
        mls_only=True,
        return_type="raw",
        limit=50,
    )
    raw_listings = scrape_property(**query)

    # The search has to produce at least one listing for the check to mean anything.
    assert raw_listings is not None and len(raw_listings) > 0

    for prop in raw_listings:
        # The MLS identifier of a raw listing is read from source["id"];
        # a missing or falsy "source" counts as having no identifier.
        source_info = prop.get('source', {})
        if source_info:
            mls_id = source_info.get('id')
        else:
            mls_id = None

        has_mls_id = mls_id is not None and mls_id != ""
        assert has_mls_id, \
            f"Property {prop.get('property_id')} should have an MLS ID (source.id) when mls_only=True, got: {mls_id}"
|
||||
|
||||
|
||||
def test_combined_filters_with_raw_data():
    """Test that both exclude_pending and mls_only work together with return_type='raw'"""

    # Query with both filters enabled and raw data
    result = scrape_property(
        location="Austin, TX",
        listing_type="for_sale",
        exclude_pending=True,
        mls_only=True,
        return_type="raw",
        limit=30,
    )

    assert result is not None and len(result) > 0

    # Verify both filters are applied
    for prop in result:
        # Check exclude_pending filter.
        # `or {}` treats a "flags" key that is present but None as "no flags
        # set", avoiding an AttributeError on the lookups below.
        flags = prop.get('flags') or {}
        is_pending = flags.get('is_pending', False)
        is_contingent = flags.get('is_contingent', False)

        assert not is_pending, f"Property {prop.get('property_id')} should not be pending"
        assert not is_contingent, f"Property {prop.get('property_id')} should not be contingent"

        # Check mls_only filter: the MLS ID is read from source["id"], and a
        # missing or falsy "source" counts as having no ID.
        source = prop.get('source', {})
        mls_id = source.get('id') if source else None

        assert mls_id is not None and mls_id != "", \
            f"Property {prop.get('property_id')} should have an MLS ID (source.id)"
|
||||
Reference in New Issue
Block a user