import pytz
from concurrent.futures import ThreadPoolExecutor, as_completed

from homeharvest import scrape_property, Property
import pandas as pd


def test_realtor_pending_or_contingent():
    pending_or_contingent_result = scrape_property(location="Surprise, AZ", listing_type="pending")

    regular_result = scrape_property(location="Surprise, AZ", listing_type="for_sale", exclude_pending=True)

    assert all([result is not None for result in [pending_or_contingent_result, regular_result]])
    assert len(pending_or_contingent_result) != len(regular_result)


def test_realtor_pending_comps():
    pending_comps = scrape_property(
        location="2530 Al Lipscomb Way",
        radius=5,
        past_days=180,
        listing_type="pending",
    )

    for_sale_comps = scrape_property(
        location="2530 Al Lipscomb Way",
        radius=5,
        past_days=180,
        listing_type="for_sale",
    )

    sold_comps = scrape_property(
        location="2530 Al Lipscomb Way",
        radius=5,
        past_days=180,
        listing_type="sold",
    )

    results = [pending_comps, for_sale_comps, sold_comps]
    assert all([result is not None for result in results])

    #: assert all lengths are different
    assert len(set([len(result) for result in results])) == len(results)


def test_realtor_sold_past():
    result = scrape_property(
        location="San Diego, CA",
        past_days=30,
        listing_type="sold",
    )

    assert result is not None and len(result) > 0


def test_realtor_comps():
    result = scrape_property(
        location="2530 Al Lipscomb Way",
        radius=0.5,
        past_days=180,
        listing_type="sold",
    )

    assert result is not None and len(result) > 0


def test_realtor_last_x_days_sold():
    days_result_30 = scrape_property(location="Dallas, TX", listing_type="sold", past_days=30)

    days_result_10 = scrape_property(location="Dallas, TX", listing_type="sold", past_days=10)

    assert all([result is not None for result in [days_result_30, days_result_10]]) and len(days_result_30) != len(
        days_result_10
    )


def test_realtor_date_range_sold():
    days_result_30 = scrape_property(
        location="Dallas, TX", listing_type="sold", date_from="2023-05-01", date_to="2023-05-28"
    )

    days_result_60 = scrape_property(
        location="Dallas, TX", listing_type="sold", date_from="2023-04-01", date_to="2023-06-10"
    )

    assert all([result is not None for result in [days_result_30, days_result_60]]) and len(days_result_30) < len(
        days_result_60
    )


def test_listing_type_none_includes_sold():
    """Test that listing_type=None includes sold listings (issue #142)"""
    # Get properties with listing_type=None (should include all common types)
    result_none = scrape_property(
        location="Warren, MI",
        listing_type=None
    )

    # Verify we got results
    assert result_none is not None and len(result_none) > 0

    # Verify sold listings are included
    status_types = set(result_none['status'].unique())
    assert 'SOLD' in status_types, "SOLD listings should be included when listing_type=None"

    # Verify we get multiple listing types (not just one)
    assert len(status_types) > 1, "Should return multiple listing types when listing_type=None"


def test_realtor_single_property():
    results = [
        scrape_property(
            location="15509 N 172nd Dr, Surprise, AZ 85388",
            listing_type="for_sale",
        ),
        scrape_property(
            location="2530 Al Lipscomb Way",
            listing_type="for_sale",
        ),
    ]

    assert all([result is not None for result in results])


def test_realtor():
    results = [
        scrape_property(
            location="2530 Al Lipscomb Way",
            listing_type="for_sale",
        ),
        scrape_property(
            location="Phoenix, AZ", listing_type="for_rent", limit=1000
        ),  #: does not support "city, state, USA" format
        scrape_property(
            location="Dallas, TX", listing_type="sold", limit=1000
        ),  #: does not support "city, state, USA" format
        scrape_property(location="85281"),
    ]

    assert all([result is not None for result in results])


def test_realtor_city():
    results = scrape_property(location="Atlanta, GA", listing_type="for_sale", limit=1000)

    assert results is not None and len(results) > 0


def test_realtor_land():
    results = scrape_property(location="Atlanta, GA", listing_type="for_sale", property_type=["land"], limit=1000)

    assert results is not None and len(results) > 0


def test_realtor_bad_address():
    bad_results = scrape_property(
        location="abceefg ju098ot498hh9",
        listing_type="for_sale",
    )

    # A gibberish address should match nothing
    assert len(bad_results) == 0


def test_realtor_foreclosed():
    foreclosed = scrape_property(location="Dallas, TX", listing_type="for_sale", past_days=100, foreclosure=True)

    not_foreclosed = scrape_property(location="Dallas, TX", listing_type="for_sale", past_days=100, foreclosure=False)

    assert len(foreclosed) != len(not_foreclosed)


def test_realtor_agent():
    scraped = scrape_property(location="Detroit, MI", listing_type="for_sale", limit=1000, extra_property_data=False)
    assert scraped["agent_name"].nunique() > 1


def test_realtor_without_extra_details():
    results = [
        scrape_property(
            location="00741",
            listing_type="sold",
            limit=10,
            extra_property_data=False,
        ),
        scrape_property(
            location="00741",
            listing_type="sold",
            limit=10,
            extra_property_data=True,
        ),
    ]

    # When extra_property_data=False, these fields should be None
    extra_fields = ["nearby_schools", "assessed_value", "tax", "tax_history"]

    # Check that all extra fields are None when extra_property_data=False
    for field in extra_fields:
        if field in results[0].columns:
            assert results[0][field].isna().all(), f"Field '{field}' should be None when extra_property_data=False"


def test_pr_zip_code():
    results = scrape_property(
        location="00741",
        listing_type="for_sale",
    )

    assert results is not None and len(results) > 0


def test_exclude_pending():
    results = scrape_property(
        location="33567",
        listing_type="pending",
        exclude_pending=True,
    )

    assert results is not None and len(results) > 0


def test_style_value_error():
    results = scrape_property(
        location="Alaska, AK",
        listing_type="sold",
        extra_property_data=False,
        limit=1000,
    )

    assert results is not None and len(results) > 0


def test_primary_image_error():
    results = scrape_property(
        location="Spokane, PA",
        listing_type="for_rent",  # or (for_sale, for_rent, pending)
        past_days=360,
        radius=3,
        extra_property_data=False,
    )

    assert results is not None and len(results) > 0


def test_limit():
    over_limit = 876
    extra_params = {"limit": over_limit}

    over_results = scrape_property(
        location="Waddell, AZ",
        listing_type="for_sale",
        **extra_params,
    )

    assert over_results is not None and len(over_results) <= over_limit

    under_limit = 1
    under_results = scrape_property(
        location="Waddell, AZ",
        listing_type="for_sale",
        limit=under_limit,
    )

    assert under_results is not None and len(under_results) == under_limit


def test_apartment_list_price():
    results = scrape_property(
        location="Spokane, WA",
        listing_type="for_rent",  # or (for_sale, for_rent, pending)
        extra_property_data=False,
    )

    assert results is not None

    results = results[results["style"] == "APARTMENT"]

    #: at least half the apartments should have one of list_price, list_price_min, list_price_max populated
    assert (
        len(results[results[["list_price", "list_price_min", "list_price_max"]].notnull().any(axis=1)]) / len(results)
        > 0.5
    )


def test_phone_number_matching():
    searches = [
        scrape_property(
            location="Phoenix, AZ",
            listing_type="for_sale",
            limit=100,
        ),
        scrape_property(
            location="Phoenix, AZ",
            listing_type="for_sale",
            limit=100,
        ),
    ]

    assert all([search is not None for search in searches])

    #: random row
    row = searches[0][searches[0]["agent_phones"].notnull()].sample()

    #: find matching row
    matching_row = searches[1].loc[searches[1]["property_url"] == row["property_url"].values[0]]

    #: assert phone numbers are the same
    assert row["agent_phones"].values[0] == matching_row["agent_phones"].values[0]


def test_parallel_search_consistency():
    """Test that the same search executed 3 times in parallel returns consistent results"""
    def search_task():
        return scrape_property(
            location="Phoenix, AZ",
            listing_type="for_sale",
            limit=100
        )

    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = [executor.submit(search_task) for _ in range(3)]
        results = [future.result() for future in as_completed(futures)]

    # Verify all results are valid
    assert all([result is not None for result in results])
    assert all([isinstance(result, pd.DataFrame) for result in results])
    assert all([len(result) > 0 for result in results])

    # Verify all results have the same length (primary consistency check)
    lengths = [len(result) for result in results]
    assert len(set(lengths)) == 1, \
        f"All parallel searches should return same number of results, got lengths: {lengths}"


def test_return_type():
    results = {
        "pandas": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100)],
        "pydantic": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic")],
        "raw": [
            scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"),
            scrape_property(location="85281", listing_type="for_rent", limit=100, return_type="raw"),
        ],
    }

    assert all(isinstance(result, pd.DataFrame) for result in results["pandas"])
    assert all(isinstance(result[0], Property) for result in results["pydantic"])
    assert all(isinstance(result[0], dict) for result in results["raw"])


def test_has_open_house():
    """Test that open_houses field is present and properly structured when it exists"""

    # Test that open_houses field exists in results (may be None if no open houses scheduled)
    address_result = scrape_property("1 Hawthorne St Unit 12F, San Francisco, CA 94105", return_type="raw")
    assert "open_houses" in address_result[0], "open_houses field should exist in address search results"

    # Test general search also includes open_houses field
    zip_code_result = scrape_property("94105", listing_type="for_sale", limit=50, return_type="raw")
    assert len(zip_code_result) > 0, "Should have results from zip code search"

    # Verify open_houses field exists in general search
    assert "open_houses" in zip_code_result[0], "open_houses field should exist in general search results"

    # If we find any properties with open houses, verify the data structure
    properties_with_open_houses = [prop for prop in zip_code_result if prop.get("open_houses") is not None]

    if properties_with_open_houses:
        # Verify structure of open_houses data
        first_with_open_house = properties_with_open_houses[0]
        assert isinstance(first_with_open_house["open_houses"], (list, dict)), \
            "open_houses should be a list or dict when present"


def test_return_type_consistency():
    """Test that return_type works consistently between general and address searches"""

    # Test configurations - different search types
    test_locations = [
        ("Dallas, TX", "general"),  # General city search
        ("75201", "zip"),  # ZIP code search
        ("2530 Al Lipscomb Way", "address")  # Address search
    ]

    for location, search_type in test_locations:
        # Test all return types for each search type
        pandas_result = scrape_property(
            location=location,
            listing_type="for_sale",
            limit=3,
            return_type="pandas"
        )

        pydantic_result = scrape_property(
            location=location,
            listing_type="for_sale",
            limit=3,
            return_type="pydantic"
        )

        raw_result = scrape_property(
            location=location,
            listing_type="for_sale",
            limit=3,
            return_type="raw"
        )

        # Validate pandas return type
        assert isinstance(pandas_result, pd.DataFrame), f"pandas result should be DataFrame for {search_type}"
        assert len(pandas_result) > 0, f"pandas result should not be empty for {search_type}"

        required_columns = ["property_id", "property_url", "list_price", "status", "formatted_address"]
        for col in required_columns:
            assert col in pandas_result.columns, f"Missing column {col} in pandas result for {search_type}"

        # Validate pydantic return type
        assert isinstance(pydantic_result, list), f"pydantic result should be list for {search_type}"
        assert len(pydantic_result) > 0, f"pydantic result should not be empty for {search_type}"

        for item in pydantic_result:
            assert isinstance(item, Property), f"pydantic items should be Property objects for {search_type}"
            assert item.property_id is not None, f"property_id should not be None for {search_type}"

        # Validate raw return type
        assert isinstance(raw_result, list), f"raw result should be list for {search_type}"
        assert len(raw_result) > 0, f"raw result should not be empty for {search_type}"

        for item in raw_result:
            assert isinstance(item, dict), f"raw items should be dict for {search_type}"
            assert "property_id" in item, f"raw items should have property_id for {search_type}"
            assert "href" in item, f"raw items should have href for {search_type}"

        # Cross-validate that different return types return related data
        pandas_ids = set(pandas_result["property_id"].tolist())
        pydantic_ids = set(prop.property_id for prop in pydantic_result)
        raw_ids = set(item["property_id"] for item in raw_result)

        # All return types should have some properties
        assert len(pandas_ids) > 0, f"pandas should return properties for {search_type}"
        assert len(pydantic_ids) > 0, f"pydantic should return properties for {search_type}"
        assert len(raw_ids) > 0, f"raw should return properties for {search_type}"


def test_pending_date_filtering():
    """Test that pending properties are properly filtered by pending_date using client-side filtering."""

    # Test 1: Verify that date filtering works with different time windows
    result_no_filter = scrape_property(
        location="Dallas, TX",
        listing_type="pending",
        limit=20
    )

    result_30_days = scrape_property(
        location="Dallas, TX",
        listing_type="pending",
        past_days=30,
        limit=20
    )

    result_10_days = scrape_property(
        location="Dallas, TX",
        listing_type="pending",
        past_days=10,
        limit=20
    )

    # Basic assertions - we should get some results
    assert result_no_filter is not None and len(result_no_filter) >= 0
    assert result_30_days is not None and len(result_30_days) >= 0
    assert result_10_days is not None and len(result_10_days) >= 0

    # Filtering should work: longer periods should return same or more results
    assert len(result_30_days) <= len(result_no_filter), "30-day filter should return <= unfiltered results"
    assert len(result_10_days) <= len(result_30_days), "10-day filter should return <= 30-day results"

    # Test 2: Verify that date range filtering works
    if len(result_no_filter) > 0:
        result_date_range = scrape_property(
            location="Dallas, TX",
            listing_type="pending",
            date_from="2025-08-01",
            date_to="2025-12-31",
            limit=20
        )

        assert result_date_range is not None
        # Date range should capture recent properties
        assert len(result_date_range) >= 0

    # Test 3: Verify that both pending and contingent properties are included
    # Get raw data to check property types
    if len(result_no_filter) > 0:
        raw_result = scrape_property(
            location="Dallas, TX",
            listing_type="pending",
            return_type="raw",
            limit=15
        )

        if raw_result:
            # Check that we get both pending and contingent properties
            pending_count = 0
            contingent_count = 0

            for prop in raw_result:
                flags = prop.get('flags', {})
                if flags.get('is_pending'):
                    pending_count += 1
                if flags.get('is_contingent'):
                    contingent_count += 1

            # We should get at least one of each type (when available)
            total_properties = pending_count + contingent_count
            assert total_properties > 0, "Should find at least some pending or contingent properties"


def test_hour_based_filtering():
    """Test the new past_hours parameter for hour-level filtering"""
    from datetime import datetime, timedelta

    # Test for sold properties with 24-hour filter
    result_24h = scrape_property(
        location="Phoenix, AZ",
        listing_type="sold",
        past_hours=24,
        limit=50
    )

    # Test for sold properties with 12-hour filter
    result_12h = scrape_property(
        location="Phoenix, AZ",
        listing_type="sold",
        past_hours=12,
        limit=50
    )

    assert result_24h is not None
    assert result_12h is not None

    # 12-hour filter should return same or fewer results than 24-hour
    if len(result_12h) > 0 and len(result_24h) > 0:
        assert len(result_12h) <= len(result_24h), "12-hour results should be <= 24-hour results"

    # Verify timestamps are within the specified hour range for 24h filter
    if len(result_24h) > 0:
        cutoff_time = datetime.now() - timedelta(hours=24)

        # Check a few results
        for idx in range(min(5, len(result_24h))):
            sold_date_str = result_24h.iloc[idx]["last_sold_date"]
            if pd.notna(sold_date_str):
                try:
                    sold_date = datetime.strptime(str(sold_date_str), "%Y-%m-%d %H:%M:%S")
                    # Date should be within last 24 hours
                    assert sold_date >= cutoff_time, f"Property sold date {sold_date} should be within last 24 hours"
                except (ValueError, TypeError):
                    pass  # Skip if date parsing fails


def test_past_hours_all_listing_types():
    """Validate that past_hours works correctly for all listing types with proper date fields"""
    from datetime import datetime, timedelta

    # Test 1: SOLD (uses last_sold_date field, server-side filters by sold_date)
    result_sold = scrape_property(
        location="Dallas, TX",
        listing_type="sold",
        past_hours=48,
        limit=20
    )

    assert result_sold is not None
    if len(result_sold) > 0:
        cutoff_48h = datetime.now() - timedelta(hours=48)

        # Verify results use sold_date and are within 48 hours
        for idx in range(min(5, len(result_sold))):
            sold_date_str = result_sold.iloc[idx]["last_sold_date"]
            if pd.notna(sold_date_str):
                try:
                    sold_date = datetime.strptime(str(sold_date_str), "%Y-%m-%d %H:%M:%S")
                    assert sold_date >= cutoff_48h, \
                        f"SOLD: last_sold_date {sold_date} should be within 48 hours"
                except (ValueError, TypeError):
                    pass

    # Test 2: FOR_SALE (uses list_date field, server-side filters by list_date)
    result_for_sale = scrape_property(
        location="Austin, TX",
        listing_type="for_sale",
        past_hours=48,
        limit=20
    )

    assert result_for_sale is not None
    if len(result_for_sale) > 0:
        cutoff_48h = datetime.now() - timedelta(hours=48)

        # Verify results use list_date and are within 48 hours
        for idx in range(min(5, len(result_for_sale))):
            list_date_str = result_for_sale.iloc[idx]["list_date"]
            if pd.notna(list_date_str):
                try:
                    list_date = datetime.strptime(str(list_date_str), "%Y-%m-%d %H:%M:%S")
                    assert list_date >= cutoff_48h, \
                        f"FOR_SALE: list_date {list_date} should be within 48 hours"
                except (ValueError, TypeError):
                    pass

    # Test 3: FOR_RENT (uses list_date field, server-side filters by list_date)
    result_for_rent = scrape_property(
        location="Houston, TX",
        listing_type="for_rent",
        past_hours=72,
        limit=20
    )

    assert result_for_rent is not None
    if len(result_for_rent) > 0:
        cutoff_72h = datetime.now() - timedelta(hours=72)

        # Verify results use list_date and are within 72 hours
        for idx in range(min(5, len(result_for_rent))):
            list_date_str = result_for_rent.iloc[idx]["list_date"]
            if pd.notna(list_date_str):
                try:
                    list_date = datetime.strptime(str(list_date_str), "%Y-%m-%d %H:%M:%S")
                    assert list_date >= cutoff_72h, \
                        f"FOR_RENT: list_date {list_date} should be within 72 hours"
                except (ValueError, TypeError):
                    pass

    # Test 4: PENDING (uses pending_date field, client-side filtering only)
    result_pending = scrape_property(
        location="San Antonio, TX",
        listing_type="pending",
        past_hours=48,
        limit=20
    )

    assert result_pending is not None
    # Note: PENDING doesn't use server-side date filtering (API filters broken)
    # Client-side filtering should still work via pending_date
    if len(result_pending) > 0:
        cutoff_48h = datetime.now() - timedelta(hours=48)

        # Verify results use pending_date (or are contingent without date)
        for idx in range(min(5, len(result_pending))):
            pending_date_str = result_pending.iloc[idx]["pending_date"]
            if pd.notna(pending_date_str):
                try:
                    pending_date = datetime.strptime(str(pending_date_str), "%Y-%m-%d %H:%M:%S")
                    assert pending_date >= cutoff_48h, \
                        f"PENDING: pending_date {pending_date} should be within 48 hours"
                except (ValueError, TypeError):
                    pass
            # else: property is contingent without pending_date, which is allowed


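# The four blocks above (and several tests below) repeat the same
# parse-and-compare pattern. A shared helper could centralize it; this is a
# sketch only, not part of the homeharvest API, and it assumes the
# "%Y-%m-%d %H:%M:%S" string format the assertions above already rely on:
def _dates_within_hours(df, column, hours, sample=5):
    """Check that the first `sample` parseable dates in `column` fall within `hours` of now."""
    from datetime import datetime, timedelta

    cutoff = datetime.now() - timedelta(hours=hours)
    for idx in range(min(sample, len(df))):
        value = df.iloc[idx][column]
        if pd.notna(value):
            try:
                parsed = datetime.strptime(str(value), "%Y-%m-%d %H:%M:%S")
            except (ValueError, TypeError):
                continue  # skip unparseable values, as the tests above do
            if parsed < cutoff:
                return False
    return True

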
def test_datetime_filtering():
    """Test date_from and date_to parameters with hour precision"""
    from datetime import datetime, timedelta

    # Get a recent date range (e.g., yesterday)
    yesterday = datetime.now() - timedelta(days=1)
    date_str = yesterday.strftime("%Y-%m-%d")

    # Test filtering for business hours (9 AM to 5 PM) on a specific day
    result = scrape_property(
        location="Dallas, TX",
        listing_type="for_sale",
        date_from=f"{date_str}T09:00:00",
        date_to=f"{date_str}T17:00:00",
        limit=30
    )

    assert result is not None

    # Test with only date_from
    result_from_only = scrape_property(
        location="Houston, TX",
        listing_type="for_sale",
        date_from=f"{date_str}T00:00:00",
        limit=30
    )

    assert result_from_only is not None

    # Test with only date_to
    result_to_only = scrape_property(
        location="Austin, TX",
        listing_type="for_sale",
        date_to=f"{date_str}T23:59:59",
        limit=30
    )

    assert result_to_only is not None


def test_full_datetime_preservation():
    """Verify that dates now include full timestamps (YYYY-MM-DD HH:MM:SS)"""

    # Test with pandas return type
    result_pandas = scrape_property(
        location="San Diego, CA",
        listing_type="sold",
        past_days=30,
        limit=10
    )

    assert result_pandas is not None and len(result_pandas) > 0

    # Check that date fields contain time information
    if len(result_pandas) > 0:
        for idx in range(min(3, len(result_pandas))):
            # Check last_sold_date
            sold_date = result_pandas.iloc[idx]["last_sold_date"]
            if pd.notna(sold_date):
                sold_date_str = str(sold_date)
                # Should contain time (HH:MM:SS), not just date
                assert " " in sold_date_str or "T" in sold_date_str, \
                    f"Date should include time component: {sold_date_str}"

    # Test with pydantic return type
    result_pydantic = scrape_property(
        location="Los Angeles, CA",
        listing_type="for_sale",
        past_days=7,
        limit=10,
        return_type="pydantic"
    )

    assert result_pydantic is not None and len(result_pydantic) > 0

    # Verify Property objects have datetime objects with time info
    for prop in result_pydantic[:3]:
        if prop.list_date:
            # Should be a datetime object, not just a date
            assert hasattr(prop.list_date, 'hour'), "list_date should be a datetime with time"


def test_beds_filtering():
    """Test bedroom filtering with beds_min and beds_max"""

    result = scrape_property(
        location="Atlanta, GA",
        listing_type="for_sale",
        beds_min=2,
        beds_max=4,
        limit=50
    )

    assert result is not None and len(result) > 0

    # Verify all properties have 2-4 bedrooms
    for idx in range(min(10, len(result))):
        beds = result.iloc[idx]["beds"]
        if pd.notna(beds):
            assert 2 <= beds <= 4, f"Property should have 2-4 beds, got {beds}"

    # Test beds_min only
    result_min = scrape_property(
        location="Denver, CO",
        listing_type="for_sale",
        beds_min=3,
        limit=30
    )

    assert result_min is not None

    # Test beds_max only
    result_max = scrape_property(
        location="Seattle, WA",
        listing_type="for_sale",
        beds_max=2,
        limit=30
    )

    assert result_max is not None


def test_baths_filtering():
    """Test bathroom filtering with baths_min and baths_max"""

    result = scrape_property(
        location="Miami, FL",
        listing_type="for_sale",
        baths_min=2.0,
        baths_max=3.5,
        limit=50
    )

    assert result is not None and len(result) > 0

    # Verify bathrooms are within range
    for idx in range(min(10, len(result))):
        full_baths = result.iloc[idx]["full_baths"]
        half_baths = result.iloc[idx]["half_baths"]

        if pd.notna(full_baths):
            total_baths = float(full_baths) + (float(half_baths) * 0.5 if pd.notna(half_baths) else 0)
            # Allow some tolerance below the 2.0 minimum as the API might count baths differently
            if total_baths > 0:
                assert total_baths >= 1.5, f"Baths should be >= 1.5 (2.0 minimum minus tolerance), got {total_baths}"


def test_sqft_filtering():
    """Test square footage filtering"""

    result = scrape_property(
        location="Portland, OR",
        listing_type="for_sale",
        sqft_min=1000,
        sqft_max=2500,
        limit=50
    )

    assert result is not None and len(result) > 0

    # Verify sqft is within range
    for idx in range(min(10, len(result))):
        sqft = result.iloc[idx]["sqft"]
        if pd.notna(sqft) and sqft > 0:
            assert 1000 <= sqft <= 2500, f"Sqft should be 1000-2500, got {sqft}"


def test_price_filtering():
    """Test price range filtering"""

    result = scrape_property(
        location="Charlotte, NC",
        listing_type="for_sale",
        price_min=200000,
        price_max=500000,
        limit=50
    )

    assert result is not None and len(result) > 0

    # Verify prices are within range
    for idx in range(min(15, len(result))):
        price = result.iloc[idx]["list_price"]
        if pd.notna(price) and price > 0:
            assert 200000 <= price <= 500000, f"Price should be $200k-$500k, got ${price}"


def test_lot_sqft_filtering():
    """Test lot size filtering"""

    result = scrape_property(
        location="Scottsdale, AZ",
        listing_type="for_sale",
        lot_sqft_min=5000,
        lot_sqft_max=15000,
        limit=30
    )

    assert result is not None
    # Results might be fewer if lot_sqft data is sparse


def test_year_built_filtering():
    """Test year built filtering"""

    result = scrape_property(
        location="Tampa, FL",
        listing_type="for_sale",
        year_built_min=2000,
        year_built_max=2024,
        limit=50
    )

    assert result is not None and len(result) > 0

    # Verify year_built is within range
    for idx in range(min(10, len(result))):
        year = result.iloc[idx]["year_built"]
        if pd.notna(year) and year > 0:
            assert 2000 <= year <= 2024, f"Year should be 2000-2024, got {year}"


def test_combined_filters():
    """Test multiple filters working together"""

    result = scrape_property(
        location="Nashville, TN",
        listing_type="for_sale",
        beds_min=3,
        baths_min=2.0,
        sqft_min=1500,
        price_min=250000,
        price_max=600000,
        year_built_min=1990,
        limit=30
    )

    assert result is not None

    # If we get results, verify they meet ALL criteria
    if len(result) > 0:
        for idx in range(min(5, len(result))):
            row = result.iloc[idx]

            # Check beds
            if pd.notna(row["beds"]):
                assert row["beds"] >= 3, f"Beds should be >= 3, got {row['beds']}"

            # Check sqft
            if pd.notna(row["sqft"]) and row["sqft"] > 0:
                assert row["sqft"] >= 1500, f"Sqft should be >= 1500, got {row['sqft']}"

            # Check price
            if pd.notna(row["list_price"]) and row["list_price"] > 0:
                assert 250000 <= row["list_price"] <= 600000, \
                    f"Price should be $250k-$600k, got ${row['list_price']}"

            # Check year
            if pd.notna(row["year_built"]) and row["year_built"] > 0:
                assert row["year_built"] >= 1990, \
                    f"Year should be >= 1990, got {row['year_built']}"


def test_sorting_by_price():
    """Test sorting by list_price with actual sort order validation"""

    # Sort ascending (cheapest first) with multi-page limit to test concatenation
    result_asc = scrape_property(
        location="Orlando, FL",
        listing_type="for_sale",
        sort_by="list_price",
        sort_direction="asc",
        limit=250  # Multi-page to test concatenation logic
    )

    assert result_asc is not None and len(result_asc) > 0

    # Verify ascending sort order (allow for None/NA values at the end)
    prices_asc = result_asc["list_price"].dropna().tolist()
    assert len(prices_asc) > 0, "No properties with prices found"
    assert prices_asc == sorted(prices_asc), f"Prices not in ascending order: {prices_asc[:10]}"

    # Sort descending (most expensive first)
    result_desc = scrape_property(
        location="San Antonio, TX",
        listing_type="for_sale",
        sort_by="list_price",
        sort_direction="desc",
        limit=250  # Multi-page to test concatenation logic
    )

    assert result_desc is not None and len(result_desc) > 0

    # Verify descending sort order (allow for None/NA values at the end)
    prices_desc = result_desc["list_price"].dropna().tolist()
    assert len(prices_desc) > 0, "No properties with prices found"
    assert prices_desc == sorted(prices_desc, reverse=True), f"Prices not in descending order: {prices_desc[:10]}"


def test_sorting_by_date():
    """Test sorting by list_date with actual sort order validation"""

    # Test descending (newest first) with multi-page limit
    result_desc = scrape_property(
        location="Columbus, OH",
        listing_type="for_sale",
        sort_by="list_date",
        sort_direction="desc",  # Newest first
        limit=250  # Multi-page to test concatenation logic
    )

    assert result_desc is not None and len(result_desc) > 0

    # Verify descending sort order (allow for None/NA values at the end)
    dates_desc = result_desc["list_date"].dropna().tolist()
    assert len(dates_desc) > 0, "No properties with dates found"
    assert dates_desc == sorted(dates_desc, reverse=True), f"Dates not in descending order (newest first): {dates_desc[:10]}"

    # Test ascending (oldest first)
    result_asc = scrape_property(
        location="Columbus, OH",
        listing_type="for_sale",
        sort_by="list_date",
        sort_direction="asc",  # Oldest first
        limit=250
    )

    assert result_asc is not None and len(result_asc) > 0

    # Verify ascending sort order
    dates_asc = result_asc["list_date"].dropna().tolist()
    assert len(dates_asc) > 0, "No properties with dates found"
    assert dates_asc == sorted(dates_asc), f"Dates not in ascending order (oldest first): {dates_asc[:10]}"


def test_sorting_by_sqft():
    """Test sorting by square footage with actual sort order validation"""

    # Test descending (largest first) with multi-page limit
    result_desc = scrape_property(
        location="Indianapolis, IN",
        listing_type="for_sale",
        sort_by="sqft",
        sort_direction="desc",  # Largest first
        limit=250  # Multi-page to test concatenation logic
    )

    assert result_desc is not None and len(result_desc) > 0

    # Verify descending sort order (allow for None/NA values at the end)
    sqfts_desc = result_desc["sqft"].dropna().tolist()
    assert len(sqfts_desc) > 0, "No properties with sqft found"
    assert sqfts_desc == sorted(sqfts_desc, reverse=True), f"Square footages not in descending order: {sqfts_desc[:10]}"

    # Test ascending (smallest first)
    result_asc = scrape_property(
        location="Indianapolis, IN",
        listing_type="for_sale",
        sort_by="sqft",
        sort_direction="asc",  # Smallest first
        limit=250
    )

    assert result_asc is not None and len(result_asc) > 0

    # Verify ascending sort order
    sqfts_asc = result_asc["sqft"].dropna().tolist()
    assert len(sqfts_asc) > 0, "No properties with sqft found"
    assert sqfts_asc == sorted(sqfts_asc), f"Square footages not in ascending order: {sqfts_asc[:10]}"


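# The three sorting tests above repeat a dropna-then-compare check. An
# equivalent helper, sketched here with plain pandas and no homeharvest
# assumptions, could replace the duplicated assertions:
def _assert_sorted(df, column, ascending=True):
    """Assert that the non-null values of `column` are sorted in the given direction."""
    values = df[column].dropna().tolist()
    assert len(values) > 0, f"No properties with {column} found"
    direction = "ascending" if ascending else "descending"
    assert values == sorted(values, reverse=not ascending), \
        f"{column} not in {direction} order: {values[:10]}"

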
def test_filter_validation_errors():
    """Test that validation catches invalid parameters"""
    import pytest

    # Test: beds_min > beds_max should raise ValueError
    with pytest.raises(ValueError, match="beds_min.*cannot be greater than.*beds_max"):
        scrape_property(
            location="Boston, MA",
            listing_type="for_sale",
            beds_min=5,
            beds_max=2,
            limit=10
        )

    # Test: invalid datetime format should raise exception
    with pytest.raises(Exception):  # InvalidDate
        scrape_property(
            location="Boston, MA",
            listing_type="for_sale",
            date_from="not-a-valid-datetime",
            limit=10
        )

    # Test: invalid sort_by value should raise ValueError
    with pytest.raises(ValueError, match="Invalid sort_by"):
        scrape_property(
            location="Boston, MA",
            listing_type="for_sale",
            sort_by="invalid_field",
            limit=10
        )

    # Test: invalid sort_direction should raise ValueError
    with pytest.raises(ValueError, match="Invalid sort_direction"):
        scrape_property(
            location="Boston, MA",
            listing_type="for_sale",
            sort_by="list_price",
            sort_direction="invalid",
            limit=10
        )


def test_backward_compatibility():
    """Ensure old parameters still work as expected"""

    # Test past_days still works
    result_past_days = scrape_property(
        location="Las Vegas, NV",
        listing_type="sold",
        past_days=30,
        limit=20
    )

    assert result_past_days is not None and len(result_past_days) > 0

    # Test date_from/date_to still work
    result_date_range = scrape_property(
        location="Memphis, TN",
        listing_type="sold",
        date_from="2024-01-01",
        date_to="2024-03-31",
        limit=20
    )

    assert result_date_range is not None

    # Test property_type still works
    result_property_type = scrape_property(
        location="Louisville, KY",
        listing_type="for_sale",
        property_type=["single_family"],
        limit=20
    )

    assert result_property_type is not None and len(result_property_type) > 0

    # Test foreclosure still works
    result_foreclosure = scrape_property(
        location="Detroit, MI",
        listing_type="for_sale",
        foreclosure=True,
        limit=15
    )

    assert result_foreclosure is not None


def test_last_status_change_date_field():
    """Test that last_status_change_date field is present and has hour-level precision"""
    from datetime import datetime

    # Test 1: Field is present in SOLD listings
    result_sold = scrape_property(
        location="Phoenix, AZ",
        listing_type="sold",
        past_days=30,
        limit=20
    )

    assert result_sold is not None and len(result_sold) > 0

    # Check that last_status_change_date column exists
    assert "last_status_change_date" in result_sold.columns, \
        "last_status_change_date column should be present in results"

    # Check that at least some properties have this field populated
    has_status_change_date = False
    for idx in range(min(10, len(result_sold))):
        status_change_date_str = result_sold.iloc[idx]["last_status_change_date"]
        if pd.notna(status_change_date_str):
            has_status_change_date = True
            # Verify it has hour-level precision (includes time)
            assert " " in str(status_change_date_str) or "T" in str(status_change_date_str), \
                f"last_status_change_date should include time component: {status_change_date_str}"
            break

    # Note: It's possible some properties don't have this field, so we just verify it exists
    # assert has_status_change_date, "At least some properties should have last_status_change_date"

    # Test 2: Field is present in PENDING listings
    result_pending = scrape_property(
        location="Dallas, TX",
        listing_type="pending",
        past_days=30,
        limit=20
    )

    assert result_pending is not None
    # Only check columns if we have results (empty DataFrame has no columns)
    if len(result_pending) > 0:
        assert "last_status_change_date" in result_pending.columns, \
            "last_status_change_date column should be present in PENDING results"

    # Test 3: Field is present in FOR_SALE listings
    result_for_sale = scrape_property(
        location="Austin, TX",
        listing_type="for_sale",
        past_days=7,
        limit=20
    )

    assert result_for_sale is not None and len(result_for_sale) > 0
    assert "last_status_change_date" in result_for_sale.columns, \
        "last_status_change_date column should be present in FOR_SALE results"


def test_last_status_change_date_precision_enhancement():
    """Test that pending_date and last_sold_date use hour-precision from last_status_change_date"""
    from datetime import datetime

    # Test with pydantic return type to examine actual Property objects
    # Use a larger time window to ensure we get some results
    result_sold = scrape_property(
        location="Phoenix, AZ",
        listing_type="sold",
        past_days=90,
        limit=30,
        return_type="pydantic"
    )

    assert result_sold is not None

    # Only run assertions if we have data (data availability may vary)
    if len(result_sold) > 0:
        # Check that dates have hour-level precision (not just date)
        for prop in result_sold[:10]:
            # If both last_sold_date and last_status_change_date exist
            if prop.last_sold_date and prop.last_status_change_date:
                # Both should be datetime objects with time info
                assert hasattr(prop.last_sold_date, 'hour'), \
                    "last_sold_date should have hour precision"
                assert hasattr(prop.last_status_change_date, 'hour'), \
                    "last_status_change_date should have hour precision"

                # If they're on the same day, the processor should have used
                # last_status_change_date to provide hour precision for last_sold_date
                if prop.last_sold_date.date() == prop.last_status_change_date.date():
                    # They should have the same timestamp (hour/minute/second)
                    assert prop.last_sold_date == prop.last_status_change_date, \
                        "last_sold_date should match last_status_change_date for hour precision"

    # Test with PENDING listings
    result_pending = scrape_property(
        location="Dallas, TX",
        listing_type="pending",
        past_days=90,
        limit=30,
        return_type="pydantic"
    )

    assert result_pending is not None

    # Only run assertions if we have data
    if len(result_pending) > 0:
        for prop in result_pending[:10]:
            # If both pending_date and last_status_change_date exist
            if prop.pending_date and prop.last_status_change_date:
                assert hasattr(prop.pending_date, 'hour'), \
                    "pending_date should have hour precision"
                assert hasattr(prop.last_status_change_date, 'hour'), \
                    "last_status_change_date should have hour precision"

                # If they're on the same day, pending_date should use the time from last_status_change_date
                if prop.pending_date.date() == prop.last_status_change_date.date():
                    assert prop.pending_date == prop.last_status_change_date, \
                        "pending_date should match last_status_change_date for hour precision"


def test_last_status_change_date_filtering_fallback():
    """Test that filtering falls back to last_status_change_date when primary date is missing"""
    from datetime import datetime, timedelta

    # This test verifies that if a property doesn't have the primary date field
    # (e.g., pending_date for PENDING listings), it can still be filtered using
    # last_status_change_date as a fallback

    # Test with PENDING properties using past_hours (client-side filtering)
    result_pending = scrape_property(
        location="Miami, FL",
        listing_type="pending",
        past_hours=72,
        limit=30
    )

    assert result_pending is not None

    # If we get results, verify they have either pending_date or last_status_change_date
    if len(result_pending) > 0:
        cutoff_time = datetime.now() - timedelta(hours=72)

        for idx in range(min(5, len(result_pending))):
            pending_date_str = result_pending.iloc[idx]["pending_date"]
            status_change_date_str = result_pending.iloc[idx]["last_status_change_date"]

            # At least one of these should be present for filtering to work
            has_date = pd.notna(pending_date_str) or pd.notna(status_change_date_str)

            # Note: Contingent properties without dates are allowed, so we don't assert here
            # The test just verifies the field exists and can be used


def test_last_status_change_date_hour_filtering():
    """Test that past_hours filtering works correctly with last_status_change_date for PENDING/SOLD"""
    from datetime import datetime, timedelta

    # Test with SOLD properties
    result_sold = scrape_property(
        location="Atlanta, GA",
        listing_type="sold",
        past_hours=48,
        limit=30
    )

    assert result_sold is not None

    if len(result_sold) > 0:
        cutoff_time = datetime.now() - timedelta(hours=48)

        # Verify that results are within 48 hours
        for idx in range(min(5, len(result_sold))):
            sold_date_str = result_sold.iloc[idx]["last_sold_date"]
            if pd.notna(sold_date_str):
                try:
                    sold_date = datetime.strptime(str(sold_date_str), "%Y-%m-%d %H:%M:%S")
                    # Should be within 48 hours with hour-level precision
                    assert sold_date >= cutoff_time, \
                        f"SOLD property last_sold_date {sold_date} should be within 48 hours of {cutoff_time}"
                except (ValueError, TypeError):
                    pass  # Skip if parsing fails

    # Test with PENDING properties
    result_pending = scrape_property(
        location="Denver, CO",
        listing_type="pending",
        past_hours=48,
        limit=30
    )

    assert result_pending is not None

    if len(result_pending) > 0:
        cutoff_time = datetime.now() - timedelta(hours=48)

        # Verify that results are within 48 hours
        for idx in range(min(5, len(result_pending))):
            pending_date_str = result_pending.iloc[idx]["pending_date"]
            if pd.notna(pending_date_str):
                try:
                    pending_date = datetime.strptime(str(pending_date_str), "%Y-%m-%d %H:%M:%S")
                    # Should be within 48 hours with hour-level precision
                    assert pending_date >= cutoff_time, \
                        f"PENDING property pending_date {pending_date} should be within 48 hours of {cutoff_time}"
                except (ValueError, TypeError):
                    pass  # Skip if parsing fails


def test_exclude_pending_with_raw_data():
    """Test that exclude_pending parameter works correctly with return_type='raw'"""

    # Query for sale properties with exclude_pending=True and raw data
    result = scrape_property(
        location="Phoenix, AZ",
        listing_type="for_sale",
        exclude_pending=True,
        return_type="raw",
        limit=50
    )

    assert result is not None and len(result) > 0

    # Verify that no pending or contingent properties are in the results
    for prop in result:
        flags = prop.get('flags', {})
        is_pending = flags.get('is_pending', False)
        is_contingent = flags.get('is_contingent', False)

        assert not is_pending, f"Property {prop.get('property_id')} should not be pending when exclude_pending=True"
        assert not is_contingent, f"Property {prop.get('property_id')} should not be contingent when exclude_pending=True"


def test_mls_only_with_raw_data():
    """Test that mls_only parameter works correctly with return_type='raw'"""

    # Query with mls_only=True and raw data
    result = scrape_property(
        location="Dallas, TX",
        listing_type="for_sale",
        mls_only=True,
        return_type="raw",
        limit=50
    )

    assert result is not None and len(result) > 0

    # Verify that all properties have MLS IDs (stored in source.id)
    for prop in result:
        source = prop.get('source', {})
        mls_id = source.get('id') if source else None

        assert mls_id is not None and mls_id != "", \
            f"Property {prop.get('property_id')} should have an MLS ID (source.id) when mls_only=True, got: {mls_id}"


def test_combined_filters_with_raw_data():
    """Test that both exclude_pending and mls_only work together with return_type='raw'"""

    # Query with both filters enabled and raw data
    result = scrape_property(
        location="Austin, TX",
        listing_type="for_sale",
        exclude_pending=True,
        mls_only=True,
        return_type="raw",
        limit=30
    )

    assert result is not None and len(result) > 0

    # Verify both filters are applied
    for prop in result:
        # Check exclude_pending filter
        flags = prop.get('flags', {})
        is_pending = flags.get('is_pending', False)
        is_contingent = flags.get('is_contingent', False)

        assert not is_pending, f"Property {prop.get('property_id')} should not be pending"
        assert not is_contingent, f"Property {prop.get('property_id')} should not be contingent"

        # Check mls_only filter
        source = prop.get('source', {})
        mls_id = source.get('id') if source else None

        assert mls_id is not None and mls_id != "", \
            f"Property {prop.get('property_id')} should have an MLS ID (source.id)"


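# The three raw-data tests above all dig into prop["flags"] and prop["source"].
# A predicate sketch, under the same assumptions those tests make about the
# raw dict shape (nothing here is a documented homeharvest structure):
def _is_active_mls_listing(prop):
    """True if a raw property dict is neither pending nor contingent and has an MLS ID."""
    flags = prop.get("flags") or {}
    source = prop.get("source") or {}
    return not flags.get("is_pending", False) \
        and not flags.get("is_contingent", False) \
        and bool(source.get("id"))

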
def test_updated_since_filtering():
    """Test the updated_since parameter for filtering by last_update_date"""
    from datetime import datetime, timedelta

    # Test 1: Filter by last update in past 10 minutes (user's example)
    cutoff_time = datetime.now() - timedelta(minutes=10)
    result_10min = scrape_property(
        location="California",
        updated_since=cutoff_time,
        sort_by="last_update_date",
        sort_direction="desc",
        limit=100
    )

    assert result_10min is not None
    print(f"\n10-minute window returned {len(result_10min)} properties")

    # Test 2: Verify all results have last_update_date within range
    if len(result_10min) > 0:
        for idx in range(min(10, len(result_10min))):
            update_date_str = result_10min.iloc[idx]["last_update_date"]
            if pd.notna(update_date_str):
                try:
                    # Handle timezone-aware datetime strings
                    date_str = str(update_date_str)
                    if '+' in date_str or date_str.endswith('Z'):
                        # Remove timezone for comparison with naive cutoff_time
                        date_str = date_str.replace('+00:00', '').replace('Z', '')
                    update_date = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")

                    assert update_date >= cutoff_time, \
                        f"Property last_update_date {update_date} should be >= {cutoff_time}"
                    print(f"Property {idx}: last_update_date = {update_date} (valid)")
                except (ValueError, TypeError) as e:
                    print(f"Warning: Could not parse date {update_date_str}: {e}")

    # Test 3: Compare different time windows
    result_1hour = scrape_property(
        location="California",
        updated_since=datetime.now() - timedelta(hours=1),
        limit=50
    )

    result_24hours = scrape_property(
        location="California",
        updated_since=datetime.now() - timedelta(hours=24),
        limit=50
    )

    print(f"1-hour window: {len(result_1hour)} properties")
    print(f"24-hour window: {len(result_24hours)} properties")

    # Longer time window should return same or more results
    if len(result_1hour) > 0 and len(result_24hours) > 0:
        assert len(result_1hour) <= len(result_24hours), \
            "1-hour filter should return <= 24-hour results"

    # Test 4: Verify sorting works with filtering
    if len(result_10min) > 1:
        # Get non-null dates
        dates = []
        for idx in range(len(result_10min)):
            date_str = result_10min.iloc[idx]["last_update_date"]
            if pd.notna(date_str):
                try:
                    # Handle timezone-aware datetime strings
                    clean_date_str = str(date_str)
                    if '+' in clean_date_str or clean_date_str.endswith('Z'):
                        clean_date_str = clean_date_str.replace('+00:00', '').replace('Z', '')
                    dates.append(datetime.strptime(clean_date_str, "%Y-%m-%d %H:%M:%S"))
                except (ValueError, TypeError):
                    pass

        if len(dates) > 1:
            # Check if sorted descending
            for i in range(len(dates) - 1):
                assert dates[i] >= dates[i + 1], \
                    f"Results should be sorted by last_update_date descending: {dates[i]} >= {dates[i+1]}"


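# The timezone handling above (strip "+00:00"/"Z", then strptime) recurs in the
# two optimization tests below as well. A naive-datetime parser sketch,
# assuming only the string formats these tests already expect:
def _parse_naive(date_value):
    """Parse a possibly timezone-suffixed timestamp into a naive datetime, or None."""
    from datetime import datetime

    text = str(date_value).replace("+00:00", "").replace("Z", "")
    try:
        return datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
    except (ValueError, TypeError):
        return None

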
def test_updated_since_optimization():
    """Test that updated_since optimization works (auto-sort + early termination)"""
    from datetime import datetime, timedelta
    import time

    # Test 1: Verify auto-sort is applied when using updated_since without explicit sort
    start_time = time.time()
    result = scrape_property(
        location="California",
        updated_since=datetime.now() - timedelta(minutes=5),
        # NO sort_by specified - should auto-apply sort_by="last_update_date"
        limit=50
    )
    elapsed_time = time.time() - start_time

    print(f"\nAuto-sort test: {len(result)} properties in {elapsed_time:.2f}s")

    # Should complete quickly due to early termination optimization (<5 seconds)
    assert elapsed_time < 5.0, f"Query should be fast with optimization, took {elapsed_time:.2f}s"

    # Verify results are sorted by last_update_date (proving auto-sort worked)
    if len(result) > 1:
        dates = []
        for idx in range(min(10, len(result))):
            date_str = result.iloc[idx]["last_update_date"]
            if pd.notna(date_str):
                try:
                    clean_date_str = str(date_str)
                    if '+' in clean_date_str or clean_date_str.endswith('Z'):
                        clean_date_str = clean_date_str.replace('+00:00', '').replace('Z', '')
                    dates.append(datetime.strptime(clean_date_str, "%Y-%m-%d %H:%M:%S"))
                except (ValueError, TypeError):
                    pass

        if len(dates) > 1:
            # Verify descending order (most recent first)
            for i in range(len(dates) - 1):
                assert dates[i] >= dates[i + 1], \
                    "Auto-applied sort should order by last_update_date descending"

    print("Auto-sort optimization verified ✓")


def test_pending_date_optimization():
    """Test that PENDING + date filters get auto-sort and early termination"""
    from datetime import datetime, timedelta
    import time

    # Test: Verify auto-sort is applied for PENDING with past_days
    start_time = time.time()
    result = scrape_property(
        location="California",
        listing_type="pending",
        past_days=7,
        # NO sort_by specified - should auto-apply sort_by="pending_date"
        limit=50
    )
    elapsed_time = time.time() - start_time

    print(f"\nPENDING auto-sort test: {len(result)} properties in {elapsed_time:.2f}s")

    # Should complete quickly due to optimization (<10 seconds)
    assert elapsed_time < 10.0, f"PENDING query should be fast with optimization, took {elapsed_time:.2f}s"

    # Verify results are sorted by pending_date (proving auto-sort worked)
    if len(result) > 1:
        dates = []
        for idx in range(min(10, len(result))):
            date_str = result.iloc[idx]["pending_date"]
            if pd.notna(date_str):
                try:
                    clean_date_str = str(date_str)
                    if '+' in clean_date_str or clean_date_str.endswith('Z'):
                        clean_date_str = clean_date_str.replace('+00:00', '').replace('Z', '')
                    dates.append(datetime.strptime(clean_date_str, "%Y-%m-%d %H:%M:%S"))
                except (ValueError, TypeError):
                    pass

        if len(dates) > 1:
            # Verify descending order (most recent first)
            for i in range(len(dates) - 1):
                assert dates[i] >= dates[i + 1], \
                    "PENDING auto-applied sort should order by pending_date descending"

    print("PENDING optimization verified ✓")


def test_basic_last_update_date():
    """Test updated_since with a naive datetime, compared against UTC values in the result"""
    from datetime import datetime, timedelta

    # Test with naive datetime (treated as local time)
    now = datetime.now()

    properties = scrape_property(
        "California",
        updated_since=now - timedelta(minutes=10),
        sort_by="last_update_date",
        sort_direction="desc"
    )

    # Convert now to timezone-aware for comparison with UTC dates in DataFrame
    now_utc = now.astimezone(tz=pytz.timezone("UTC"))

    # Check all last_update_date values are <= now
    assert (properties["last_update_date"] <= now_utc).all()

    # Verify we got some results
    assert len(properties) > 0


def test_timezone_aware_last_update_date():
    """Test that timezone-aware datetimes work correctly for updated_since"""
    from datetime import datetime, timedelta, timezone

    # Test with timezone-aware datetime (explicit UTC)
    now_utc = datetime.now(timezone.utc)

    properties = scrape_property(
        "California",
        updated_since=now_utc - timedelta(minutes=10),
        sort_by="last_update_date",
        sort_direction="desc"
    )

    # Check all last_update_date values are <= now
    assert (properties["last_update_date"] <= now_utc).all()

    # Verify we got some results
    assert len(properties) > 0


def test_timezone_handling_date_range():
    """Test timezone handling for date_from and date_to parameters"""
    from datetime import datetime, timedelta

    # Test with naive datetimes for date range (PENDING properties)
    now = datetime.now()
    three_days_ago = now - timedelta(days=3)

    properties = scrape_property(
        "California",
        listing_type="pending",
        date_from=three_days_ago,
        date_to=now
    )

    # Verify we got results and they're within the date range
    if len(properties) > 0:
        # Convert now to UTC for comparison
        now_utc = now.astimezone(tz=pytz.timezone("UTC"))
        assert (properties["pending_date"] <= now_utc).all()
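

# For reference, the conversion pattern the two timezone tests above rely on —
# turning a naive local datetime into an aware UTC one before comparing against
# the UTC timestamps in the DataFrame — in isolation (pure pytz/datetime,
# no scraping involved):
#
#     from datetime import datetime
#     naive_local = datetime.now()
#     aware_utc = naive_local.astimezone(tz=pytz.timezone("UTC"))
#     assert aware_utc.tzinfo is not None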