# Files
# HomeHarvest/tests/test_realtor.py
# 2025-07-15 13:21:48 -07:00
#
# 386 lines
# 12 KiB
# Python

from homeharvest import scrape_property, Property
import pandas as pd
def test_realtor_pending_or_contingent():
    """Check that a pending search and a for-sale search with pending excluded differ in size."""
    area = "Surprise, AZ"
    pending_listings = scrape_property(location=area, listing_type="pending")
    active_listings = scrape_property(location=area, listing_type="for_sale", exclude_pending=True)

    #: both searches must return something before their sizes are compared
    for listings in (pending_listings, active_listings):
        assert listings is not None

    assert len(pending_listings) != len(active_listings)
def test_realtor_pending_comps():
    """Check that pending, for-sale and sold searches around one address all differ in size."""
    shared = {"location": "2530 Al Lipscomb Way", "radius": 5, "past_days": 180}
    comps_by_type = [
        scrape_property(listing_type=kind, **shared)
        for kind in ("pending", "for_sale", "sold")
    ]

    assert not any(comps is None for comps in comps_by_type)

    #: every result set must have a distinct length
    distinct_sizes = {len(comps) for comps in comps_by_type}
    assert len(distinct_sizes) == len(comps_by_type)
def test_realtor_sold_past():
    """Check that a sold search for San Diego over the past 30 days returns rows."""
    query = {"location": "San Diego, CA", "past_days": 30, "listing_type": "sold"}
    sold = scrape_property(**query)

    assert sold is not None
    assert len(sold) > 0
def test_realtor_comps():
    """Check that a sold search within a 0.5 radius of an address returns rows."""
    query = {
        "location": "2530 Al Lipscomb Way",
        "radius": 0.5,
        "past_days": 180,
        "listing_type": "sold",
    }
    comps = scrape_property(**query)

    assert comps is not None
    assert len(comps) > 0
def test_realtor_last_x_days_sold():
    """Check that sold searches over 30 and 10 past days return differently sized results."""
    sold_by_window = {
        days: scrape_property(location="Dallas, TX", listing_type="sold", past_days=days)
        for days in (30, 10)
    }

    assert all(listings is not None for listings in sold_by_window.values())
    assert len(sold_by_window[30]) != len(sold_by_window[10])
def test_realtor_date_range_sold():
    """Check that a wider sold date range returns more rows than a narrower one inside it."""
    base = {"location": "Dallas, TX", "listing_type": "sold"}
    narrow_range = scrape_property(date_from="2023-05-01", date_to="2023-05-28", **base)
    wide_range = scrape_property(date_from="2023-04-01", date_to="2023-06-10", **base)

    assert narrow_range is not None and wide_range is not None
    assert len(narrow_range) < len(wide_range)
def test_realtor_single_property():
    """Check that for-sale searches on two individual addresses each return a result object."""
    addresses = (
        "15509 N 172nd Dr, Surprise, AZ 85388",
        "2530 Al Lipscomb Way",
    )
    #: run every search first, then validate, so both requests are always issued
    listings = [scrape_property(location=address, listing_type="for_sale") for address in addresses]

    for listing in listings:
        assert listing is not None
def test_realtor():
    """Check that address, city (rent and sold) and ZIP searches all return a result object."""
    searches = (
        {"location": "2530 Al Lipscomb Way", "listing_type": "for_sale"},
        #: does not support "city, state, USA" format
        {"location": "Phoenix, AZ", "listing_type": "for_rent", "limit": 1000},
        #: does not support "city, state, USA" format
        {"location": "Dallas, TX", "listing_type": "sold", "limit": 1000},
        {"location": "85281"},
    )
    scraped = [scrape_property(**params) for params in searches]

    #: identity check only; an `in` test would trigger element-wise == on the results
    assert all(outcome is not None for outcome in scraped)
def test_realtor_city():
    """Check that a for-sale city search for Atlanta returns rows."""
    listings = scrape_property(
        location="Atlanta, GA",
        listing_type="for_sale",
        limit=1000,
    )

    assert listings is not None
    assert len(listings) > 0
def test_realtor_land():
    """Check that a for-sale search restricted to the "land" property type returns rows."""
    land_listings = scrape_property(
        location="Atlanta, GA",
        listing_type="for_sale",
        property_type=["land"],
        limit=1000,
    )

    assert land_listings is not None
    assert len(land_listings) > 0
def test_realtor_bad_address():
    """Check that a nonsense location string yields an empty result set."""
    bad_results = scrape_property(
        location="abceefg ju098ot498hh9",
        listing_type="for_sale",
    )

    #: assert unconditionally so that a non-empty result actually fails the test;
    #: wrapping `assert True` in an `if` can never fail
    assert len(bad_results) == 0, "expected no listings for an unrecognizable location"
def test_realtor_foreclosed():
    """Check that toggling the foreclosure flag changes how many for-sale rows come back."""
    base = {"location": "Dallas, TX", "listing_type": "for_sale", "past_days": 100}
    with_flag = scrape_property(foreclosure=True, **base)
    without_flag = scrape_property(foreclosure=False, **base)

    assert len(with_flag) != len(without_flag)
def test_realtor_agent():
    """Check that a Detroit for-sale search contains more than one distinct agent name."""
    listings = scrape_property(
        location="Detroit, MI",
        listing_type="for_sale",
        limit=1000,
        extra_property_data=False,
    )
    distinct_agents = listings["agent_name"].nunique()

    assert distinct_agents > 1
def test_realtor_without_extra_details():
    """Check that the same search differs when extra_property_data is off versus on."""
    base = {"location": "00741", "listing_type": "sold", "limit": 10}
    #: order matters: the search without extra data runs first, then the one with it
    without_extra = scrape_property(extra_property_data=False, **base)
    with_extra = scrape_property(extra_property_data=True, **base)

    assert not without_extra.equals(with_extra)
def test_pr_zip_code():
    """Check that a for-sale search on ZIP code 00741 returns rows."""
    #: NOTE(review): presumably a Puerto Rico ZIP code, going by the test name — confirm
    listings = scrape_property(location="00741", listing_type="for_sale")

    assert listings is not None
    assert len(listings) > 0
def test_exclude_pending():
    """Check that a pending search with exclude_pending=True still returns rows."""
    query = {
        "location": "33567",
        "listing_type": "pending",
        "exclude_pending": True,
    }
    listings = scrape_property(**query)

    assert listings is not None
    assert len(listings) > 0
def test_style_value_error():
    """Check that a sold search for "Alaska, AK" without extra property data returns rows."""
    #: NOTE(review): presumably a regression test for a ValueError on the style field — confirm
    query = {
        "location": "Alaska, AK",
        "listing_type": "sold",
        "extra_property_data": False,
        "limit": 1000,
    }
    listings = scrape_property(**query)

    assert listings is not None
    assert len(listings) > 0
def test_primary_image_error():
    """Check that a for-rent radius search without extra property data returns rows."""
    #: NOTE(review): presumably a regression test for a primary-image parsing error — confirm
    query = {
        "location": "Spokane, PA",
        "listing_type": "for_rent",  # or (for_sale, for_rent, pending)
        "past_days": 360,
        "radius": 3,
        "extra_property_data": False,
    }
    listings = scrape_property(**query)

    assert listings is not None
    assert len(listings) > 0
def test_limit():
    """Check that the limit argument caps the result count, and that limit=1 returns one row."""
    search = {"location": "Waddell, AZ", "listing_type": "for_sale"}

    #: a large limit is an upper bound only; fewer rows are acceptable
    cap = 876
    capped = scrape_property(limit=cap, **search)
    assert capped is not None
    assert len(capped) <= cap

    #: a limit of one must be met exactly
    single_cap = 1
    single = scrape_property(limit=single_cap, **search)
    assert single is not None
    assert len(single) == single_cap
def test_apartment_list_price():
    """Check that more than half of the APARTMENT rentals in Spokane carry some list price.

    A row counts as priced when at least one of list_price, list_price_min or
    list_price_max is non-null.
    """
    results = scrape_property(
        location="Spokane, WA",
        listing_type="for_rent",  # or (for_sale, for_rent, pending)
        extra_property_data=False,
    )
    assert results is not None

    apartments = results[results["style"] == "APARTMENT"]
    #: guard the ratio below; with no apartments the division would raise ZeroDivisionError
    assert len(apartments) > 0, "expected at least one APARTMENT listing to evaluate"

    #: fraction of apartments with at least one of the price columns populated
    price_columns = ["list_price", "list_price_min", "list_price_max"]
    has_any_price = apartments[price_columns].notnull().any(axis=1)
    priced_share = len(apartments[has_any_price]) / len(apartments)

    assert priced_share > 0.5
def test_builder_exists():
    """Check that a search on one address yields at least one non-null builder_name value."""
    query = {
        "location": "18149 W Poston Dr, Surprise, AZ 85387",
        "extra_property_data": False,
    }
    listing = scrape_property(**query)

    assert listing is not None
    builder_count = listing["builder_name"].nunique()
    assert builder_count > 0
def test_phone_number_matching():
    """Check that one listing reports the same agent phones across two identical searches.

    A random row with phone data is taken from the first search and looked up in
    the second search by property_url.
    """
    searches = [
        scrape_property(
            location="Phoenix, AZ",
            listing_type="for_sale",
            limit=100,
        ),
        scrape_property(
            location="Phoenix, AZ",
            listing_type="for_sale",
            limit=100,
        ),
    ]
    assert all(search is not None for search in searches)
    first, second = searches

    #: random row, drawn only from rows that have agent phone data
    with_phones = first[first["agent_phones"].notnull()]
    #: sampling from an empty frame raises ValueError; fail with a clear message instead
    assert len(with_phones) > 0, "first search returned no rows with agent_phones"
    row = with_phones.sample()

    #: find matching row
    sampled_url = row["property_url"].values[0]
    matching_row = second.loc[second["property_url"] == sampled_url]
    #: indexing an empty match raises IndexError; fail with a clear message instead
    assert len(matching_row) > 0, f"sampled listing {sampled_url} is missing from the second search"

    #: assert phone numbers are the same
    assert row["agent_phones"].values[0] == matching_row["agent_phones"].values[0]
def test_return_type():
    """Check the container type produced by the default, "pydantic" and "raw" return types."""
    common = {"listing_type": "for_rent", "limit": 100}

    #: call order: default (pandas), pydantic, then the two raw searches
    frames = [scrape_property(location="Surprise, AZ", **common)]
    models = [scrape_property(location="Surprise, AZ", return_type="pydantic", **common)]
    raw_payloads = [
        scrape_property(location=place, return_type="raw", **common)
        for place in ("Surprise, AZ", "66642")
    ]

    for frame in frames:
        assert isinstance(frame, pd.DataFrame)
    #: for the list-like return types only the first element is type-checked
    for model_list in models:
        assert isinstance(model_list[0], Property)
    for payload in raw_payloads:
        assert isinstance(payload[0], dict)
def test_has_open_house():
    """Check that open_houses is populated for one property via address and via ZIP search."""
    by_address = scrape_property("1 Hawthorne St Unit 12F, San Francisco, CA 94105", return_type="raw")
    first_hit = by_address[0]
    assert first_hit["open_houses"] is not None  #: has open house data from address search

    by_zip = scrape_property("94105", return_type="raw")
    #: pick the hard-coded property id out of the ZIP-wide results
    same_property = [row for row in by_zip if row["property_id"] == '1264014746']
    assert same_property[0]["open_houses"] is not None  #: has open house data from general search
def test_return_type_consistency():
    """Verify every return_type yields well-formed, non-empty output for city, ZIP and address searches."""
    # Search kinds under test, as (location, label used in failure messages)
    scenarios = (
        ("Dallas, TX", "general"),  # city-level search
        ("75201", "zip"),  # ZIP code search
        ("2530 Al Lipscomb Way", "address"),  # single-address search
    )

    for location, search_type in scenarios:
        base = {"location": location, "listing_type": "for_sale", "limit": 3}

        # Run the same search once per return type: pandas, pydantic, raw
        pandas_result = scrape_property(return_type="pandas", **base)
        pydantic_result = scrape_property(return_type="pydantic", **base)
        raw_result = scrape_property(return_type="raw", **base)

        # pandas: a non-empty DataFrame carrying the core columns
        assert isinstance(pandas_result, pd.DataFrame), f"pandas result should be DataFrame for {search_type}"
        assert len(pandas_result) > 0, f"pandas result should not be empty for {search_type}"
        for col in ("property_id", "property_url", "list_price", "status", "formatted_address"):
            assert col in pandas_result.columns, f"Missing column {col} in pandas result for {search_type}"

        # pydantic: a non-empty list of Property objects, each with an id
        assert isinstance(pydantic_result, list), f"pydantic result should be list for {search_type}"
        assert len(pydantic_result) > 0, f"pydantic result should not be empty for {search_type}"
        for prop in pydantic_result:
            assert isinstance(prop, Property), f"pydantic items should be Property objects for {search_type}"
            assert prop.property_id is not None, f"property_id should not be None for {search_type}"

        # raw: a non-empty list of dicts exposing property_id and href
        assert isinstance(raw_result, list), f"raw result should be list for {search_type}"
        assert len(raw_result) > 0, f"raw result should not be empty for {search_type}"
        for entry in raw_result:
            assert isinstance(entry, dict), f"raw items should be dict for {search_type}"
            assert "property_id" in entry, f"raw items should have property_id for {search_type}"
            assert "href" in entry, f"raw items should have href for {search_type}"

        # Collect the ids seen per return type; only non-emptiness is checked, not equality
        pandas_ids = set(pandas_result["property_id"].tolist())
        pydantic_ids = {prop.property_id for prop in pydantic_result}
        raw_ids = {entry["property_id"] for entry in raw_result}

        assert len(pandas_ids) > 0, f"pandas should return properties for {search_type}"
        assert len(pydantic_ids) > 0, f"pydantic should return properties for {search_type}"
        assert len(raw_ids) > 0, f"raw should return properties for {search_type}"