mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-05 03:54:29 -08:00
- add all new data fields
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
from __future__ import annotations
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel, computed_field
|
||||
from typing import Optional, Any
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel, computed_field, HttpUrl, Field
|
||||
|
||||
|
||||
class ReturnType(Enum):
|
||||
@@ -72,9 +73,15 @@ class Address(BaseModel):
|
||||
full_line: str | None = None
|
||||
street: str | None = None
|
||||
unit: str | None = None
|
||||
city: str | None = None
|
||||
state: str | None = None
|
||||
zip: str | None = None
|
||||
city: str | None = Field(None, description="The name of the city")
|
||||
state: str | None = Field(None, description="The name of the state")
|
||||
zip: str | None = Field(None, description="zip code")
|
||||
|
||||
# Additional address fields from GraphQL
|
||||
street_direction: str | None = None
|
||||
street_number: str | None = None
|
||||
street_name: str | None = None
|
||||
street_suffix: str | None = None
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
@@ -102,19 +109,23 @@ class Address(BaseModel):
|
||||
|
||||
|
||||
class Description(BaseModel):
|
||||
primary_photo: str | None = None
|
||||
alt_photos: list[str] | None = None
|
||||
primary_photo: HttpUrl | None = None
|
||||
alt_photos: list[HttpUrl] | None = None
|
||||
style: PropertyType | None = None
|
||||
beds: int | None = None
|
||||
baths_full: int | None = None
|
||||
baths_half: int | None = None
|
||||
sqft: int | None = None
|
||||
lot_sqft: int | None = None
|
||||
sold_price: int | None = None
|
||||
year_built: int | None = None
|
||||
garage: float | None = None
|
||||
stories: int | None = None
|
||||
beds: int | None = Field(None, description="Total number of bedrooms")
|
||||
baths_full: int | None = Field(None, description="Total number of full bathrooms (4 parts: Sink, Shower, Bathtub and Toilet)")
|
||||
baths_half: int | None = Field(None, description="Total number of 1/2 bathrooms (2 parts: Usually Sink and Toilet)")
|
||||
sqft: int | None = Field(None, description="Square footage of the Home")
|
||||
lot_sqft: int | None = Field(None, description="Lot square footage")
|
||||
sold_price: int | None = Field(None, description="Sold price of home")
|
||||
year_built: int | None = Field(None, description="The year the building/home was built")
|
||||
garage: float | None = Field(None, description="Number of garage spaces")
|
||||
stories: int | None = Field(None, description="Number of stories in the building")
|
||||
text: str | None = None
|
||||
|
||||
# Additional description fields
|
||||
name: str | None = None
|
||||
type: str | None = None
|
||||
|
||||
|
||||
class AgentPhone(BaseModel):
|
||||
@@ -125,7 +136,7 @@ class AgentPhone(BaseModel):
|
||||
|
||||
|
||||
class Entity(BaseModel):
|
||||
name: str
|
||||
name: str | None = None # Make name optional since it can be None
|
||||
uuid: str | None = None
|
||||
|
||||
|
||||
@@ -160,29 +171,30 @@ class Advertisers(BaseModel):
|
||||
|
||||
|
||||
class Property(BaseModel):
|
||||
property_url: str
|
||||
property_id: str
|
||||
property_url: HttpUrl
|
||||
property_id: str = Field(..., description="Unique Home identifier also known as property id")
|
||||
#: allows_cats: bool
|
||||
#: allows_dogs: bool
|
||||
|
||||
listing_id: str | None = None
|
||||
permalink: str | None = None
|
||||
|
||||
mls: str | None = None
|
||||
mls_id: str | None = None
|
||||
status: str | None = None
|
||||
status: str | None = Field(None, description="Listing status: for_sale, for_rent, sold, off_market, active (New Home Subdivisions), other (if none of the above conditions were met)")
|
||||
address: Address | None = None
|
||||
|
||||
list_price: int | None = None
|
||||
list_price: int | None = Field(None, description="The current price of the Home")
|
||||
list_price_min: int | None = None
|
||||
list_price_max: int | None = None
|
||||
|
||||
list_date: str | None = None
|
||||
pending_date: str | None = None
|
||||
last_sold_date: str | None = None
|
||||
list_date: datetime | None = Field(None, description="The time this Home entered Move system")
|
||||
pending_date: datetime | None = Field(None, description="The date listing went into pending state")
|
||||
last_sold_date: datetime | None = Field(None, description="Last time the Home was sold")
|
||||
prc_sqft: int | None = None
|
||||
new_construction: bool | None = None
|
||||
hoa_fee: int | None = None
|
||||
days_on_mls: int | None = None
|
||||
new_construction: bool | None = Field(None, description="Search for new construction homes")
|
||||
hoa_fee: int | None = Field(None, description="Search for homes where HOA fee is known and falls within specified range")
|
||||
days_on_mls: int | None = Field(None, description="An integer value determined by the MLS to calculate days on market")
|
||||
description: Description | None = None
|
||||
tags: list[str] | None = None
|
||||
details: list[dict] | None = None
|
||||
@@ -190,8 +202,8 @@ class Property(BaseModel):
|
||||
latitude: float | None = None
|
||||
longitude: float | None = None
|
||||
neighborhoods: Optional[str] = None
|
||||
county: Optional[str] = None
|
||||
fips_code: Optional[str] = None
|
||||
county: Optional[str] = Field(None, description="County associated with home")
|
||||
fips_code: Optional[str] = Field(None, description="The FIPS (Federal Information Processing Standard) code for the county")
|
||||
nearby_schools: list[str] | None = None
|
||||
assessed_value: int | None = None
|
||||
estimated_value: int | None = None
|
||||
@@ -199,3 +211,124 @@ class Property(BaseModel):
|
||||
tax_history: list[dict] | None = None
|
||||
|
||||
advertisers: Advertisers | None = None
|
||||
|
||||
# Additional fields from GraphQL that aren't currently parsed
|
||||
mls_status: str | None = None
|
||||
last_sold_price: int | None = None
|
||||
|
||||
# Structured data from GraphQL
|
||||
open_houses: list[OpenHouse] | None = None
|
||||
pet_policy: PetPolicy | None = None
|
||||
units: list[Unit] | None = None
|
||||
monthly_fees: HomeMonthlyFee | None = Field(None, description="Monthly fees. Currently only some rental data will have them.")
|
||||
one_time_fees: list[HomeOneTimeFee] | None = Field(None, description="One time fees. Currently only some rental data will have them.")
|
||||
parking: HomeParkingDetails | None = Field(None, description="Parking information. Currently only some rental data will have it.")
|
||||
terms: list[PropertyDetails] | None = None
|
||||
popularity: Popularity | None = None
|
||||
tax_record: TaxRecord | None = None
|
||||
parcel_info: dict | None = None # Keep as dict for flexibility
|
||||
current_estimates: list[PropertyEstimate] | None = None
|
||||
estimates: dict | None = None # Keep as dict for flexibility
|
||||
photos: list[dict] | None = None # Keep as dict for photo structure
|
||||
flags: HomeFlags | None = Field(None, description="Home flags for Listing/Property")
|
||||
|
||||
|
||||
# Specialized models for GraphQL types
|
||||
|
||||
class HomeMonthlyFee(BaseModel):
    # Type of `Property.monthly_fees`; that field's description notes that
    # only some rental data currently carries it.
    # Both fields are optional and default to None.
    description: str | None = None
    # Stored as a string rather than a number -- presumably already formatted
    # for display; TODO confirm against the upstream payload.
    display_amount: str | None = None
|
||||
|
||||
|
||||
class HomeOneTimeFee(BaseModel):
    # Element type of `Property.one_time_fees`; that field's description notes
    # that only some rental data currently carries it.
    # Both fields are optional and default to None.
    description: str | None = None
    # Stored as a string rather than a number -- presumably already formatted
    # for display; TODO confirm against the upstream payload.
    display_amount: str | None = None
|
||||
|
||||
|
||||
class HomeParkingDetails(BaseModel):
    # Type of `Property.parking`. Every field is optional and defaults to None.
    unassigned_space_rent: int | None = None
    assigned_spaces_available: int | None = None
    # NOTE(review): this Field description repeats the wording used on
    # `Property.parking` instead of describing this text field itself.
    description: str | None = Field(None, description="Parking information. Currently only some rental data will have it.")
    assigned_space_rent: int | None = None
|
||||
|
||||
|
||||
class PetPolicy(BaseModel):
    # Type of `Property.pet_policy`. Every flag is optional and defaults to
    # None, so "not reported" stays distinguishable from an explicit False.
    # NOTE(review): the descriptions are phrased as search filters; they look
    # carried over from the upstream schema -- confirm before rewording.
    cats: bool | None = Field(None, description="Search for homes which allow cats")
    dogs: bool | None = Field(None, description="Search for homes which allow dogs")
    # Fixed typo in the description ("with allow" -> "which allow") so it
    # matches the sibling fields in the generated schema.
    dogs_small: bool | None = Field(None, description="Search for homes which allow small dogs")
    dogs_large: bool | None = Field(None, description="Search for homes which allow large dogs")
|
||||
|
||||
|
||||
class OpenHouse(BaseModel):
    # Element type of `Property.open_houses`. Every field is optional and
    # defaults to None.
    # The scraper hunk feeds this field from `_parse_open_houses`, which turns
    # the start/end ISO strings into datetime objects (or None on bad input).
    start_date: datetime | None = None
    end_date: datetime | None = None
    description: str | None = None
    time_zone: str | None = None
    # presumably a daylight-saving-time flag -- TODO confirm against the schema
    dst: bool | None = None
    # Declared as HttpUrl, so pydantic validates it as a URL.
    href: HttpUrl | None = None
    methods: list[str] | None = None
|
||||
|
||||
|
||||
class HomeFlags(BaseModel):
    # Type of `Property.flags` ("Home flags for Listing/Property").
    # Every flag is optional and defaults to None, so a missing flag is not
    # reported as False.
    is_pending: bool | None = None
    is_contingent: bool | None = None
    is_new_construction: bool | None = None
    is_coming_soon: bool | None = None
    is_new_listing: bool | None = None
    is_price_reduced: bool | None = None
    is_foreclosure: bool | None = None
|
||||
|
||||
|
||||
class PopularityPeriod(BaseModel):
    # Element type of `Popularity.periods`. Every field is optional and
    # defaults to None.
    clicks_total: int | None = None
    views_total: int | None = None
    # The dwell-time statistics are floats, unlike the integer counters.
    # Units are not visible here -- TODO confirm.
    dwell_time_mean: float | None = None
    dwell_time_median: float | None = None
    leads_total: int | None = None
    shares_total: int | None = None
    saves_total: int | None = None
    # presumably the length of the window the totals cover -- TODO confirm
    last_n_days: int | None = None
|
||||
|
||||
|
||||
class Popularity(BaseModel):
    # Type of `Property.popularity`: a wrapper around an optional list of
    # `PopularityPeriod` entries.
    periods: list[PopularityPeriod] | None = None
|
||||
|
||||
|
||||
class TaxRecord(BaseModel):
    # Type of `Property.tax_record`. Every field is optional and defaults to
    # None.
    cl_id: str | None = None
    public_record_id: str | None = None
    # The scraper hunk runs the raw record through `_parse_tax_record`, which
    # converts this value from an ISO string to a datetime.
    last_update_date: datetime | None = None
    # presumably the assessor's parcel number -- TODO confirm
    apn: str | None = None
    tax_parcel_id: str | None = None
|
||||
|
||||
|
||||
class PropertyEstimate(BaseModel):
    # Element type of `Property.current_estimates`. Every field is optional
    # and defaults to None.
    estimate: int | None = None
    estimate_high: int | None = None
    estimate_low: int | None = None
    # The scraper hunk runs the raw entries through `_parse_current_estimates`,
    # which converts this value from an ISO string to a datetime.
    date: datetime | None = None
    is_best_home_value: bool | None = None
|
||||
|
||||
|
||||
class PropertyDetails(BaseModel):
    # Element type of `Property.terms`. Every field is optional and defaults
    # to None.
    category: str | None = None
    # A list of strings, not a single string.
    text: list[str] | None = None
    parent_category: str | None = None
|
||||
|
||||
|
||||
class UnitDescription(BaseModel):
    # Type of `Unit.description`. Every field is optional and defaults to None.
    # String form of the bath count, kept alongside the numeric `baths`.
    baths_consolidated: str | None = None
    baths: float | None = None  # Changed to float to handle values like 2.5
    beds: int | None = None
    sqft: int | None = None
|
||||
|
||||
|
||||
class UnitAvailability(BaseModel):
    # Type of `Unit.availability`. The scraper's `_parse_units` converts the
    # nested ISO date string to a datetime (or None on bad input).
    date: datetime | None = None
|
||||
|
||||
|
||||
class Unit(BaseModel):
    # Element type of `Property.units`. Every field is optional and defaults
    # to None.
    availability: UnitAvailability | None = None
    description: UnitDescription | None = None
    photos: list[dict] | None = None  # Keep as dict for photo structure
    list_price: int | None = None
|
||||
|
||||
@@ -209,13 +209,15 @@ class RealtorScraper(Scraper):
|
||||
property_url=result["href"],
|
||||
property_id=property_id,
|
||||
listing_id=result.get("listing_id"),
|
||||
permalink=result.get("permalink"),
|
||||
status=("PENDING" if is_pending else "CONTINGENT" if is_contingent else result["status"].upper()),
|
||||
list_price=result["list_price"],
|
||||
list_price_min=result["list_price_min"],
|
||||
list_price_max=result["list_price_max"],
|
||||
list_date=(result["list_date"].split("T")[0] if result.get("list_date") else None),
|
||||
list_date=(datetime.fromisoformat(result["list_date"].split("T")[0]) if result.get("list_date") else None),
|
||||
prc_sqft=result.get("price_per_sqft"),
|
||||
last_sold_date=result.get("last_sold_date"),
|
||||
last_sold_date=(datetime.fromisoformat(result["last_sold_date"]) if result.get("last_sold_date") else None),
|
||||
pending_date=(datetime.fromisoformat(result["pending_date"].split("T")[0]) if result.get("pending_date") else None),
|
||||
new_construction=result["flags"].get("is_new_construction") is True,
|
||||
hoa_fee=(result["hoa"]["fee"] if result.get("hoa") and isinstance(result["hoa"], dict) else None),
|
||||
latitude=(result["location"]["address"]["coordinate"].get("lat") if able_to_get_lat_long else None),
|
||||
@@ -232,6 +234,26 @@ class RealtorScraper(Scraper):
|
||||
advertisers=advertisers,
|
||||
tax=prop_details.get("tax"),
|
||||
tax_history=prop_details.get("tax_history"),
|
||||
|
||||
# Additional fields from GraphQL
|
||||
mls_status=result.get("mls_status"),
|
||||
last_sold_price=result.get("last_sold_price"),
|
||||
tags=result.get("tags"),
|
||||
details=result.get("details"),
|
||||
open_houses=self._parse_open_houses(result.get("open_houses")),
|
||||
pet_policy=result.get("pet_policy"),
|
||||
units=self._parse_units(result.get("units")),
|
||||
monthly_fees=result.get("monthly_fees"),
|
||||
one_time_fees=result.get("one_time_fees"),
|
||||
parking=result.get("parking"),
|
||||
terms=result.get("terms"),
|
||||
popularity=result.get("popularity"),
|
||||
tax_record=self._parse_tax_record(result.get("tax_record")),
|
||||
parcel_info=result.get("location", {}).get("parcel"),
|
||||
current_estimates=self._parse_current_estimates(result.get("current_estimates")),
|
||||
estimates=result.get("estimates"),
|
||||
photos=result.get("photos"),
|
||||
flags=result.get("flags"),
|
||||
)
|
||||
return realty_property
|
||||
|
||||
@@ -395,8 +417,9 @@ class RealtorScraper(Scraper):
|
||||
|
||||
#: address is retrieved on both homes and search homes, so when merged, homes overrides,
|
||||
# this gets the internal data we want and only updates that (migrate to a func if more fields)
|
||||
result["location"].update(specific_details_for_property["location"])
|
||||
del specific_details_for_property["location"]
|
||||
if "location" in specific_details_for_property:
|
||||
result["location"].update(specific_details_for_property["location"])
|
||||
del specific_details_for_property["location"]
|
||||
|
||||
result.update(specific_details_for_property)
|
||||
|
||||
@@ -614,6 +637,12 @@ class RealtorScraper(Scraper):
|
||||
city=address["city"],
|
||||
state=address["state_code"],
|
||||
zip=address["postal_code"],
|
||||
|
||||
# Additional address fields
|
||||
street_direction=address.get("street_direction"),
|
||||
street_number=address.get("street_number"),
|
||||
street_name=address.get("street_name"),
|
||||
street_suffix=address.get("street_suffix"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
@@ -630,7 +659,7 @@ class RealtorScraper(Scraper):
|
||||
if style is not None:
|
||||
style = style.upper()
|
||||
|
||||
primary_photo = ""
|
||||
primary_photo = None
|
||||
if (primary_photo_info := result.get("primary_photo")) and (
|
||||
primary_photo_href := primary_photo_info.get("href")
|
||||
):
|
||||
@@ -654,6 +683,10 @@ class RealtorScraper(Scraper):
|
||||
garage=description_data.get("garage"),
|
||||
stories=description_data.get("stories"),
|
||||
text=description_data.get("text"),
|
||||
|
||||
# Additional description fields
|
||||
name=description_data.get("name"),
|
||||
type=description_data.get("type"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
@@ -685,3 +718,89 @@ class RealtorScraper(Scraper):
|
||||
for photo_info in photos_info
|
||||
if photo_info.get("href")
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _parse_open_houses(open_houses_data: list[dict] | None) -> list[dict] | None:
|
||||
"""Parse open houses data and convert date strings to datetime objects"""
|
||||
if not open_houses_data:
|
||||
return None
|
||||
|
||||
parsed_open_houses = []
|
||||
for oh in open_houses_data:
|
||||
parsed_oh = oh.copy()
|
||||
|
||||
# Parse start_date and end_date
|
||||
if parsed_oh.get("start_date"):
|
||||
try:
|
||||
parsed_oh["start_date"] = datetime.fromisoformat(parsed_oh["start_date"].replace("Z", "+00:00"))
|
||||
except (ValueError, AttributeError):
|
||||
parsed_oh["start_date"] = None
|
||||
|
||||
if parsed_oh.get("end_date"):
|
||||
try:
|
||||
parsed_oh["end_date"] = datetime.fromisoformat(parsed_oh["end_date"].replace("Z", "+00:00"))
|
||||
except (ValueError, AttributeError):
|
||||
parsed_oh["end_date"] = None
|
||||
|
||||
parsed_open_houses.append(parsed_oh)
|
||||
|
||||
return parsed_open_houses
|
||||
|
||||
@staticmethod
|
||||
def _parse_units(units_data: list[dict] | None) -> list[dict] | None:
|
||||
"""Parse units data and convert date strings to datetime objects"""
|
||||
if not units_data:
|
||||
return None
|
||||
|
||||
parsed_units = []
|
||||
for unit in units_data:
|
||||
parsed_unit = unit.copy()
|
||||
|
||||
# Parse availability date
|
||||
if parsed_unit.get("availability") and parsed_unit["availability"].get("date"):
|
||||
try:
|
||||
parsed_unit["availability"]["date"] = datetime.fromisoformat(parsed_unit["availability"]["date"].replace("Z", "+00:00"))
|
||||
except (ValueError, AttributeError):
|
||||
parsed_unit["availability"]["date"] = None
|
||||
|
||||
parsed_units.append(parsed_unit)
|
||||
|
||||
return parsed_units
|
||||
|
||||
@staticmethod
|
||||
def _parse_tax_record(tax_record_data: dict | None) -> dict | None:
|
||||
"""Parse tax record data and convert date strings to datetime objects"""
|
||||
if not tax_record_data:
|
||||
return None
|
||||
|
||||
parsed_tax_record = tax_record_data.copy()
|
||||
|
||||
# Parse last_update_date
|
||||
if parsed_tax_record.get("last_update_date"):
|
||||
try:
|
||||
parsed_tax_record["last_update_date"] = datetime.fromisoformat(parsed_tax_record["last_update_date"].replace("Z", "+00:00"))
|
||||
except (ValueError, AttributeError):
|
||||
parsed_tax_record["last_update_date"] = None
|
||||
|
||||
return parsed_tax_record
|
||||
|
||||
@staticmethod
|
||||
def _parse_current_estimates(estimates_data: list[dict] | None) -> list[dict] | None:
|
||||
"""Parse current estimates data and convert date strings to datetime objects"""
|
||||
if not estimates_data:
|
||||
return None
|
||||
|
||||
parsed_estimates = []
|
||||
for estimate in estimates_data:
|
||||
parsed_estimate = estimate.copy()
|
||||
|
||||
# Parse date
|
||||
if parsed_estimate.get("date"):
|
||||
try:
|
||||
parsed_estimate["date"] = datetime.fromisoformat(parsed_estimate["date"].replace("Z", "+00:00"))
|
||||
except (ValueError, AttributeError):
|
||||
parsed_estimate["date"] = None
|
||||
|
||||
parsed_estimates.append(parsed_estimate)
|
||||
|
||||
return parsed_estimates
|
||||
|
||||
48881
homeharvest/core/scrapers/realtor/introspection.json
Normal file
48881
homeharvest/core/scrapers/realtor/introspection.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -8,9 +8,11 @@ ordered_properties = [
|
||||
"property_url",
|
||||
"property_id",
|
||||
"listing_id",
|
||||
"permalink",
|
||||
"mls",
|
||||
"mls_id",
|
||||
"status",
|
||||
"mls_status",
|
||||
"text",
|
||||
"style",
|
||||
"full_street_line",
|
||||
@@ -19,6 +21,7 @@ ordered_properties = [
|
||||
"city",
|
||||
"state",
|
||||
"zip_code",
|
||||
"formatted_address",
|
||||
"beds",
|
||||
"full_baths",
|
||||
"half_baths",
|
||||
@@ -29,8 +32,10 @@ ordered_properties = [
|
||||
"list_price_min",
|
||||
"list_price_max",
|
||||
"list_date",
|
||||
"pending_date",
|
||||
"sold_price",
|
||||
"last_sold_date",
|
||||
"last_sold_price",
|
||||
"assessed_value",
|
||||
"estimated_value",
|
||||
"tax",
|
||||
@@ -63,7 +68,7 @@ ordered_properties = [
|
||||
"office_phones",
|
||||
"nearby_schools",
|
||||
"primary_photo",
|
||||
"alt_photos",
|
||||
"alt_photos"
|
||||
]
|
||||
|
||||
|
||||
@@ -79,6 +84,7 @@ def process_result(result: Property) -> pd.DataFrame:
|
||||
prop_data["city"] = address_data.get("city")
|
||||
prop_data["state"] = address_data.get("state")
|
||||
prop_data["zip_code"] = address_data.get("zip")
|
||||
prop_data["formatted_address"] = address_data.get("formatted_address")
|
||||
|
||||
if "advertisers" in prop_data and prop_data.get("advertisers"):
|
||||
advertiser_data = prop_data["advertisers"]
|
||||
@@ -112,11 +118,20 @@ def process_result(result: Property) -> pd.DataFrame:
|
||||
prop_data["price_per_sqft"] = prop_data["prc_sqft"]
|
||||
prop_data["nearby_schools"] = filter(None, prop_data["nearby_schools"]) if prop_data["nearby_schools"] else None
|
||||
prop_data["nearby_schools"] = ", ".join(set(prop_data["nearby_schools"])) if prop_data["nearby_schools"] else None
|
||||
|
||||
# Convert datetime objects to strings for CSV
|
||||
for date_field in ["list_date", "pending_date", "last_sold_date"]:
|
||||
if prop_data.get(date_field):
|
||||
prop_data[date_field] = prop_data[date_field].strftime("%Y-%m-%d") if hasattr(prop_data[date_field], 'strftime') else prop_data[date_field]
|
||||
|
||||
# Convert HttpUrl objects to strings for CSV
|
||||
if prop_data.get("property_url"):
|
||||
prop_data["property_url"] = str(prop_data["property_url"])
|
||||
|
||||
description = result.description
|
||||
if description:
|
||||
prop_data["primary_photo"] = description.primary_photo
|
||||
prop_data["alt_photos"] = ", ".join(description.alt_photos) if description.alt_photos else None
|
||||
prop_data["primary_photo"] = str(description.primary_photo) if description.primary_photo else None
|
||||
prop_data["alt_photos"] = ", ".join(str(url) for url in description.alt_photos) if description.alt_photos else None
|
||||
prop_data["style"] = (
|
||||
description.style
|
||||
if isinstance(description.style, str)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "homeharvest"
|
||||
version = "0.4.12"
|
||||
version = "0.5.0"
|
||||
description = "Real estate scraping library"
|
||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||
homepage = "https://github.com/Bunsly/HomeHarvest"
|
||||
|
||||
@@ -313,3 +313,73 @@ def test_has_open_house():
|
||||
address_from_zip_result = list(filter(lambda row: row["property_id"] == '1264014746', zip_code_result))
|
||||
|
||||
assert address_from_zip_result[0]["open_houses"] is not None #: has open house data from general search
|
||||
|
||||
|
||||
|
||||
def test_return_type_consistency():
    """Check each return_type yields the expected container for every search kind."""

    # (location, label used in assertion messages)
    scenarios = (
        ("Dallas, TX", "general"),  # General city search
        ("75201", "zip"),  # ZIP code search
        ("2530 Al Lipscomb Way", "address"),  # Address search
    )

    for location, search_type in scenarios:
        # The three calls differ only in return_type.
        query = {"location": location, "listing_type": "for_sale", "limit": 3}
        pandas_result = scrape_property(return_type="pandas", **query)
        pydantic_result = scrape_property(return_type="pydantic", **query)
        raw_result = scrape_property(return_type="raw", **query)

        # pandas: a non-empty DataFrame carrying the core columns
        assert isinstance(pandas_result, pd.DataFrame), f"pandas result should be DataFrame for {search_type}"
        assert len(pandas_result) > 0, f"pandas result should not be empty for {search_type}"

        for col in ["property_id", "property_url", "list_price", "status", "formatted_address"]:
            assert col in pandas_result.columns, f"Missing column {col} in pandas result for {search_type}"

        # pydantic: a non-empty list of Property models
        assert isinstance(pydantic_result, list), f"pydantic result should be list for {search_type}"
        assert len(pydantic_result) > 0, f"pydantic result should not be empty for {search_type}"

        for model in pydantic_result:
            assert isinstance(model, Property), f"pydantic items should be Property objects for {search_type}"
            assert model.property_id is not None, f"property_id should not be None for {search_type}"

        # raw: a non-empty list of dicts with the identifying keys
        assert isinstance(raw_result, list), f"raw result should be list for {search_type}"
        assert len(raw_result) > 0, f"raw result should not be empty for {search_type}"

        for row in raw_result:
            assert isinstance(row, dict), f"raw items should be dict for {search_type}"
            assert "property_id" in row, f"raw items should have property_id for {search_type}"
            assert "href" in row, f"raw items should have href for {search_type}"

        # Collect the ids per return type. Only non-emptiness is asserted,
        # not overlap between the three sets.
        pandas_ids = set(pandas_result["property_id"].tolist())
        pydantic_ids = {prop.property_id for prop in pydantic_result}
        raw_ids = {row["property_id"] for row in raw_result}

        assert len(pandas_ids) > 0, f"pandas should return properties for {search_type}"
        assert len(pydantic_ids) > 0, f"pydantic should return properties for {search_type}"
        assert len(raw_ids) > 0, f"raw should return properties for {search_type}"
|
||||
|
||||
Reference in New Issue
Block a user