- data additions

This commit is contained in:
Zachary Hampton
2025-07-15 12:00:19 -07:00
parent 0d85100091
commit 8311f4dfbc
4 changed files with 67 additions and 1 deletions

View File

@@ -76,6 +76,7 @@ class PropertyType(Enum):
@dataclass
class Address:
formatted_address: str | None = None
full_line: str | None = None
street: str | None = None
unit: str | None = None
@@ -84,6 +85,8 @@ class Address:
zip: str | None = None
@dataclass
class Description:
primary_photo: str | None = None

View File

@@ -391,7 +391,14 @@ class RealtorScraper(Scraper):
extra_property_details = self.get_bulk_prop_details(property_ids) or {}
for result in properties_list:
result.update(extra_property_details.get(result["property_id"], {}))
specific_details_for_property = extra_property_details.get(result["property_id"], {})
#: `location` is returned by both the homes and search-homes queries, so a plain merge would let the
# homes copy replace the search copy; merge the nested location fields first (move to a helper if more fields need this)
result["location"].update(specific_details_for_property["location"])
del specific_details_for_property["location"]
result.update(specific_details_for_property)
if self.return_type != ReturnType.raw:
with ThreadPoolExecutor(max_workers=self.NUM_PROPERTY_WORKERS) as executor:

View File

@@ -3,8 +3,10 @@ _SEARCH_HOMES_DATA_BASE = """{
listing_id
property_id
href
permalink
list_date
status
mls_status
last_sold_price
last_sold_date
list_price
@@ -12,6 +14,15 @@ _SEARCH_HOMES_DATA_BASE = """{
list_price_min
price_per_sqft
tags
open_houses {
start_date
end_date
description
time_zone
dst
href
methods
}
details {
category
text
@@ -154,6 +165,7 @@ _SEARCH_HOMES_DATA_BASE = """{
}
mls_set
nrds_id
state_license
rental_corporation {
fulfillment_id
}
@@ -172,6 +184,23 @@ fragment HomeData on Home {
nearbySchools: nearby_schools(radius: 5.0, limit_per_level: 3) {
__typename schools { district { __typename id name } }
}
popularity {
periods {
clicks_total
views_total
dwell_time_mean
dwell_time_median
leads_total
shares_total
saves_total
last_n_days
}
}
location {
parcel {
parcel_id
}
}
taxHistory: tax_history { __typename tax year assessment { __typename building land total } }
monthly_fees {
description
@@ -206,6 +235,23 @@ HOMES_DATA = """%s
description
display_amount
}
popularity {
periods {
clicks_total
views_total
dwell_time_mean
dwell_time_median
leads_total
shares_total
saves_total
last_n_days
}
}
location {
parcel {
parcel_id
}
}
parking {
unassigned_space_rent
assigned_spaces_available

View File

@@ -303,3 +303,13 @@ def test_return_type():
assert all(isinstance(result, pd.DataFrame) for result in results["pandas"])
assert all(isinstance(result[0], Property) for result in results["pydantic"])
assert all(isinstance(result[0], dict) for result in results["raw"])
def test_has_open_house():
    """Check that raw results carry ``open_houses`` for both search paths.

    One lookup goes directly by street address; the other runs a zip-code
    search and picks a single row out of it by ``property_id`` (presumably
    the same listing as the address lookup -- confirm if the id changes).
    Both rows must have a non-``None`` ``open_houses`` value.
    """
    address_result = scrape_property("1 Hawthorne St Unit 12F, San Francisco, CA 94105", return_type="raw")
    # Fail with a readable message instead of an IndexError when nothing comes back.
    assert address_result, "address search returned no results"
    assert address_result[0]["open_houses"] is not None  #: has open house data from address search

    zip_code_result = scrape_property("94105", return_type="raw")
    address_from_zip_result = [row for row in zip_code_result if row["property_id"] == "1264014746"]
    # The listing can drop out of the zip-code results; report that explicitly.
    assert address_from_zip_result, "property 1264014746 not found in zip code search results"
    assert address_from_zip_result[0]["open_houses"] is not None  #: has open house data from general search