mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-06 12:34:29 -08:00
- data additions
This commit is contained in:
@@ -76,6 +76,7 @@ class PropertyType(Enum):
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Address:
|
class Address:
|
||||||
|
formatted_address: str | None = None
|
||||||
full_line: str | None = None
|
full_line: str | None = None
|
||||||
street: str | None = None
|
street: str | None = None
|
||||||
unit: str | None = None
|
unit: str | None = None
|
||||||
@@ -84,6 +85,8 @@ class Address:
|
|||||||
zip: str | None = None
|
zip: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Description:
|
class Description:
|
||||||
primary_photo: str | None = None
|
primary_photo: str | None = None
|
||||||
|
|||||||
@@ -391,7 +391,14 @@ class RealtorScraper(Scraper):
|
|||||||
extra_property_details = self.get_bulk_prop_details(property_ids) or {}
|
extra_property_details = self.get_bulk_prop_details(property_ids) or {}
|
||||||
|
|
||||||
for result in properties_list:
|
for result in properties_list:
|
||||||
result.update(extra_property_details.get(result["property_id"], {}))
|
specific_details_for_property = extra_property_details.get(result["property_id"], {})
|
||||||
|
|
||||||
|
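#: location/address data is returned by both the homes and the search-homes queries, so a plain merge lets the homes data override the whole entry;
|
||||||
|
# instead, merge the nested location data first and then update the remaining fields (move this to a helper function if more nested fields need it)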
|
||||||
|
result["location"].update(specific_details_for_property["location"])
|
||||||
|
del specific_details_for_property["location"]
|
||||||
|
|
||||||
|
result.update(specific_details_for_property)
|
||||||
|
|
||||||
if self.return_type != ReturnType.raw:
|
if self.return_type != ReturnType.raw:
|
||||||
with ThreadPoolExecutor(max_workers=self.NUM_PROPERTY_WORKERS) as executor:
|
with ThreadPoolExecutor(max_workers=self.NUM_PROPERTY_WORKERS) as executor:
|
||||||
|
|||||||
@@ -3,8 +3,10 @@ _SEARCH_HOMES_DATA_BASE = """{
|
|||||||
listing_id
|
listing_id
|
||||||
property_id
|
property_id
|
||||||
href
|
href
|
||||||
|
permalink
|
||||||
list_date
|
list_date
|
||||||
status
|
status
|
||||||
|
mls_status
|
||||||
last_sold_price
|
last_sold_price
|
||||||
last_sold_date
|
last_sold_date
|
||||||
list_price
|
list_price
|
||||||
@@ -12,6 +14,15 @@ _SEARCH_HOMES_DATA_BASE = """{
|
|||||||
list_price_min
|
list_price_min
|
||||||
price_per_sqft
|
price_per_sqft
|
||||||
tags
|
tags
|
||||||
|
open_houses {
|
||||||
|
start_date
|
||||||
|
end_date
|
||||||
|
description
|
||||||
|
time_zone
|
||||||
|
dst
|
||||||
|
href
|
||||||
|
methods
|
||||||
|
}
|
||||||
details {
|
details {
|
||||||
category
|
category
|
||||||
text
|
text
|
||||||
@@ -154,6 +165,7 @@ _SEARCH_HOMES_DATA_BASE = """{
|
|||||||
}
|
}
|
||||||
mls_set
|
mls_set
|
||||||
nrds_id
|
nrds_id
|
||||||
|
state_license
|
||||||
rental_corporation {
|
rental_corporation {
|
||||||
fulfillment_id
|
fulfillment_id
|
||||||
}
|
}
|
||||||
@@ -172,6 +184,23 @@ fragment HomeData on Home {
|
|||||||
nearbySchools: nearby_schools(radius: 5.0, limit_per_level: 3) {
|
nearbySchools: nearby_schools(radius: 5.0, limit_per_level: 3) {
|
||||||
__typename schools { district { __typename id name } }
|
__typename schools { district { __typename id name } }
|
||||||
}
|
}
|
||||||
|
popularity {
|
||||||
|
periods {
|
||||||
|
clicks_total
|
||||||
|
views_total
|
||||||
|
dwell_time_mean
|
||||||
|
dwell_time_median
|
||||||
|
leads_total
|
||||||
|
shares_total
|
||||||
|
saves_total
|
||||||
|
last_n_days
|
||||||
|
}
|
||||||
|
}
|
||||||
|
location {
|
||||||
|
parcel {
|
||||||
|
parcel_id
|
||||||
|
}
|
||||||
|
}
|
||||||
taxHistory: tax_history { __typename tax year assessment { __typename building land total } }
|
taxHistory: tax_history { __typename tax year assessment { __typename building land total } }
|
||||||
monthly_fees {
|
monthly_fees {
|
||||||
description
|
description
|
||||||
@@ -206,6 +235,23 @@ HOMES_DATA = """%s
|
|||||||
description
|
description
|
||||||
display_amount
|
display_amount
|
||||||
}
|
}
|
||||||
|
popularity {
|
||||||
|
periods {
|
||||||
|
clicks_total
|
||||||
|
views_total
|
||||||
|
dwell_time_mean
|
||||||
|
dwell_time_median
|
||||||
|
leads_total
|
||||||
|
shares_total
|
||||||
|
saves_total
|
||||||
|
last_n_days
|
||||||
|
}
|
||||||
|
}
|
||||||
|
location {
|
||||||
|
parcel {
|
||||||
|
parcel_id
|
||||||
|
}
|
||||||
|
}
|
||||||
parking {
|
parking {
|
||||||
unassigned_space_rent
|
unassigned_space_rent
|
||||||
assigned_spaces_available
|
assigned_spaces_available
|
||||||
|
|||||||
@@ -303,3 +303,13 @@ def test_return_type():
|
|||||||
assert all(isinstance(result, pd.DataFrame) for result in results["pandas"])
|
assert all(isinstance(result, pd.DataFrame) for result in results["pandas"])
|
||||||
assert all(isinstance(result[0], Property) for result in results["pydantic"])
|
assert all(isinstance(result[0], Property) for result in results["pydantic"])
|
||||||
assert all(isinstance(result[0], dict) for result in results["raw"])
|
assert all(isinstance(result[0], dict) for result in results["raw"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_has_open_house():
    """Check that raw results include ``open_houses`` for both search modes.

    The same listing is looked up twice: once through a direct address
    search and once by filtering a zip-code search on its property id.
    Both raw rows must carry a non-None ``open_houses`` entry.
    """
    # NOTE(review): presumably the property id of the Hawthorne St listing
    # searched below -- confirm against the data source.
    target_property_id = "1264014746"

    address_result = scrape_property("1 Hawthorne St Unit 12F, San Francisco, CA 94105", return_type="raw")
    # Fail with a clear message instead of an IndexError when nothing comes back.
    assert address_result, "address search returned no results"
    assert address_result[0]["open_houses"] is not None  #: has open house data from address search

    zip_code_result = scrape_property("94105", return_type="raw")
    address_from_zip_result = [row for row in zip_code_result if row["property_id"] == target_property_id]

    # The listing may be missing from the general search; report that explicitly.
    assert address_from_zip_result, f"property {target_property_id} not found in zip code search results"
    assert address_from_zip_result[0]["open_houses"] is not None  #: has open house data from general search
|
||||||
|
|||||||
Reference in New Issue
Block a user