[fix] add back zillow/redfin
parent bd33c3b5a4
commit ff95ca0611
@@ -24,7 +24,12 @@ def _validate_input(site_name: str, status: str) -> None:
 
 
 def _scrape_single_site(
-    location: str, site_name: str, status: str, proxy: str = None, timeframe: str = None
+    location: str,
+    site_name: str,
+    status: str,
+    radius: float,
+    proxy: str = None,
+    timeframe: str = None,
 ) -> pd.DataFrame:
     """
     Helper function to scrape a single site.
@@ -36,6 +41,7 @@ def _scrape_single_site(
         status=status,
         site_name=SiteName.get_by_value(site_name.lower()),
         proxy=proxy,
+        radius=radius,
         timeframe=timeframe,
     )
 
@@ -53,7 +59,8 @@ def scrape_property(
     location: str,
     timeframe: str = None,
     site_name: Union[str, list[str]] = None,
-    status: str = "sale",
+    listing_type: str = "for_sale",
+    radius: float = None,
     proxy: str = None,
 ) -> pd.DataFrame:
     """
@@ -65,6 +72,7 @@ def scrape_property(
     :param listing_type: Listing type (e.g. 'for_sale', 'for_rent', 'sold')
     :return: pd.DataFrame containing properties
     """
+    status = listing_type
     if site_name is None:
         site_name = list(_scrapers.keys())
 
@@ -80,7 +88,13 @@ def scrape_property(
     with ThreadPoolExecutor() as executor:
         futures = {
             executor.submit(
-                _scrape_single_site, location, s_name, status, proxy, timeframe
+                _scrape_single_site,
+                location,
+                s_name,
+                status,
+                radius,
+                proxy,
+                timeframe,
             ): s_name
             for s_name in site_name
         }
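A minimal usage sketch of the updated entry point, assuming the package exposes scrape_property at the top level; the address, site name string, and radius value below are illustrative only:

    from homeharvest import scrape_property

    # Address-level search with the new `radius` argument (values illustrative).
    properties = scrape_property(
        location="4529 Winona Court, Denver, CO",  # any address-like location
        site_name=["realtor.com"],                 # omitted -> all registered scrapers
        listing_type="for_sale",                   # replaces the old `status` keyword
        radius=0.5,                                # assumed miles; forwarded via ScraperInput.radius
    )
    print(properties.head())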
@@ -16,6 +16,7 @@ class ScraperInput:
     site_name: str
     proxy: Optional[str] = None
     timeframe: Optional[str] = None
+    radius: float | None = None
 
     def __post_init__(self):
         if self.status == "sold" and not self.timeframe:
@@ -50,6 +51,7 @@ class Scraper:
 
         self.listing_type = scraper_input.status
         self.site_name = scraper_input.site_name
+        self.radius = scraper_input.radius
 
     def search(self) -> list[Property]:
         ...
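Taken together, the fields visible in this hunk suggest a ScraperInput roughly like the sketch below; the location and status fields are assumed from how they are used elsewhere in the diff, and the validation body is elided because it is not shown here:

    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class ScraperInput:
        location: str                   # assumed: the search location string
        status: str                     # "for_sale" / "for_rent" / "sold"
        site_name: str
        proxy: Optional[str] = None
        timeframe: Optional[str] = None
        radius: float | None = None     # new: search radius around an address

        def __post_init__(self):
            if self.status == "sold" and not self.timeframe:
                ...  # original validation body not shown in this diff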
@@ -590,12 +590,15 @@ class RealtorScraper(Scraper):
     def search(self):
         location_info = self.handle_location()
         location_type = location_info["area_type"]
-        if location_type == "address":
+        is_for_comps = self.radius is not None and location_type == "address"
+
+        if location_type == "address" and not is_for_comps:
             property_id = location_info["mpr_id"]
             return self.handle_address(property_id)
 
         offset = 0
 
+        if not is_for_comps:
             search_variables = {
                 "city": location_info.get("city"),
                 "county": location_info.get("county"),
@@ -603,6 +606,13 @@ class RealtorScraper(Scraper):
                 "postal_code": location_info.get("postal_code"),
                 "offset": offset,
             }
+        else:
+            coordinates = list(location_info["centroid"].values())
+            search_variables = {
+                "coordinates": coordinates,
+                "radius": "{}mi".format(self.radius),
+                "offset": offset,
+            }
 
         result = self.handle_area(search_variables)
         total = result["total"]
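A standalone sketch of the comps branch above: when a radius is supplied for an address-level location, the search pivots to coordinates around the address centroid instead of a single-property lookup. The centroid key names here are assumptions; only the "<miles>mi" radius format comes from the diff:

    # Illustrative input; real data comes from RealtorScraper.handle_location().
    location_info = {"area_type": "address", "centroid": {"lon": -104.99, "lat": 39.74}}
    radius = 0.5

    is_for_comps = radius is not None and location_info["area_type"] == "address"

    if is_for_comps:
        search_variables = {
            "coordinates": list(location_info["centroid"].values()),
            "radius": "{}mi".format(radius),  # Realtor's API expects a "<miles>mi" string
            "offset": 0,
        }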
@@ -0,0 +1,228 @@
+"""
+homeharvest.redfin.__init__
+~~~~~~~~~~~~
+
+This module implements the scraper for redfin.com
+"""
+import json
+from typing import Any
+from .. import Scraper
+from ..models import Property, Address, Status
+from ....exceptions import NoResultsFound, SearchTooBroad
+from datetime import datetime
+
+
+class RedfinScraper(Scraper):
+    def __init__(self, scraper_input):
+        super().__init__(scraper_input)
+        self.listing_type = scraper_input.status
+
+    def _handle_location(self):
+        url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(
+            self.location
+        )
+
+        response = self.session.get(url)
+        response_json = json.loads(response.text.replace("{}&&", ""))
+
+        def get_region_type(match_type: str):
+            if match_type == "4":
+                return "2"  #: zip
+            elif match_type == "2":
+                return "6"  #: city
+            elif match_type == "1":
+                return "address"  #: address, needs to be handled differently
+            elif match_type == "11":
+                return "state"
+
+        if "exactMatch" not in response_json["payload"]:
+            raise NoResultsFound(
+                "No results found for location: {}".format(self.location)
+            )
+
+        if response_json["payload"]["exactMatch"] is not None:
+            target = response_json["payload"]["exactMatch"]
+        else:
+            target = response_json["payload"]["sections"][0]["rows"][0]
+
+        return target["id"].split("_")[1], get_region_type(target["type"])
+
+    def _parse_home(self, home: dict, single_search: bool = False) -> Property:
+        def get_value(key: str) -> Any | None:
+            if key in home and "value" in home[key]:
+                return home[key]["value"]
+
+        if not single_search:
+            address = Address(
+                street=get_value("streetLine"),
+                city=home.get("city"),
+                state=home.get("state"),
+                zip=home.get("zip"),
+            )
+        else:
+            address_info = home.get("streetAddress")
+
+            address = Address(
+                street=address_info.get("assembledAddress"),
+                city=home.get("city"),
+                state=home.get("state"),
+                zip=home.get("zip"),
+            )
+
+        url = "https://www.redfin.com{}".format(home["url"])
+        lot_size_data = home.get("lotSize")
+
+        if not isinstance(lot_size_data, int):
+            lot_size = (
+                lot_size_data.get("value", None)
+                if isinstance(lot_size_data, dict)
+                else None
+            )
+        else:
+            lot_size = lot_size_data
+
+        lat_long = get_value("latLong")
+
+        return Property(
+            status=self.listing_type,
+            address=address,
+            property_url=url,
+            beds=home["beds"] if "beds" in home else None,
+            baths_full=home["baths"] if "baths" in home else None,
+            list_price=get_value("price"),
+            est_sf=get_value("sqFt"),
+            stories=home["stories"] if "stories" in home else None,
+            yr_blt=get_value("yearBuilt")
+            if not single_search
+            else home.get("yearBuilt"),
+            lot_sf=lot_size,
+            prc_sqft=get_value("pricePerSqFt")
+            if type(home.get("pricePerSqFt")) != int
+            else home.get("pricePerSqFt"),
+            mls_id=get_value("mlsId"),
+            latitude=lat_long.get("latitude") if lat_long else None,
+            longitude=lat_long.get("longitude") if lat_long else None,
+            last_sold_date=datetime.fromtimestamp(home["soldDate"] / 1000)
+            if "soldDate" in home
+            else None,
+        )
+
+    def _handle_rentals(self, region_id, region_type):
+        url = f"https://www.redfin.com/stingray/api/v1/search/rentals?al=1&isRentals=true&region_id={region_id}&region_type={region_type}&num_homes=100000"
+
+        response = self.session.get(url)
+        response.raise_for_status()
+        homes = response.json()
+
+        properties_list = []
+
+        for home in homes["homes"]:
+            home_data = home["homeData"]
+            rental_data = home["rentalExtension"]
+
+            property_url = f"https://www.redfin.com{home_data.get('url', '')}"
+            address_info = home_data.get("addressInfo", {})
+            centroid = address_info.get("centroid", {}).get("centroid", {})
+            address = Address(
+                street=address_info.get("formattedStreetLine"),
+                city=address_info.get("city"),
+                state=address_info.get("state"),
+                zip=address_info.get("zip"),
+            )
+
+            price_range = rental_data.get("rentPriceRange", {"min": None, "max": None})
+            bed_range = rental_data.get("bedRange", {"min": None, "max": None})
+            bath_range = rental_data.get("bathRange", {"min": None, "max": None})
+            sqft_range = rental_data.get("sqftRange", {"min": None, "max": None})
+
+            property_ = Property(
+                property_url=property_url,
+                status=Status.FOR_RENT.value,
+                address=address,
+                latitude=centroid.get("latitude"),
+                longitude=centroid.get("longitude"),
+                baths_full=bath_range.get("min"),
+                beds=bed_range.get("min"),
+                list_price=price_range.get("min"),
+                est_sf=sqft_range.get("min"),
+            )
+
+            properties_list.append(property_)
+
+        if not properties_list:
+            raise NoResultsFound("No rentals found for the given location.")
+
+        return properties_list
+
+    def _parse_building(self, building: dict) -> Property:
+        street_address = " ".join(
+            [
+                building["address"]["streetNumber"],
+                building["address"]["directionalPrefix"],
+                building["address"]["streetName"],
+                building["address"]["streetType"],
+            ]
+        )
+        return Property(
+            status=self.status,
+            address=Address(
+                street=street_address,
+                city=building["address"]["city"],
+                state=building["address"]["stateOrProvinceCode"],
+                zip=building["address"]["postalCode"],
+            ),
+            property_url="https://www.redfin.com{}".format(building["url"]),
+        )
+
+    def handle_address(self, home_id: str):
+        """
+        EPs:
+        https://www.redfin.com/stingray/api/home/details/initialInfo?al=1&path=/TX/Austin/70-Rainey-St-78701/unit-1608/home/147337694
+        https://www.redfin.com/stingray/api/home/details/mainHouseInfoPanelInfo?propertyId=147337694&accessLevel=3
+        https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId=147337694&accessLevel=3
+        https://www.redfin.com/stingray/api/home/details/belowTheFold?propertyId=147337694&accessLevel=3
+        """
+        url = "https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId={}&accessLevel=3".format(
+            home_id
+        )
+
+        response = self.session.get(url)
+        response_json = json.loads(response.text.replace("{}&&", ""))
+
+        parsed_home = self._parse_home(
+            response_json["payload"]["addressSectionInfo"], single_search=True
+        )
+        return [parsed_home]
+
+    def search(self):
+        region_id, region_type = self._handle_location()
+
+        if region_type == "state":
+            raise SearchTooBroad(
+                "State searches are not supported, please use a more specific location."
+            )
+
+        if region_type == "address":
+            home_id = region_id
+            return self.handle_address(home_id)
+
+        if self.listing_type == Status.FOR_RENT:
+            return self._handle_rentals(region_id, region_type)
+        else:
+            if self.listing_type == Status.FOR_SALE:
+                url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}&num_homes=100000"
+            else:
+                url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}&sold_within_days=30&num_homes=100000"
+            response = self.session.get(url)
+            response_json = json.loads(response.text.replace("{}&&", ""))
+
+            if "payload" in response_json:
+                homes_list = response_json["payload"].get("homes", [])
+                buildings_list = response_json["payload"].get("buildings", {}).values()
+
+                homes = [self._parse_home(home) for home in homes_list] + [
+                    self._parse_building(building) for building in buildings_list
+                ]
+                return homes
+            else:
+                return []
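One detail worth calling out in the Redfin scraper above: the stingray endpoints prefix their JSON bodies with a literal {}&& guard, which is why every response is stripped before json.loads. A self-contained illustration with a made-up response body:

    import json

    # Redfin "stingray" responses look like:  {}&&{"payload": {...}}
    raw = '{}&&{"payload": {"exactMatch": {"id": "2_12345", "type": "2"}}}'

    payload = json.loads(raw.replace("{}&&", ""))["payload"]
    region_id = payload["exactMatch"]["id"].split("_")[1]  # -> "12345"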
@@ -0,0 +1,308 @@
+"""
+homeharvest.zillow.__init__
+~~~~~~~~~~~~
+
+This module implements the scraper for zillow.com
+"""
+import re
+import json
+
+import tls_client
+
+from .. import Scraper
+from requests.exceptions import HTTPError
+from ....exceptions import GeoCoordsNotFound, NoResultsFound
+from ..models import Property, Address, Status
+import urllib.parse
+from datetime import datetime, timedelta
+
+
+class ZillowScraper(Scraper):
+    def __init__(self, scraper_input):
+        session = tls_client.Session(
+            client_identifier="chrome112", random_tls_extension_order=True
+        )
+
+        super().__init__(scraper_input, session)
+
+        self.session.headers.update(
+            {
+                "authority": "www.zillow.com",
+                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+                "accept-language": "en-US,en;q=0.9",
+                "cache-control": "max-age=0",
+                "sec-fetch-dest": "document",
+                "sec-fetch-mode": "navigate",
+                "sec-fetch-site": "same-origin",
+                "sec-fetch-user": "?1",
+                "upgrade-insecure-requests": "1",
+                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
+            }
+        )
+
+        if not self.is_plausible_location(self.location):
+            raise NoResultsFound("Invalid location input: {}".format(self.location))
+
+        listing_type_to_url_path = {
+            Status.FOR_SALE: "for_sale",
+            Status.FOR_RENT: "for_rent",
+            Status.SOLD: "recently_sold",
+        }
+
+        self.url = f"https://www.zillow.com/homes/{listing_type_to_url_path[self.listing_type]}/{self.location}_rb/"
+
+    def is_plausible_location(self, location: str) -> bool:
+        url = (
+            "https://www.zillowstatic.com/autocomplete/v3/suggestions?q={"
+            "}&abKey=6666272a-4b99-474c-b857-110ec438732b&clientId=homepage-render"
+        ).format(urllib.parse.quote(location))
+
+        resp = self.session.get(url)
+
+        return resp.json()["results"] != []
+
+    def search(self):
+        resp = self.session.get(self.url)
+        if resp.status_code != 200:
+            raise HTTPError(f"bad response status code: {resp.status_code}")
+        content = resp.text
+
+        match = re.search(
+            r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
+            content,
+            re.DOTALL,
+        )
+        if not match:
+            raise NoResultsFound(
+                "No results were found for Zillow with the given Location."
+            )
+
+        json_str = match.group(1)
+        data = json.loads(json_str)
+
+        if "searchPageState" in data["props"]["pageProps"]:
+            pattern = r'window\.mapBounds = \{\s*"west":\s*(-?\d+\.\d+),\s*"east":\s*(-?\d+\.\d+),\s*"south":\s*(-?\d+\.\d+),\s*"north":\s*(-?\d+\.\d+)\s*\};'
+
+            match = re.search(pattern, content)
+
+            if match:
+                coords = [float(coord) for coord in match.groups()]
+                return self._fetch_properties_backend(coords)
+
+            else:
+                raise GeoCoordsNotFound("Box bounds could not be located.")
+
+        elif "gdpClientCache" in data["props"]["pageProps"]:
+            gdp_client_cache = json.loads(data["props"]["pageProps"]["gdpClientCache"])
+            main_key = list(gdp_client_cache.keys())[0]
+
+            property_data = gdp_client_cache[main_key]["property"]
+            property = self._get_single_property_page(property_data)
+
+            return [property]
+        raise NoResultsFound("Specific property data not found in the response.")
+
+    def _fetch_properties_backend(self, coords):
+        url = "https://www.zillow.com/async-create-search-page-state"
+
+        filter_state_for_sale = {
+            "sortSelection": {
+                # "value": "globalrelevanceex"
+                "value": "days"
+            },
+            "isAllHomes": {"value": True},
+        }
+
+        filter_state_for_rent = {
+            "isForRent": {"value": True},
+            "isForSaleByAgent": {"value": False},
+            "isForSaleByOwner": {"value": False},
+            "isNewConstruction": {"value": False},
+            "isComingSoon": {"value": False},
+            "isAuction": {"value": False},
+            "isForSaleForeclosure": {"value": False},
+            "isAllHomes": {"value": True},
+        }
+
+        filter_state_sold = {
+            "isRecentlySold": {"value": True},
+            "isForSaleByAgent": {"value": False},
+            "isForSaleByOwner": {"value": False},
+            "isNewConstruction": {"value": False},
+            "isComingSoon": {"value": False},
+            "isAuction": {"value": False},
+            "isForSaleForeclosure": {"value": False},
+            "isAllHomes": {"value": True},
+        }
+
+        selected_filter = (
+            filter_state_for_rent
+            if self.listing_type == Status.FOR_RENT
+            else filter_state_for_sale
+            if self.listing_type == Status.FOR_SALE
+            else filter_state_sold
+        )
+
+        payload = {
+            "searchQueryState": {
+                "pagination": {},
+                "isMapVisible": True,
+                "mapBounds": {
+                    "west": coords[0],
+                    "east": coords[1],
+                    "south": coords[2],
+                    "north": coords[3],
+                },
+                "filterState": selected_filter,
+                "isListVisible": True,
+                "mapZoom": 11,
+            },
+            "wants": {"cat1": ["mapResults"]},
+            "isDebugRequest": False,
+        }
+        resp = self.session.put(url, json=payload)
+        if resp.status_code != 200:
+            raise HTTPError(f"bad response status code: {resp.status_code}")
+        return self._parse_properties(resp.json())
+
+    @staticmethod
+    def parse_posted_time(time: str) -> datetime:
+        int_time = int(time.split(" ")[0])
+
+        if "hour" in time:
+            return datetime.now() - timedelta(hours=int_time)
+
+        if "day" in time:
+            return datetime.now() - timedelta(days=int_time)
+
+    def _parse_properties(self, property_data: dict):
+        mapresults = property_data["cat1"]["searchResults"]["mapResults"]
+
+        properties_list = []
+
+        for result in mapresults:
+            if "hdpData" in result:
+                home_info = result["hdpData"]["homeInfo"]
+                address_data = {
+                    "street": home_info.get("streetAddress"),
+                    "city": home_info.get("city"),
+                    "state": home_info.get("state"),
+                    "zip": home_info.get("zipcode"),
+                }
+                property_obj = Property(
+                    address=Address(**address_data),
+                    property_url=f"https://www.zillow.com{result['detailUrl']}",
+                    style=home_info.get("homeType"),
+                    status=home_info["statusType"].upper()
+                    if "statusType" in home_info
+                    else self.status,
+                    list_price=home_info.get("price"),
+                    beds=int(home_info["bedrooms"])
+                    if "bedrooms" in home_info
+                    else None,
+                    baths_full=home_info.get("bathrooms"),
+                    est_sf=int(home_info["livingArea"])
+                    if "livingArea" in home_info
+                    else None,
+                    prc_sqft=int(home_info["price"] // home_info["livingArea"])
+                    if "livingArea" in home_info
+                    and home_info["livingArea"] != 0
+                    and "price" in home_info
+                    else None,
+                    latitude=result["latLong"]["latitude"],
+                    longitude=result["latLong"]["longitude"],
+                    lot_sf=round(home_info["lotAreaValue"], 2)
+                    if "lotAreaValue" in home_info
+                    else None,
+                )
+
+                properties_list.append(property_obj)
+
+            elif "isBuilding" in result:
+                price_string = (
+                    result["price"]
+                    .replace("$", "")
+                    .replace(",", "")
+                    .replace("+/mo", "")
+                )
+
+                match = re.search(r"(\d+)", price_string)
+                price_value = int(match.group(1)) if match else None
+                building_obj = Property(
+                    property_url=f"https://www.zillow.com{result['detailUrl']}",
+                    style="BUILDING",
+                    address=self._extract_address(result["address"]),
+                    baths_full=result.get("minBaths"),
+                    neighborhoods=result.get("communityName"),
+                    list_price=price_value if "+/mo" in result.get("price") else None,
+                    latitude=result.get("latLong", {}).get("latitude"),
+                    longitude=result.get("latLong", {}).get("longitude"),
+                )
+
+                properties_list.append(building_obj)
+
+        return properties_list
+
+    def _get_single_property_page(self, property_data: dict):
+        """
+        This method is used when a user enters the exact location & zillow returns just one property
+        """
+        url = (
+            f"https://www.zillow.com{property_data['hdpUrl']}"
+            if "zillow.com" not in property_data["hdpUrl"]
+            else property_data["hdpUrl"]
+        )
+        address_data = property_data["address"]
+        address = Address(
+            street=address_data["streetAddress"],
+            city=address_data["city"],
+            state=address_data["state"],
+            zip=address_data["zipcode"],
+        )
+        property_type = property_data.get("homeType", None)
+        return Property(
+            property_url=url,
+            status=self.status,
+            address=address,
+            yr_blt=property_data.get("yearBuilt"),
+            lot_sf=property_data.get("lotAreaValue"),
+            stories=property_data.get("resoFacts", {}).get("stories"),
+            mls_id=property_data.get("attributionInfo", {}).get("mlsId"),
+            beds=property_data.get("bedrooms"),
+            baths_full=property_data.get("bathrooms"),
+            list_price=property_data.get("price"),
+            est_sf=property_data.get("livingArea"),
+            prc_sqft=property_data.get("resoFacts", {}).get("pricePerSquareFoot"),
+            latitude=property_data.get("latitude"),
+            longitude=property_data.get("longitude"),
+        )
+
+    def _extract_address(self, address_str):
+        """
+        Extract address components from a string formatted like '555 Wedglea Dr, Dallas, TX',
+        and return an Address object.
+        """
+        parts = address_str.split(", ")
+
+        if len(parts) != 3:
+            raise ValueError(f"Unexpected address format: {address_str}")
+
+        address_one = parts[0].strip()
+        city = parts[1].strip()
+        state_zip = parts[2].split(" ")
+
+        if len(state_zip) == 1:
+            state = state_zip[0].strip()
+            zip_code = None
+        elif len(state_zip) == 2:
+            state = state_zip[0].strip()
+            zip_code = state_zip[1].strip()
+        else:
+            raise ValueError(f"Unexpected state/zip format in address: {address_str}")
+
+        return Address(
+            street=address_one,
+            city=city,
+            state=state,
+            zip=zip_code,
+        )
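The Zillow flow hinges on pulling the embedded __NEXT_DATA__ blob out of the page HTML and, for list pages, the window.mapBounds coordinates used to query the map backend. A condensed, self-contained sketch of that extraction; the HTML snippet is fabricated for illustration:

    import json
    import re

    html = """
    <script id="__NEXT_DATA__" type="application/json">{"props": {"pageProps": {"searchPageState": {}}}}</script>
    <script>window.mapBounds = {"west": -105.1, "east": -104.6, "south": 39.5, "north": 39.9};</script>
    """

    # Parse the Next.js data blob embedded in the page.
    next_data = json.loads(
        re.search(
            r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
            html,
            re.DOTALL,
        ).group(1)
    )
    assert "searchPageState" in next_data["props"]["pageProps"]

    # Recover the map bounding box that the backend search payload needs.
    bounds = re.search(
        r'window\.mapBounds = \{\s*"west":\s*(-?\d+\.\d+),\s*"east":\s*(-?\d+\.\d+),'
        r'\s*"south":\s*(-?\d+\.\d+),\s*"north":\s*(-?\d+\.\d+)\s*\};',
        html,
    )
    west, east, south, north = (float(c) for c in bounds.groups())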