mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-06 20:44:28 -08:00
fix: simplify fields
This commit is contained in:
@@ -1,7 +1,13 @@
|
||||
"""
|
||||
homeharvest.redfin.__init__
|
||||
~~~~~~~~~~~~
|
||||
|
||||
This module implements the scraper for redfin.com
|
||||
"""
|
||||
import json
|
||||
from typing import Any
|
||||
from .. import Scraper
|
||||
from ....utils import parse_address_two, parse_unit
|
||||
from ....utils import parse_address_two, parse_address_one
|
||||
from ..models import Property, Address, PropertyType, ListingType, SiteName
|
||||
from ....exceptions import NoResultsFound
|
||||
|
||||
@@ -12,9 +18,7 @@ class RedfinScraper(Scraper):
|
||||
self.listing_type = scraper_input.listing_type
|
||||
|
||||
def _handle_location(self):
|
||||
url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(
|
||||
self.location
|
||||
)
|
||||
url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(self.location)
|
||||
|
||||
response = self.session.get(url)
|
||||
response_json = json.loads(response.text.replace("{}&&", ""))
|
||||
@@ -28,9 +32,7 @@ class RedfinScraper(Scraper):
|
||||
return "address" #: address, needs to be handled differently
|
||||
|
||||
if "exactMatch" not in response_json["payload"]:
|
||||
raise NoResultsFound(
|
||||
"No results found for location: {}".format(self.location)
|
||||
)
|
||||
raise NoResultsFound("No results found for location: {}".format(self.location))
|
||||
|
||||
if response_json["payload"]["exactMatch"] is not None:
|
||||
target = response_json["payload"]["exactMatch"]
|
||||
@@ -45,39 +47,30 @@ class RedfinScraper(Scraper):
|
||||
return home[key]["value"]
|
||||
|
||||
if not single_search:
|
||||
street_address, unit = parse_address_two(get_value("streetLine"))
|
||||
unit = parse_unit(get_value("streetLine"))
|
||||
address = Address(
|
||||
street_address=street_address,
|
||||
address_one=parse_address_one(get_value("streetLine"))[0],
|
||||
address_two=parse_address_one(get_value("streetLine"))[1],
|
||||
city=home.get("city"),
|
||||
state=home.get("state"),
|
||||
zip_code=home.get("zip"),
|
||||
unit=unit,
|
||||
country="USA",
|
||||
)
|
||||
else:
|
||||
address_info = home.get("streetAddress")
|
||||
street_address, unit = parse_address_two(address_info.get("assembledAddress"))
|
||||
address_one, address_two = parse_address_one(address_info.get("assembledAddress"))
|
||||
|
||||
address = Address(
|
||||
street_address=street_address,
|
||||
address_one=address_one,
|
||||
address_two=address_two,
|
||||
city=home.get("city"),
|
||||
state=home.get("state"),
|
||||
zip_code=home.get("zip"),
|
||||
unit=unit,
|
||||
country="USA",
|
||||
)
|
||||
|
||||
url = "https://www.redfin.com{}".format(home["url"])
|
||||
#: property_type = home["propertyType"] if "propertyType" in home else None
|
||||
lot_size_data = home.get("lotSize")
|
||||
|
||||
if not isinstance(lot_size_data, int):
|
||||
lot_size = (
|
||||
lot_size_data.get("value", None)
|
||||
if isinstance(lot_size_data, dict)
|
||||
else None
|
||||
)
|
||||
lot_size = lot_size_data.get("value", None) if isinstance(lot_size_data, dict) else None
|
||||
else:
|
||||
lot_size = lot_size_data
|
||||
|
||||
@@ -86,26 +79,24 @@ class RedfinScraper(Scraper):
|
||||
listing_type=self.listing_type,
|
||||
address=address,
|
||||
property_url=url,
|
||||
beds=home["beds"] if "beds" in home else None,
|
||||
baths=home["baths"] if "baths" in home else None,
|
||||
beds_min=home["beds"] if "beds" in home else None,
|
||||
beds_max=home["beds"] if "beds" in home else None,
|
||||
baths_min=home["baths"] if "baths" in home else None,
|
||||
baths_max=home["baths"] if "baths" in home else None,
|
||||
price_min=get_value("price"),
|
||||
price_max=get_value("price"),
|
||||
sqft_min=get_value("sqFt"),
|
||||
sqft_max=get_value("sqFt"),
|
||||
stories=home["stories"] if "stories" in home else None,
|
||||
agent_name=get_value("listingAgent"),
|
||||
description=home["listingRemarks"] if "listingRemarks" in home else None,
|
||||
year_built=get_value("yearBuilt")
|
||||
if not single_search
|
||||
else home["yearBuilt"],
|
||||
square_feet=get_value("sqFt"),
|
||||
year_built=get_value("yearBuilt") if not single_search else home["yearBuilt"],
|
||||
lot_area_value=lot_size,
|
||||
property_type=PropertyType.from_int_code(home.get("propertyType")),
|
||||
price_per_sqft=get_value("pricePerSqFt"),
|
||||
price=get_value("price"),
|
||||
mls_id=get_value("mlsId"),
|
||||
latitude=home["latLong"]["latitude"]
|
||||
if "latLong" in home and "latitude" in home["latLong"]
|
||||
else None,
|
||||
longitude=home["latLong"]["longitude"]
|
||||
if "latLong" in home and "longitude" in home["latLong"]
|
||||
else None,
|
||||
latitude=home["latLong"]["latitude"] if "latLong" in home and "latitude" in home["latLong"] else None,
|
||||
longitude=home["latLong"]["longitude"] if "latLong" in home and "longitude" in home["latLong"] else None,
|
||||
)
|
||||
|
||||
def _handle_rentals(self, region_id, region_type):
|
||||
@@ -125,12 +116,10 @@ class RedfinScraper(Scraper):
|
||||
address_info = home_data.get("addressInfo", {})
|
||||
centroid = address_info.get("centroid", {}).get("centroid", {})
|
||||
address = Address(
|
||||
street_address=address_info.get("formattedStreetLine", None),
|
||||
city=address_info.get("city", None),
|
||||
state=address_info.get("state", None),
|
||||
zip_code=address_info.get("zip", None),
|
||||
unit=None,
|
||||
country="US" if address_info.get("countryCode", None) == 1 else None,
|
||||
address_one=parse_address_one(address_info.get("formattedStreetLine"))[0],
|
||||
city=address_info.get("city"),
|
||||
state=address_info.get("state"),
|
||||
zip_code=address_info.get("zip"),
|
||||
)
|
||||
|
||||
price_range = rental_data.get("rentPriceRange", {"min": None, "max": None})
|
||||
@@ -143,20 +132,20 @@ class RedfinScraper(Scraper):
|
||||
site_name=SiteName.REDFIN,
|
||||
listing_type=ListingType.FOR_RENT,
|
||||
address=address,
|
||||
apt_min_beds=bed_range.get("min", None),
|
||||
apt_min_baths=bath_range.get("min", None),
|
||||
apt_max_beds=bed_range.get("max", None),
|
||||
apt_max_baths=bath_range.get("max", None),
|
||||
description=rental_data.get("description", None),
|
||||
latitude=centroid.get("latitude", None),
|
||||
longitude=centroid.get("longitude", None),
|
||||
apt_min_price=price_range.get("min", None),
|
||||
apt_max_price=price_range.get("max", None),
|
||||
apt_min_sqft=sqft_range.get("min", None),
|
||||
apt_max_sqft=sqft_range.get("max", None),
|
||||
img_src=home_data.get("staticMapUrl", None),
|
||||
posted_time=rental_data.get("lastUpdated", None),
|
||||
bldg_name=rental_data.get("propertyName", None),
|
||||
description=rental_data.get("description"),
|
||||
latitude=centroid.get("latitude"),
|
||||
longitude=centroid.get("longitude"),
|
||||
baths_min=bath_range.get("min"),
|
||||
baths_max=bath_range.get("max"),
|
||||
beds_min=bed_range.get("min"),
|
||||
beds_max=bed_range.get("max"),
|
||||
price_min=price_range.get("min"),
|
||||
price_max=price_range.get("max"),
|
||||
sqft_min=sqft_range.get("min"),
|
||||
sqft_max=sqft_range.get("max"),
|
||||
img_src=home_data.get("staticMapUrl"),
|
||||
posted_time=rental_data.get("lastUpdated"),
|
||||
bldg_name=rental_data.get("propertyName"),
|
||||
)
|
||||
|
||||
properties_list.append(property_)
|
||||
@@ -175,16 +164,15 @@ class RedfinScraper(Scraper):
|
||||
building["address"]["streetType"],
|
||||
]
|
||||
)
|
||||
street_address, unit = parse_address_two(street_address)
|
||||
return Property(
|
||||
site_name=self.site_name,
|
||||
property_type=PropertyType("BUILDING"),
|
||||
address=Address(
|
||||
street_address=street_address,
|
||||
address_one=parse_address_one(street_address)[0],
|
||||
city=building["address"]["city"],
|
||||
state=building["address"]["stateOrProvinceCode"],
|
||||
zip_code=building["address"]["postalCode"],
|
||||
unit=parse_unit(
|
||||
address_two=parse_address_two(
|
||||
" ".join(
|
||||
[
|
||||
building["address"]["unitType"],
|
||||
@@ -195,7 +183,7 @@ class RedfinScraper(Scraper):
|
||||
),
|
||||
property_url="https://www.redfin.com{}".format(building["url"]),
|
||||
listing_type=self.listing_type,
|
||||
bldg_unit_count=building["numUnitsForSale"],
|
||||
unit_count=building["numUnitsForSale"],
|
||||
)
|
||||
|
||||
def handle_address(self, home_id: str):
|
||||
@@ -206,7 +194,6 @@ class RedfinScraper(Scraper):
|
||||
https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId=147337694&accessLevel=3
|
||||
https://www.redfin.com/stingray/api/home/details/belowTheFold?propertyId=147337694&accessLevel=3
|
||||
"""
|
||||
|
||||
url = "https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId={}&accessLevel=3".format(
|
||||
home_id
|
||||
)
|
||||
@@ -214,9 +201,7 @@ class RedfinScraper(Scraper):
|
||||
response = self.session.get(url)
|
||||
response_json = json.loads(response.text.replace("{}&&", ""))
|
||||
|
||||
parsed_home = self._parse_home(
|
||||
response_json["payload"]["addressSectionInfo"], single_search=True
|
||||
)
|
||||
parsed_home = self._parse_home(response_json["payload"]["addressSectionInfo"], single_search=True)
|
||||
return [parsed_home]
|
||||
|
||||
def search(self):
|
||||
@@ -235,10 +220,7 @@ class RedfinScraper(Scraper):
|
||||
url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}&sold_within_days=30&num_homes=100000"
|
||||
response = self.session.get(url)
|
||||
response_json = json.loads(response.text.replace("{}&&", ""))
|
||||
homes = [
|
||||
self._parse_home(home) for home in response_json["payload"]["homes"]
|
||||
] + [
|
||||
self._parse_building(building)
|
||||
for building in response_json["payload"]["buildings"].values()
|
||||
homes = [self._parse_home(home) for home in response_json["payload"]["homes"]] + [
|
||||
self._parse_building(building) for building in response_json["payload"]["buildings"].values()
|
||||
]
|
||||
return homes
|
||||
|
||||
Reference in New Issue
Block a user