diff --git a/.gitignore b/.gitignore index 1f97e4a..41dd5d2 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ **/__pycache__/ **/.pytest_cache/ *.pyc -/.ipynb_checkpoints/ \ No newline at end of file +/.ipynb_checkpoints/ +*.csv \ No newline at end of file diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index 728a8c2..c20b059 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -38,6 +38,9 @@ def get_ordered_properties(result: Property) -> list[str]: "currency", "price", "apt_min_price", + "apt_max_price", + "apt_min_sqft", + "apt_max_sqft", "tax_assessed_value", "square_feet", "price_per_sqft", diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py index 8385405..3875a30 100644 --- a/homeharvest/core/scrapers/models.py +++ b/homeharvest/core/scrapers/models.py @@ -103,3 +103,6 @@ class Property: # apt apt_min_price: int | None = None + apt_max_price: int | None = None + apt_min_sqft: int | None = None + apt_max_sqft: int | None = None diff --git a/homeharvest/core/scrapers/redfin/__init__.py b/homeharvest/core/scrapers/redfin/__init__.py index 3e4fbde..511db46 100644 --- a/homeharvest/core/scrapers/redfin/__init__.py +++ b/homeharvest/core/scrapers/redfin/__init__.py @@ -2,7 +2,7 @@ import json from typing import Any from .. import Scraper from ....utils import parse_address_two, parse_unit -from ..models import Property, Address, PropertyType +from ..models import Property, Address, PropertyType, ListingType, SiteName from ....exceptions import NoResultsFound @@ -108,6 +108,64 @@ class RedfinScraper(Scraper): else None, ) + def _handle_rentals(self, region_id, region_type): + url = f"https://www.redfin.com/stingray/api/v1/search/rentals?al=1&isRentals=true&region_id={region_id}&region_type={region_type}" + + response = self.session.get(url) + response.raise_for_status() # This will raise an error if the response contains an HTTP error status. 
+ + homes = response.json() + + properties_list = [] + + for home in homes["homes"]: + home_data = home["homeData"] + rental_data = home["rentalExtension"] + + property_url = f"https://www.redfin.com{home_data.get('url', '')}" + address_info = home_data.get("addressInfo", {}) + centroid = address_info.get("centroid", {}).get("centroid", {}) + address = Address( + street_address=address_info.get("formattedStreetLine", None), + city=address_info.get("city", None), + state=address_info.get("state", None), + zip_code=address_info.get("zip", None), + unit=None, + country="US" if address_info.get("countryCode", None) == 1 else None, + ) + + price_range = rental_data.get("rentPriceRange", {"min": None, "max": None}) + bed_range = rental_data.get("bedRange", {"min": None, "max": None}) + bath_range = rental_data.get("bathRange", {"min": None, "max": None}) + sqft_range = rental_data.get("sqftRange", {"min": None, "max": None}) + + property_ = Property( + property_url=property_url, + site_name=SiteName.REDFIN, + listing_type=ListingType.FOR_RENT, + address=address, + square_feet=sqft_range.get("min", None), + beds=bed_range.get("min", None), + baths=bath_range.get("min", None), + description=rental_data.get("description", None), + latitude=centroid.get("latitude", None), + longitude=centroid.get("longitude", None), + apt_min_price=price_range.get("min", None), + apt_max_price=price_range.get("max", None), + apt_min_sqft=sqft_range.get("min", None), + apt_max_sqft=sqft_range.get("max", None), + img_src=home_data.get("staticMapUrl", None), + posted_time=rental_data.get("lastUpdated", None), + bldg_name=rental_data.get("propertyName", None), + ) + + properties_list.append(property_) + + if not properties_list: + raise NoResultsFound("No rentals found for the given location.") + + return properties_list + def _parse_building(self, building: dict) -> Property: street_address = " ".join( [ @@ -168,18 +226,19 @@ class RedfinScraper(Scraper): home_id = region_id return 
self.handle_address(home_id) - url = "https://www.redfin.com/stingray/api/gis?al=1&region_id={}&region_type={}".format( - region_id, region_type - ) - - response = self.session.get(url) - response_json = json.loads(response.text.replace("{}&&", "")) - - homes = [ - self._parse_home(home) for home in response_json["payload"]["homes"] - ] + [ - self._parse_building(building) - for building in response_json["payload"]["buildings"].values() - ] - - return homes + if self.listing_type == ListingType.FOR_RENT: + return self._handle_rentals(region_id, region_type) + elif self.listing_type == ListingType.FOR_SALE: + url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}" + response = self.session.get(url) + response_json = json.loads(response.text.replace("{}&&", "")) + homes = [ + self._parse_home(home) for home in response_json["payload"]["homes"] + ] + [ + self._parse_building(building) + for building in response_json["payload"]["buildings"].values() + ] + return homes + else: + # Handle other cases, maybe raise an error if the listing type is not recognized. 
+ pass diff --git a/homeharvest/core/scrapers/zillow/__init__.py b/homeharvest/core/scrapers/zillow/__init__.py index 646872d..3eb108c 100644 --- a/homeharvest/core/scrapers/zillow/__init__.py +++ b/homeharvest/core/scrapers/zillow/__init__.py @@ -22,14 +22,14 @@ class ZillowScraper(Scraper): self.url = f"https://www.zillow.com/homes/recently_sold/{self.location}_rb/" def is_plausible_location(self, location: str) -> bool: - url = ('https://www.zillowstatic.com/autocomplete/v3/suggestions?q={' - '}&abKey=6666272a-4b99-474c-b857-110ec438732b&clientId=homepage-render').format( - location - ) + url = ( + "https://www.zillowstatic.com/autocomplete/v3/suggestions?q={" + "}&abKey=6666272a-4b99-474c-b857-110ec438732b&clientId=homepage-render" + ).format(location) response = self.session.get(url) - return response.json()['results'] != [] + return response.json()["results"] != [] def search(self): resp = self.session.get(self.url, headers=self._get_headers()) diff --git a/tests/test_redfin.py b/tests/test_redfin.py index 8d4c194..ef83f9e 100644 --- a/tests/test_redfin.py +++ b/tests/test_redfin.py @@ -15,9 +15,10 @@ def test_redfin(): scrape_property( location="Phoenix, AZ, USA", site_name=["redfin"], listing_type="for_rent" ), - scrape_property( - location="Dallas, TX, USA", site_name="redfin", listing_type="sold" - ), + # TODO + # scrape_property( + # location="Dallas, TX, USA", site_name="redfin", listing_type="sold" + # ), scrape_property(location="85281", site_name="redfin"), ]