From 5c2498c62b1da725278c3e3d906ec511f80979ba Mon Sep 17 00:00:00 2001 From: Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com> Date: Wed, 13 Mar 2024 19:17:17 -0700 Subject: [PATCH] - pending date, property type fields (#45) - alt photos bug fix (#57) --- README.md | 1 + homeharvest/core/scrapers/models.py | 24 ++++++++++++++++++- homeharvest/core/scrapers/realtor/__init__.py | 23 +++++++++--------- pyproject.toml | 2 +- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 4f42079..2018d70 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,7 @@ Property │ ├── days_on_mls │ ├── list_price │ ├── list_date +│ ├── pending_date │ ├── sold_price │ ├── last_sold_date │ ├── price_per_sqft diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py index 6787204..8497a93 100644 --- a/homeharvest/core/scrapers/models.py +++ b/homeharvest/core/scrapers/models.py @@ -23,6 +23,27 @@ class ListingType(Enum): SOLD = "SOLD" +class PropertyType(Enum): + APARTMENT = "APARTMENT" + BUILDING = "BUILDING" + COMMERCIAL = "COMMERCIAL" + CONDO_TOWNHOME = "CONDO_TOWNHOME" + CONDO_TOWNHOME_ROWHOME_COOP = "CONDO_TOWNHOME_ROWHOME_COOP" + CONDO = "CONDO" + CONDOS = "CONDOS" + COOP = "COOP" + DUPLEX_TRIPLEX = "DUPLEX_TRIPLEX" + FARM = "FARM" + INVESTMENT = "INVESTMENT" + LAND = "LAND" + MOBILE = "MOBILE" + MULTI_FAMILY = "MULTI_FAMILY" + RENTAL = "RENTAL" + SINGLE_FAMILY = "SINGLE_FAMILY" + TOWNHOMES = "TOWNHOMES" + OTHER = "OTHER" + + @dataclass class Address: street: str | None = None @@ -36,7 +57,7 @@ class Address: class Description: primary_photo: str | None = None alt_photos: list[str] | None = None - style: str | None = None + style: PropertyType | None = None beds: int | None = None baths_full: int | None = None baths_half: int | None = None @@ -58,6 +79,7 @@ class Property: list_price: int | None = None list_date: str | None = None + pending_date: str | None = None last_sold_date: str | None = None prc_sqft: int | None = None hoa_fee: int | None = None diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index f412b0a..e298ecc 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -9,7 +9,7 @@ from typing import Dict, Union, Optional from concurrent.futures import ThreadPoolExecutor, as_completed from .. import Scraper -from ..models import Property, Address, ListingType, Description +from ..models import Property, Address, ListingType, Description, PropertyType class RealtorScraper(Scraper): @@ -84,11 +84,10 @@ class RealtorScraper(Scraper): garage permalink } - primary_photo { - href - } - photos { - href + media { + photos { + href + } } } }""" @@ -120,9 +119,11 @@ class RealtorScraper(Scraper): "list_date") else None last_sold_date_str = property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get( "sold_date") else None + pending_date_str = property_info["pending_date"].split("T")[0] if property_info.get("pending_date") else None list_date = datetime.strptime(list_date_str, "%Y-%m-%d") if list_date_str else None last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None + pending_date = datetime.strptime(pending_date_str, "%Y-%m-%d") if pending_date_str else None today = datetime.now() days_on_mls = None @@ -150,6 +151,7 @@ class RealtorScraper(Scraper): and property_info["basic"].get("sqft") else None, last_sold_date=last_sold_date, + pending_date=pending_date, latitude=property_info["address"]["location"]["coordinate"].get("lat") if able_to_get_lat_long else None, @@ -158,9 +160,7 @@ class RealtorScraper(Scraper): else None, address=self._parse_address(property_info, search_type="handle_listing"), description=Description( - primary_photo=property_info["primary_photo"].get("href", "").replace("s.jpg", - "od-w480_h360_x2.webp?w=1080&q=75"), - alt_photos=self.process_alt_photos(property_info.get("photos", [])), + alt_photos=self.process_alt_photos(property_info.get("media", {}).get("photos", [])), style=property_info["basic"].get("type", "").upper(), beds=property_info["basic"].get("beds"), baths_full=property_info["basic"].get("baths_full"), @@ -298,6 +298,7 @@ class RealtorScraper(Scraper): count total results { + pending_date property_id list_date status @@ -310,6 +311,7 @@ class RealtorScraper(Scraper): is_pending } description { + type sqft beds baths_full @@ -663,7 +665,6 @@ class RealtorScraper(Scraper): @staticmethod def _parse_description(result: dict) -> Description: - description_data = result.get("description", {}) if description_data is None or not isinstance(description_data, dict): @@ -683,7 +684,7 @@ class RealtorScraper(Scraper): return Description( primary_photo=primary_photo, alt_photos=RealtorScraper.process_alt_photos(result.get("photos")), - style=style, + style=PropertyType(style) if style else None, beds=description_data.get("beds"), baths_full=description_data.get("baths_full"), baths_half=description_data.get("baths_half"), diff --git a/pyproject.toml b/pyproject.toml index 9f82813..7803c3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.3.13" +version = "0.3.14" description = "Real estate scraping library" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/Bunsly/HomeHarvest"