- pending date, property type fields (#45)

- alt photos bug fix (#57)
pull/65/head v0.3.14
Zachary Hampton 2024-03-13 19:17:17 -07:00
parent d775540afd
commit 5c2498c62b
4 changed files with 37 additions and 13 deletions

View File

@ -124,6 +124,7 @@ Property
│ ├── days_on_mls │ ├── days_on_mls
│ ├── list_price │ ├── list_price
│ ├── list_date │ ├── list_date
│ ├── pending_date
│ ├── sold_price │ ├── sold_price
│ ├── last_sold_date │ ├── last_sold_date
│ ├── price_per_sqft │ ├── price_per_sqft

View File

@ -23,6 +23,27 @@ class ListingType(Enum):
SOLD = "SOLD" SOLD = "SOLD"
class PropertyType(Enum):
    """Closed set of property-type labels used as the type of ``Description.style``.

    Every member's value is the same upper-case string as its name, so a
    member can be obtained by value, e.g. ``PropertyType("CONDO")``.
    Looking up a string that is not listed here raises ``ValueError``
    (standard ``Enum`` behaviour), so callers converting external data
    should be prepared for unknown labels.
    """

    APARTMENT = "APARTMENT"
    BUILDING = "BUILDING"
    COMMERCIAL = "COMMERCIAL"
    CONDO_TOWNHOME = "CONDO_TOWNHOME"
    CONDO_TOWNHOME_ROWHOME_COOP = "CONDO_TOWNHOME_ROWHOME_COOP"
    # NOTE(review): CONDO and CONDOS are distinct members; presumably both
    # spellings occur in the upstream data - confirm before merging them.
    CONDO = "CONDO"
    CONDOS = "CONDOS"
    COOP = "COOP"
    DUPLEX_TRIPLEX = "DUPLEX_TRIPLEX"
    FARM = "FARM"
    INVESTMENT = "INVESTMENT"
    LAND = "LAND"
    MOBILE = "MOBILE"
    MULTI_FAMILY = "MULTI_FAMILY"
    RENTAL = "RENTAL"
    SINGLE_FAMILY = "SINGLE_FAMILY"
    TOWNHOMES = "TOWNHOMES"
    # Generic bucket; nothing in view shows unknown labels being mapped to it.
    OTHER = "OTHER"
@dataclass @dataclass
class Address: class Address:
street: str | None = None street: str | None = None
@ -36,7 +57,7 @@ class Address:
class Description: class Description:
primary_photo: str | None = None primary_photo: str | None = None
alt_photos: list[str] | None = None alt_photos: list[str] | None = None
style: str | None = None style: PropertyType | None = None
beds: int | None = None beds: int | None = None
baths_full: int | None = None baths_full: int | None = None
baths_half: int | None = None baths_half: int | None = None
@ -58,6 +79,7 @@ class Property:
list_price: int | None = None list_price: int | None = None
list_date: str | None = None list_date: str | None = None
pending_date: str | None = None
last_sold_date: str | None = None last_sold_date: str | None = None
prc_sqft: int | None = None prc_sqft: int | None = None
hoa_fee: int | None = None hoa_fee: int | None = None

View File

@ -9,7 +9,7 @@ from typing import Dict, Union, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from .. import Scraper from .. import Scraper
from ..models import Property, Address, ListingType, Description from ..models import Property, Address, ListingType, Description, PropertyType
class RealtorScraper(Scraper): class RealtorScraper(Scraper):
@ -84,11 +84,10 @@ class RealtorScraper(Scraper):
garage garage
permalink permalink
} }
primary_photo { media {
href photos {
} href
photos { }
href
} }
} }
}""" }"""
@ -120,9 +119,11 @@ class RealtorScraper(Scraper):
"list_date") else None "list_date") else None
last_sold_date_str = property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get( last_sold_date_str = property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get(
"sold_date") else None "sold_date") else None
pending_date_str = property_info["pending_date"].split("T")[0] if property_info.get("pending_date") else None
list_date = datetime.strptime(list_date_str, "%Y-%m-%d") if list_date_str else None list_date = datetime.strptime(list_date_str, "%Y-%m-%d") if list_date_str else None
last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None
pending_date = datetime.strptime(pending_date_str, "%Y-%m-%d") if pending_date_str else None
today = datetime.now() today = datetime.now()
days_on_mls = None days_on_mls = None
@ -150,6 +151,7 @@ class RealtorScraper(Scraper):
and property_info["basic"].get("sqft") and property_info["basic"].get("sqft")
else None, else None,
last_sold_date=last_sold_date, last_sold_date=last_sold_date,
pending_date=pending_date,
latitude=property_info["address"]["location"]["coordinate"].get("lat") latitude=property_info["address"]["location"]["coordinate"].get("lat")
if able_to_get_lat_long if able_to_get_lat_long
else None, else None,
@ -158,9 +160,7 @@ class RealtorScraper(Scraper):
else None, else None,
address=self._parse_address(property_info, search_type="handle_listing"), address=self._parse_address(property_info, search_type="handle_listing"),
description=Description( description=Description(
primary_photo=property_info["primary_photo"].get("href", "").replace("s.jpg", alt_photos=self.process_alt_photos(property_info.get("media", {}).get("photos", [])),
"od-w480_h360_x2.webp?w=1080&q=75"),
alt_photos=self.process_alt_photos(property_info.get("photos", [])),
style=property_info["basic"].get("type", "").upper(), style=property_info["basic"].get("type", "").upper(),
beds=property_info["basic"].get("beds"), beds=property_info["basic"].get("beds"),
baths_full=property_info["basic"].get("baths_full"), baths_full=property_info["basic"].get("baths_full"),
@ -298,6 +298,7 @@ class RealtorScraper(Scraper):
count count
total total
results { results {
pending_date
property_id property_id
list_date list_date
status status
@ -310,6 +311,7 @@ class RealtorScraper(Scraper):
is_pending is_pending
} }
description { description {
type
sqft sqft
beds beds
baths_full baths_full
@ -663,7 +665,6 @@ class RealtorScraper(Scraper):
@staticmethod @staticmethod
def _parse_description(result: dict) -> Description: def _parse_description(result: dict) -> Description:
description_data = result.get("description", {}) description_data = result.get("description", {})
if description_data is None or not isinstance(description_data, dict): if description_data is None or not isinstance(description_data, dict):
@ -683,7 +684,7 @@ class RealtorScraper(Scraper):
return Description( return Description(
primary_photo=primary_photo, primary_photo=primary_photo,
alt_photos=RealtorScraper.process_alt_photos(result.get("photos")), alt_photos=RealtorScraper.process_alt_photos(result.get("photos")),
style=style, style=PropertyType(style) if style else None,
beds=description_data.get("beds"), beds=description_data.get("beds"),
baths_full=description_data.get("baths_full"), baths_full=description_data.get("baths_full"),
baths_half=description_data.get("baths_half"), baths_half=description_data.get("baths_half"),

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "homeharvest" name = "homeharvest"
version = "0.3.13" version = "0.3.14"
description = "Real estate scraping library" description = "Real estate scraping library"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"] authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/HomeHarvest" homepage = "https://github.com/Bunsly/HomeHarvest"