From 79082090cb5e0ed3cb9b2af70c28a8a4b8e18ad4 Mon Sep 17 00:00:00 2001 From: Zachary Hampton Date: Tue, 15 Jul 2025 12:25:43 -0700 Subject: [PATCH] - pydantic conversion --- homeharvest/core/scrapers/__init__.py | 5 +-- homeharvest/core/scrapers/models.py | 56 ++++++++++++----------- homeharvest/utils.py | 64 +++++++++++++-------------- 3 files changed, 64 insertions(+), 61 deletions(-) diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index 466bb34..8e243c1 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -1,5 +1,4 @@ from __future__ import annotations -from dataclasses import dataclass from typing import Union import requests @@ -9,10 +8,10 @@ import uuid from ...exceptions import AuthenticationError from .models import Property, ListingType, SiteName, SearchPropertyType, ReturnType import json +from pydantic import BaseModel -@dataclass -class ScraperInput: +class ScraperInput(BaseModel): location: str listing_type: ListingType property_type: list[SearchPropertyType] | None = None diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py index a6faba0..488f39a 100644 --- a/homeharvest/core/scrapers/models.py +++ b/homeharvest/core/scrapers/models.py @@ -1,7 +1,7 @@ from __future__ import annotations -from dataclasses import dataclass from enum import Enum from typing import Optional +from pydantic import BaseModel, computed_field class ReturnType(Enum): @@ -44,12 +44,6 @@ class ListingType(Enum): SOLD = "SOLD" -@dataclass -class Agent: - name: str | None = None - phone: str | None = None - - class PropertyType(Enum): APARTMENT = "APARTMENT" BUILDING = "BUILDING" @@ -74,21 +68,40 @@ class PropertyType(Enum): OTHER = "OTHER" -@dataclass -class Address: - formatted_address: str | None = None +class Address(BaseModel): full_line: str | None = None street: str | None = None unit: str | None = None city: str | None = None state: str | None = None zip: str | None = None + + @computed_field + @property + def formatted_address(self) -> str | None: + """Computed property that combines full_line, city, state, and zip into a formatted address.""" + parts = [] + + if self.full_line: + parts.append(self.full_line) + + city_state_zip = [] + if self.city: + city_state_zip.append(self.city) + if self.state: + city_state_zip.append(self.state) + if self.zip: + city_state_zip.append(self.zip) + + if city_state_zip: + parts.append(", ".join(city_state_zip)) + + return ", ".join(parts) if parts else None -@dataclass -class Description: +class Description(BaseModel): primary_photo: str | None = None alt_photos: list[str] | None = None style: PropertyType | None = None @@ -104,21 +117,18 @@ class Description: text: str | None = None -@dataclass -class AgentPhone: #: For documentation purposes only (at the moment) +class AgentPhone(BaseModel): number: str | None = None type: str | None = None primary: bool | None = None ext: str | None = None -@dataclass -class Entity: +class Entity(BaseModel): name: str uuid: str | None = None -@dataclass class Agent(Entity): mls_set: str | None = None nrds_id: str | None = None @@ -127,7 +137,6 @@ class Agent(Entity): href: str | None = None -@dataclass class Office(Entity): mls_set: str | None = None email: str | None = None @@ -135,28 +144,23 @@ class Office(Entity): phones: list[dict] | AgentPhone | None = None -@dataclass class Broker(Entity): pass -@dataclass class Builder(Entity): pass -@dataclass -class Advertisers: +class Advertisers(BaseModel): agent: Agent | None = None broker: Broker | None = None builder: Builder | None = None office: Office | None = None -@dataclass -class Property: +class Property(BaseModel): property_url: str - property_id: str #: allows_cats: bool #: allows_dogs: bool @@ -188,7 +192,7 @@ class Property: neighborhoods: Optional[str] = None county: Optional[str] = None fips_code: Optional[str] = None - nearby_schools: list[str] = None + nearby_schools: list[str] | None = None assessed_value: int | None = None estimated_value: int | None = None tax: int | None = None diff --git a/homeharvest/utils.py b/homeharvest/utils.py index ee4a078..cb718af 100644 --- a/homeharvest/utils.py +++ b/homeharvest/utils.py @@ -69,45 +69,45 @@ ordered_properties = [ def process_result(result: Property) -> pd.DataFrame: prop_data = {prop: None for prop in ordered_properties} - prop_data.update(result.__dict__) + prop_data.update(result.model_dump()) - if "address" in prop_data: + if "address" in prop_data and prop_data["address"]: address_data = prop_data["address"] - prop_data["full_street_line"] = address_data.full_line - prop_data["street"] = address_data.street - prop_data["unit"] = address_data.unit - prop_data["city"] = address_data.city - prop_data["state"] = address_data.state - prop_data["zip_code"] = address_data.zip + prop_data["full_street_line"] = address_data.get("full_line") + prop_data["street"] = address_data.get("street") + prop_data["unit"] = address_data.get("unit") + prop_data["city"] = address_data.get("city") + prop_data["state"] = address_data.get("state") + prop_data["zip_code"] = address_data.get("zip") if "advertisers" in prop_data and prop_data.get("advertisers"): - advertiser_data: Advertisers | None = prop_data["advertisers"] - if advertiser_data.agent: - agent_data = advertiser_data.agent - prop_data["agent_id"] = agent_data.uuid - prop_data["agent_name"] = agent_data.name - prop_data["agent_email"] = agent_data.email - prop_data["agent_phones"] = agent_data.phones - prop_data["agent_mls_set"] = agent_data.mls_set - prop_data["agent_nrds_id"] = agent_data.nrds_id + advertiser_data = prop_data["advertisers"] + if advertiser_data.get("agent"): + agent_data = advertiser_data["agent"] + prop_data["agent_id"] = agent_data.get("uuid") + prop_data["agent_name"] = agent_data.get("name") + prop_data["agent_email"] = agent_data.get("email") + prop_data["agent_phones"] = agent_data.get("phones") + prop_data["agent_mls_set"] = agent_data.get("mls_set") + prop_data["agent_nrds_id"] = agent_data.get("nrds_id") - if advertiser_data.broker: - broker_data = advertiser_data.broker - prop_data["broker_id"] = broker_data.uuid - prop_data["broker_name"] = broker_data.name + if advertiser_data.get("broker"): + broker_data = advertiser_data["broker"] + prop_data["broker_id"] = broker_data.get("uuid") + prop_data["broker_name"] = broker_data.get("name") - if advertiser_data.builder: - builder_data = advertiser_data.builder - prop_data["builder_id"] = builder_data.uuid - prop_data["builder_name"] = builder_data.name + if advertiser_data.get("builder"): + builder_data = advertiser_data["builder"] + prop_data["builder_id"] = builder_data.get("uuid") + prop_data["builder_name"] = builder_data.get("name") - if advertiser_data.office: - office_data = advertiser_data.office - prop_data["office_id"] = office_data.uuid - prop_data["office_name"] = office_data.name - prop_data["office_email"] = office_data.email - prop_data["office_phones"] = office_data.phones - prop_data["office_mls_set"] = office_data.mls_set + if advertiser_data.get("office"): + office_data = advertiser_data["office"] + prop_data["office_id"] = office_data.get("uuid") + prop_data["office_name"] = office_data.get("name") + prop_data["office_email"] = office_data.get("email") + prop_data["office_phones"] = office_data.get("phones") + prop_data["office_mls_set"] = office_data.get("mls_set") prop_data["price_per_sqft"] = prop_data["prc_sqft"] prop_data["nearby_schools"] = filter(None, prop_data["nearby_schools"]) if prop_data["nearby_schools"] else None