HomeHarvest/homeharvest/utils.py

167 lines
5.4 KiB
Python
Raw Normal View History

from __future__ import annotations
2023-10-03 22:21:16 -07:00
import pandas as pd
2023-11-03 16:35:41 -07:00
from datetime import datetime
from .core.scrapers.models import Property, ListingType, Advertisers
2023-11-03 16:35:41 -07:00
from .exceptions import InvalidListingType, InvalidDate
2023-10-03 22:21:16 -07:00
ordered_properties = [
2023-10-04 18:44:47 -07:00
"property_url",
"property_id",
"listing_id",
2023-10-04 18:44:47 -07:00
"mls",
"mls_id",
"status",
"text",
2023-10-04 18:44:47 -07:00
"style",
"full_street_line",
2023-10-04 18:44:47 -07:00
"street",
"unit",
"city",
"state",
"zip_code",
"beds",
"full_baths",
"half_baths",
"sqft",
"year_built",
2023-10-09 09:00:36 -07:00
"days_on_mls",
2023-10-04 18:44:47 -07:00
"list_price",
"list_price_min",
"list_price_max",
2023-10-04 18:44:47 -07:00
"list_date",
"sold_price",
"last_sold_date",
2024-04-30 13:29:54 -07:00
"assessed_value",
"estimated_value",
2025-01-06 03:28:36 -08:00
"tax",
"tax_history",
"new_construction",
2023-10-04 18:44:47 -07:00
"lot_sqft",
"price_per_sqft",
"latitude",
"longitude",
"neighborhoods",
"county",
"fips_code",
2023-10-04 18:44:47 -07:00
"stories",
"hoa_fee",
"parking_garage",
"agent_id",
"agent_name",
"agent_email",
"agent_phones",
2024-08-23 10:47:45 -07:00
"agent_mls_set",
"agent_nrds_id",
"broker_id",
"broker_name",
"builder_id",
"builder_name",
"office_id",
2024-08-23 10:54:43 -07:00
"office_mls_set",
"office_name",
"office_email",
"office_phones",
2024-04-16 18:01:20 -07:00
"nearby_schools",
"primary_photo",
"alt_photos",
2023-10-03 22:21:16 -07:00
]
def process_result(result: Property) -> pd.DataFrame:
prop_data = {prop: None for prop in ordered_properties}
prop_data.update(result.__dict__)
if "address" in prop_data:
address_data = prop_data["address"]
prop_data["full_street_line"] = address_data.full_line
2023-10-04 18:44:47 -07:00
prop_data["street"] = address_data.street
prop_data["unit"] = address_data.unit
prop_data["city"] = address_data.city
prop_data["state"] = address_data.state
prop_data["zip_code"] = address_data.zip
2023-10-03 22:21:16 -07:00
if "advertisers" in prop_data and prop_data.get("advertisers"):
advertiser_data: Advertisers | None = prop_data["advertisers"]
if advertiser_data.agent:
agent_data = advertiser_data.agent
prop_data["agent_id"] = agent_data.uuid
prop_data["agent_name"] = agent_data.name
prop_data["agent_email"] = agent_data.email
prop_data["agent_phones"] = agent_data.phones
2024-08-23 10:47:45 -07:00
prop_data["agent_mls_set"] = agent_data.mls_set
prop_data["agent_nrds_id"] = agent_data.nrds_id
2024-04-16 12:55:44 -07:00
if advertiser_data.broker:
broker_data = advertiser_data.broker
prop_data["broker_id"] = broker_data.uuid
prop_data["broker_name"] = broker_data.name
if advertiser_data.builder:
builder_data = advertiser_data.builder
prop_data["builder_id"] = builder_data.uuid
prop_data["builder_name"] = builder_data.name
if advertiser_data.office:
office_data = advertiser_data.office
prop_data["office_id"] = office_data.uuid
prop_data["office_name"] = office_data.name
prop_data["office_email"] = office_data.email
prop_data["office_phones"] = office_data.phones
2024-08-23 10:54:43 -07:00
prop_data["office_mls_set"] = office_data.mls_set
2024-05-11 21:35:29 -07:00
2023-10-04 18:44:47 -07:00
prop_data["price_per_sqft"] = prop_data["prc_sqft"]
2024-04-16 18:01:20 -07:00
prop_data["nearby_schools"] = filter(None, prop_data["nearby_schools"]) if prop_data["nearby_schools"] else None
prop_data["nearby_schools"] = ", ".join(set(prop_data["nearby_schools"])) if prop_data["nearby_schools"] else None
2023-10-03 22:21:16 -07:00
2023-10-04 06:58:55 -07:00
description = result.description
2024-06-06 15:24:12 -07:00
if description:
prop_data["primary_photo"] = description.primary_photo
prop_data["alt_photos"] = ", ".join(description.alt_photos) if description.alt_photos else None
2025-01-06 03:28:36 -08:00
prop_data["style"] = (
description.style
if isinstance(description.style, str)
else description.style.value if description.style else None
)
2024-06-06 15:24:12 -07:00
prop_data["beds"] = description.beds
prop_data["full_baths"] = description.baths_full
prop_data["half_baths"] = description.baths_half
prop_data["sqft"] = description.sqft
prop_data["lot_sqft"] = description.lot_sqft
prop_data["sold_price"] = description.sold_price
prop_data["year_built"] = description.year_built
prop_data["parking_garage"] = description.garage
prop_data["stories"] = description.stories
prop_data["text"] = description.text
2023-10-03 22:21:16 -07:00
properties_df = pd.DataFrame([prop_data])
properties_df = properties_df.reindex(columns=ordered_properties)
return properties_df[ordered_properties]
def validate_input(listing_type: str) -> None:
if listing_type.upper() not in ListingType.__members__:
2024-04-16 12:55:44 -07:00
raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.")
2023-11-03 16:35:41 -07:00
def validate_dates(date_from: str | None, date_to: str | None) -> None:
if isinstance(date_from, str) != isinstance(date_to, str):
2023-11-03 16:35:41 -07:00
raise InvalidDate("Both date_from and date_to must be provided.")
if date_from and date_to:
try:
date_from_obj = datetime.strptime(date_from, "%Y-%m-%d")
date_to_obj = datetime.strptime(date_to, "%Y-%m-%d")
if date_to_obj < date_from_obj:
raise InvalidDate("date_to must be after date_from.")
except ValueError:
2023-11-03 16:35:41 -07:00
raise InvalidDate(f"Invalid date format or range")
def validate_limit(limit: int) -> None:
#: 1 -> 10000 limit
if limit is not None and (limit < 1 or limit > 10000):
raise ValueError("Property limit must be between 1 and 10,000.")