HomeHarvest/homeharvest/utils.py

124 lines
4.0 KiB
Python
Raw Permalink Normal View History

from __future__ import annotations
2023-10-03 22:21:16 -07:00
import pandas as pd
2023-11-03 16:35:41 -07:00
from datetime import datetime
from .core.scrapers.models import Property, ListingType, Agent
2023-11-03 16:35:41 -07:00
from .exceptions import InvalidListingType, InvalidDate
2023-10-03 22:21:16 -07:00
# Canonical column order for the DataFrame built by process_result().
# The list is used both to seed default (None) values and to select/order
# the output columns, so position here is position in the result.
ordered_properties = [
    # Listing identity and status
    "property_url", "mls", "mls_id", "status", "text", "style",
    # Address
    "full_street_line", "street", "unit", "city", "state", "zip_code",
    # Physical description
    "beds", "full_baths", "half_baths", "sqft", "year_built",
    # Listing timeline and pricing
    "days_on_mls", "list_price", "list_date", "sold_price", "last_sold_date",
    "assessed_value", "estimated_value",
    # Lot and location
    "lot_sqft", "price_per_sqft", "latitude", "longitude",
    "neighborhoods", "county", "fips_code",
    # Building extras
    "stories", "hoa_fee", "parking_garage",
    # Agent
    "agent", "agent_email", "agent_phones",
    # Broker
    "broker", "broker_phone", "broker_website",
    # Schools and media
    "nearby_schools", "primary_photo", "alt_photos",
]
def process_result(result: Property) -> pd.DataFrame:
    """Flatten one scraped ``Property`` into a single-row DataFrame.

    Every column named in ``ordered_properties`` is present in the result,
    in that order; values the property does not supply are left as ``None``.
    Nested objects (address, first agent, first broker, description) are
    unpacked into their flat columns.

    Args:
        result: The property to flatten. Its instance ``__dict__`` supplies
            the top-level values and ``result.description`` the details.

    Returns:
        A DataFrame with exactly one row and the columns of
        ``ordered_properties``.
    """
    prop_data = {prop: None for prop in ordered_properties}
    prop_data.update(result.__dict__)

    # A missing or None address leaves the address columns at None
    # instead of failing on attribute access.
    address_data = prop_data.get("address")
    if address_data is not None:
        prop_data["full_street_line"] = address_data.full_line
        prop_data["street"] = address_data.street
        prop_data["unit"] = address_data.unit
        prop_data["city"] = address_data.city
        prop_data["state"] = address_data.state
        prop_data["zip_code"] = address_data.zip

    # Only the first agent / broker is surfaced in the flat output.
    agents: list[Agent] | None = prop_data.get("agents")
    if agents:
        primary_agent = agents[0]
        prop_data["agent"] = primary_agent.name
        prop_data["agent_email"] = primary_agent.email
        prop_data["agent_phones"] = primary_agent.phones

    brokers = prop_data.get("brokers")
    if brokers:
        primary_broker = brokers[0]
        prop_data["broker"] = primary_broker.name
        prop_data["broker_phone"] = primary_broker.phone
        prop_data["broker_website"] = primary_broker.website

    prop_data["price_per_sqft"] = prop_data.get("prc_sqft")

    # Drop empty entries, then de-duplicate while keeping first-seen order.
    # dict.fromkeys is used instead of set() so the joined string is the
    # same on every run (set iteration order of strings is not stable).
    schools = [school for school in (prop_data.get("nearby_schools") or []) if school]
    prop_data["nearby_schools"] = ", ".join(dict.fromkeys(schools)) if schools else None

    description = result.description
    if description is not None:
        # style is either already a plain string or an enum-like object
        # exposing .value; None is passed through unchanged.
        style = description.style
        if style is not None and not isinstance(style, str):
            style = style.value

        prop_data["primary_photo"] = description.primary_photo
        prop_data["alt_photos"] = ", ".join(description.alt_photos) if description.alt_photos else None
        prop_data["style"] = style
        prop_data["beds"] = description.beds
        prop_data["full_baths"] = description.baths_full
        prop_data["half_baths"] = description.baths_half
        prop_data["sqft"] = description.sqft
        prop_data["lot_sqft"] = description.lot_sqft
        prop_data["sold_price"] = description.sold_price
        prop_data["year_built"] = description.year_built
        prop_data["parking_garage"] = description.garage
        prop_data["stories"] = description.stories
        prop_data["text"] = description.text

    # reindex both drops the helper keys (address, agents, ...) and fixes
    # the column order, so no further column selection is needed.
    return pd.DataFrame([prop_data]).reindex(columns=ordered_properties)
def validate_input(listing_type: str) -> None:
    """Check that ``listing_type`` names a ``ListingType`` member.

    The comparison is case-insensitive: the value is upper-cased before
    being looked up among the enum's member names.

    Raises:
        InvalidListingType: If no member has that name.
    """
    normalized = listing_type.upper()
    if normalized in ListingType.__members__:
        return
    raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.")
2023-11-03 16:35:41 -07:00
def validate_dates(date_from: str | None, date_to: str | None) -> None:
    """Validate an optional ``date_from`` / ``date_to`` pair.

    Both values must be given together or both omitted. When given, each
    must parse as ``YYYY-MM-DD`` and ``date_to`` must not be earlier than
    ``date_from`` (equal dates are accepted).

    Args:
        date_from: Start of the range, or ``None``.
        date_to: End of the range, or ``None``.

    Raises:
        InvalidDate: If only one bound is provided, if either value is not
            a valid ``YYYY-MM-DD`` date (including an empty string), or if
            ``date_to`` precedes ``date_from``.
    """
    if (date_from is None) != (date_to is None):
        raise InvalidDate("Both date_from and date_to must be provided.")
    if date_from is None:
        # Both bounds omitted: nothing to validate.
        return

    # Test against None rather than truthiness so that empty strings are
    # parsed (and rejected) instead of silently skipping validation.
    try:
        date_from_obj = datetime.strptime(date_from, "%Y-%m-%d")
        date_to_obj = datetime.strptime(date_to, "%Y-%m-%d")
    except ValueError as err:
        raise InvalidDate("Invalid date format or range") from err

    if date_to_obj < date_from_obj:
        raise InvalidDate("date_to must be after date_from.")