- cullen merge

This commit is contained in:
Zachary Hampton
2023-10-03 22:21:16 -07:00
parent 088088ae51
commit 29664e4eee
10 changed files with 258 additions and 996 deletions

View File

@@ -1,38 +1,76 @@
import re
from .core.scrapers.models import Property
import pandas as pd
# Names and left-to-right order of the columns in the DataFrame produced by
# process_result (which reindexes its output to exactly this list).
ordered_properties = [
    # Listing identity
    "PropertyURL", "MLS", "MLS #", "Status", "Style",
    # Address
    "Street", "Unit", "City", "State", "Zip",
    # Size and age
    "Beds", "FB", "NumHB", "EstSF", "YrBlt",
    # Pricing and dates
    "ListPrice", "Lst Date", "Sold Price", "COEDate",
    # Lot, unit price and coordinates
    "LotSFApx", "PrcSqft", "LATITUDE", "LONGITUDE",
    # Remaining details
    "Stories", "HOAFee", "PrkgGar", "Community",
]
# Trailing unit designator: one of the keywords APT/UNIT/LOT/SUITE starting on
# a word boundary (so "Pilot Rd" or "Community" are not mistaken for a unit),
# or a bare "#", followed by the alphanumeric unit identifier at end of string.
# Group 1 captures the identifier only.
_UNIT_PATTERN = re.compile(
    r"(?:\b(?:APT|UNIT|LOT|SUITE)\s*|#)([\dA-Z]+)$",
    re.I,
)

# Placeholder returned when an address carries no unit designator.
_NO_UNIT = "#"

# Output column -> key of the scraped property data it is copied from.
_COLUMN_SOURCES = {
    "PropertyURL": "property_url",
    "MLS": "mls",
    "MLS #": "mls_id",
    "Status": "status",
    "Style": "style",
    "Community": "neighborhoods",
    "Beds": "beds",
    "FB": "baths_full",
    "NumHB": "baths_half",
    "EstSF": "est_sf",
    "ListPrice": "list_price",
    "Lst Date": "list_date",
    "Sold Price": "sold_price",
    "COEDate": "last_sold_date",
    "LotSFApx": "lot_sf",
    "HOAFee": "hoa_fee",
    "YrBlt": "yr_blt",
    "LATITUDE": "latitude",
    "LONGITUDE": "longitude",
    "Stories": "stories",
    "PrkgGar": "prkg_gar",
}

# Output column -> attribute read from the nested ``address`` object.
_ADDRESS_COLUMNS = {
    "Street": "street",
    "Unit": "unit",
    "City": "city",
    "State": "state",
    "Zip": "zip",
}


def _find_unit(street_address: str):
    """Return the regex match for a trailing unit designator, or None."""
    if not street_address:
        return None
    return _UNIT_PATTERN.search(street_address)


def parse_address_one(street_address: str) -> tuple:
    """Split a street address into ``(main_address, unit)``.

    The unit is normalised to ``"#<identifier>"`` (e.g. ``"Apt 4B"`` becomes
    ``"#4B"``). When the address is empty/None or has no trailing unit
    designator, the address is returned unchanged together with ``"#"``.

    Args:
        street_address: Raw street line, possibly ending in a unit designator.

    Returns:
        A 2-tuple of the address without the unit and the normalised unit.
    """
    unit_match = _find_unit(street_address)
    if unit_match is None:
        return street_address, _NO_UNIT

    # Cut at the start of the trailing match rather than str.replace()-ing the
    # unit text, which would also delete identical text earlier in the address.
    main_address = street_address[: unit_match.start()].strip()
    return main_address, _NO_UNIT + unit_match.group(1)


def process_result(result: "Property") -> pd.DataFrame:
    """Convert one scraped property into a single-row DataFrame.

    The instance attributes of ``result`` are copied into the export columns
    listed in the module-level ``ordered_properties``; columns with no source
    value are left as None.

    Args:
        result: Scraped property; only its ``__dict__`` is read.

    Returns:
        A one-row DataFrame whose columns are exactly ``ordered_properties``,
        in that order.
    """
    prop_data = {prop: None for prop in ordered_properties}
    prop_data.update(result.__dict__)

    # .get() keeps a missing attribute as None instead of raising KeyError.
    for column, source_key in _COLUMN_SOURCES.items():
        prop_data[column] = prop_data.get(source_key)

    # The address may be absent or explicitly None; skip it in both cases.
    address_data = prop_data.get("address")
    if address_data is not None:
        for column, attribute in _ADDRESS_COLUMNS.items():
            prop_data[column] = getattr(address_data, attribute)

    if prop_data.get("prc_sqft") is not None:
        prop_data["PrcSqft"] = round(prop_data["prc_sqft"], 2)

    # reindex both drops the raw snake_case keys and fixes the column order.
    return pd.DataFrame([prop_data]).reindex(columns=ordered_properties)


def parse_address_two(street_address: str):
    """Return the normalised unit (``"#<identifier>"``) found at the end of
    ``street_address``, or ``"#"`` when the address is empty/None or has no
    trailing unit designator.
    """
    unit_match = _find_unit(street_address)
    if unit_match is None:
        return _NO_UNIT
    return _NO_UNIT + unit_match.group(1)