- remove neighborhoods

- rename data
pull/31/head
Zachary Hampton 2023-10-04 18:44:47 -07:00
parent 8a5f0dc2c9
commit 37e20f4469
2 changed files with 44 additions and 56 deletions

View File

@@ -459,7 +459,7 @@ class RealtorScraper(Scraper):
if able_to_get_lat_long if able_to_get_lat_long
else None, else None,
address=self._parse_address(result, search_type="general_search"), address=self._parse_address(result, search_type="general_search"),
neighborhoods=self._parse_neighborhoods(result), #: neighborhoods=self._parse_neighborhoods(result),
description=self._parse_description(result), description=self._parse_description(result),
) )
properties.append(realty_property) properties.append(realty_property)

View File

@@ -1,74 +1,62 @@
from .core.scrapers.models import Property, ListingType from .core.scrapers.models import Property, ListingType
import pandas as pd import pandas as pd
from .exceptions import InvalidListingType
ordered_properties = [ ordered_properties = [
"PropertyURL", "property_url",
"MLS", "mls",
"MLS #", "mls_id",
"Status", "status",
"Style", "style",
"Street", "street",
"Unit", "unit",
"City", "city",
"State", "state",
"Zip", "zip_code",
"Beds", "beds",
"FB", "full_baths",
"NumHB", "half_baths",
"EstSF", "sqft",
"YrBlt", "year_built",
"ListPrice", "list_price",
"Lst Date", "list_date",
"Sold Price", "sold_price",
"COEDate", "last_sold_date",
"LotSFApx", "lot_sqft",
"PrcSqft", "price_per_sqft",
"LATITUDE", "latitude",
"LONGITUDE", "longitude",
"Stories", "stories",
"HOAFee", "hoa_fee",
"PrkgGar", "parking_garage",
"Community",
] ]
def process_result(result: Property) -> pd.DataFrame: def process_result(result: Property) -> pd.DataFrame:
prop_data = {prop: None for prop in ordered_properties} prop_data = {prop: None for prop in ordered_properties}
prop_data.update(result.__dict__) prop_data.update(result.__dict__)
prop_data["PropertyURL"] = prop_data["property_url"]
prop_data["MLS"] = prop_data["mls"]
prop_data["MLS #"] = prop_data["mls_id"]
prop_data["Status"] = prop_data["status"]
if "address" in prop_data: if "address" in prop_data:
address_data = prop_data["address"] address_data = prop_data["address"]
prop_data["Street"] = address_data.street prop_data["street"] = address_data.street
prop_data["Unit"] = address_data.unit prop_data["unit"] = address_data.unit
prop_data["City"] = address_data.city prop_data["city"] = address_data.city
prop_data["State"] = address_data.state prop_data["state"] = address_data.state
prop_data["Zip"] = address_data.zip prop_data["zip_code"] = address_data.zip
prop_data["ListPrice"] = prop_data["list_price"] prop_data["price_per_sqft"] = prop_data["prc_sqft"]
prop_data["Lst Date"] = prop_data["list_date"]
prop_data["COEDate"] = prop_data["last_sold_date"]
prop_data["PrcSqft"] = prop_data["prc_sqft"]
prop_data["HOAFee"] = prop_data["hoa_fee"]
description = result.description description = result.description
prop_data["Style"] = description.style prop_data["style"] = description.style
prop_data["Beds"] = description.beds prop_data["beds"] = description.beds
prop_data["FB"] = description.baths_full prop_data["full_baths"] = description.baths_full
prop_data["NumHB"] = description.baths_half prop_data["half_baths"] = description.baths_half
prop_data["EstSF"] = description.sqft prop_data["sqft"] = description.sqft
prop_data["LotSFApx"] = description.lot_sqft prop_data["lot_sqft"] = description.lot_sqft
prop_data["Sold Price"] = description.sold_price prop_data["sold_price"] = description.sold_price
prop_data["YrBlt"] = description.year_built prop_data["year_built"] = description.year_built
prop_data["PrkgGar"] = description.garage prop_data["parking_garage"] = description.garage
prop_data["Stories"] = description.stories prop_data["stories"] = description.stories
prop_data["LATITUDE"] = prop_data["latitude"]
prop_data["LONGITUDE"] = prop_data["longitude"]
prop_data["Community"] = prop_data["neighborhoods"]
properties_df = pd.DataFrame([prop_data]) properties_df = pd.DataFrame([prop_data])
properties_df = properties_df.reindex(columns=ordered_properties) properties_df = properties_df.reindex(columns=ordered_properties)