mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-04 19:44:29 -08:00
enh: property type (#102)
This commit is contained in:
104
examples/price_of_land.py
Normal file
104
examples/price_of_land.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""
|
||||
This script scrapes sold and pending sold land listings in past year for a list of zip codes and saves the data to individual Excel files.
|
||||
It adds two columns to the data: 'lot_acres' and 'ppa' (price per acre) for user to analyze average price of land in a zip code.
|
||||
"""
|
||||
|
||||
import os
|
||||
import pandas as pd
|
||||
from homeharvest import scrape_property
|
||||
|
||||
|
||||
def get_property_details(zip: str, listing_type):
|
||||
properties = scrape_property(location=zip, listing_type=listing_type, property_type=["land"], past_days=365)
|
||||
if not properties.empty:
|
||||
properties["lot_acres"] = properties["lot_sqft"].apply(lambda x: x / 43560 if pd.notnull(x) else None)
|
||||
|
||||
properties = properties[properties["sqft"].isnull()]
|
||||
properties["ppa"] = properties.apply(
|
||||
lambda row: (
|
||||
int(
|
||||
(
|
||||
row["sold_price"]
|
||||
if (pd.notnull(row["sold_price"]) and row["status"] == "SOLD")
|
||||
else row["list_price"]
|
||||
)
|
||||
/ row["lot_acres"]
|
||||
)
|
||||
if pd.notnull(row["lot_acres"])
|
||||
and row["lot_acres"] > 0
|
||||
and (pd.notnull(row["sold_price"]) or pd.notnull(row["list_price"]))
|
||||
else None
|
||||
),
|
||||
axis=1,
|
||||
)
|
||||
properties["ppa"] = properties["ppa"].astype("Int64")
|
||||
selected_columns = [
|
||||
"property_url",
|
||||
"property_id",
|
||||
"style",
|
||||
"status",
|
||||
"street",
|
||||
"city",
|
||||
"state",
|
||||
"zip_code",
|
||||
"county",
|
||||
"list_date",
|
||||
"last_sold_date",
|
||||
"list_price",
|
||||
"sold_price",
|
||||
"lot_sqft",
|
||||
"lot_acres",
|
||||
"ppa",
|
||||
]
|
||||
properties = properties[selected_columns]
|
||||
return properties
|
||||
|
||||
|
||||
def output_to_excel(zip_code, sold_df, pending_df):
|
||||
root_folder = os.getcwd()
|
||||
zip_folder = os.path.join(root_folder, "zips", zip_code)
|
||||
|
||||
# Create zip code folder if it doesn't exist
|
||||
os.makedirs(zip_folder, exist_ok=True)
|
||||
|
||||
# Define file paths
|
||||
sold_file = os.path.join(zip_folder, f"{zip_code}_sold.xlsx")
|
||||
pending_file = os.path.join(zip_folder, f"{zip_code}_pending.xlsx")
|
||||
|
||||
# Save individual sold and pending files
|
||||
sold_df.to_excel(sold_file, index=False)
|
||||
pending_df.to_excel(pending_file, index=False)
|
||||
|
||||
|
||||
zip_codes = map(
|
||||
str,
|
||||
[
|
||||
22920,
|
||||
77024,
|
||||
78028,
|
||||
24553,
|
||||
22967,
|
||||
22971,
|
||||
22922,
|
||||
22958,
|
||||
22969,
|
||||
22949,
|
||||
22938,
|
||||
24599,
|
||||
24562,
|
||||
22976,
|
||||
24464,
|
||||
22964,
|
||||
24581,
|
||||
],
|
||||
)
|
||||
|
||||
combined_df = pd.DataFrame()
|
||||
for zip in zip_codes:
|
||||
sold_df = get_property_details(zip, "sold")
|
||||
pending_df = get_property_details(zip, "pending")
|
||||
combined_df = pd.concat([combined_df, sold_df, pending_df], ignore_index=True)
|
||||
output_to_excel(zip, sold_df, pending_df)
|
||||
|
||||
combined_file = os.path.join(os.getcwd(), "zips", "combined.xlsx")
|
||||
combined_df.to_excel(combined_file, index=False)
|
||||
Reference in New Issue
Block a user