From a8926915b6678d6d2df508fe35a7cc69a6d63a7d Mon Sep 17 00:00:00 2001 From: Zachary Hampton Date: Tue, 11 Nov 2025 14:33:06 -0800 Subject: [PATCH] - readme --- README.md | 206 +++++++++--------------------------------------------- 1 file changed, 32 insertions(+), 174 deletions(-) diff --git a/README.md b/README.md index 049e9db..f50e73e 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,13 @@ ## HomeHarvest Features -- **Source**: Fetches properties directly from **Realtor.com**. -- **Data Format**: Structures data to resemble MLS listings. -- **Export Flexibility**: Options to save as either CSV or Excel. +- **Source**: Fetches properties directly from **Realtor.com** +- **Data Format**: Structures data to resemble MLS listings +- **Export Options**: Save as CSV, Excel, or return as Pandas/Pydantic/Raw +- **Flexible Filtering**: Filter by beds, baths, price, sqft, lot size, year built +- **Time-Based Queries**: Search by hours, days, or specific date ranges +- **Multiple Listing Types**: Query for_sale, for_rent, sold, pending, or all at once +- **Sorting**: Sort results by price, date, size, or last update ![homeharvest](https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/b3d5d727-e67b-4a9f-85d8-1e65fd18620a) @@ -26,212 +30,66 @@ pip install -U homeharvest ```py from homeharvest import scrape_property -from datetime import datetime - -# Generate filename based on current timestamp -current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") -filename = f"HomeHarvest_{current_timestamp}.csv" properties = scrape_property( - location="San Diego, CA", - listing_type="sold", # or for_sale, for_rent, pending, ["for_sale", "sold"], None (all types) - past_days=30, # sold in last 30 days - listed in last 30 days if (for_sale, for_rent) - - # property_type=['single_family','multi_family'], - # date_from="2023-05-01", # alternative to past_days - # date_to="2023-05-28", - # foreclosure=True - # mls_only=True, # only fetch MLS listings - # 
updated_in_past_hours=24, # filter by last_update_date - # sort_by="last_update_date", # sort by last update + location="San Diego, CA", + listing_type="sold", # or for_sale, for_rent, pending + past_days=30 ) -print(f"Number of properties: {len(properties)}") -# Export to csv -properties.to_csv(filename, index=False) -print(properties.head()) +properties.to_csv("results.csv", index=False) +print(f"Found {len(properties)} properties") ``` ### Flexible Location Formats ```py -# HomeHarvest supports any of these location formats: -properties = scrape_property(location="92104") # Just zip code -properties = scrape_property(location="San Diego") # Just city -properties = scrape_property(location="San Diego, CA") # City, state -properties = scrape_property(location="San Diego, California") # Full state name -properties = scrape_property(location="1234 Main St, San Diego, CA 92104") # Full address - -# You can also search for properties within a radius of a specific address +# Accepts: zip code, city, "city, state", full address, etc. 
properties = scrape_property( - location="1234 Main St, San Diego, CA 92104", - radius=5.0 # 5 mile radius + location="San Diego, CA", # or "92104", "San Diego", "1234 Main St, San Diego, CA 92104" + radius=5.0 # Optional: search within radius (miles) of address ) ``` ### Advanced Filtering Examples -#### Hour-Based Filtering +#### Time-Based Filtering ```py -# Get properties listed in the last 24 hours +from datetime import datetime, timedelta + +# Filter by hours or use datetime/timedelta objects properties = scrape_property( location="Austin, TX", listing_type="for_sale", - past_hours=24 -) - -# Get properties listed during specific hours (e.g., business hours) -properties = scrape_property( - location="Dallas, TX", - listing_type="for_sale", - date_from="2025-01-20T09:00:00", # Hour precision automatically detected - date_to="2025-01-20T17:00:00" + past_hours=24, # or timedelta(hours=24) for Pythonic approach + # date_from=datetime.now() - timedelta(days=7), # Alternative: datetime objects + # date_to=datetime.now(), # Automatic hour precision detection ) ``` #### Property Filters ```py -# Filter by bedrooms, bathrooms, and square footage +# Combine any filters: beds, baths, sqft, price, lot_sqft, year_built properties = scrape_property( location="San Francisco, CA", listing_type="for_sale", - beds_min=2, - beds_max=4, + beds_min=3, beds_max=5, baths_min=2.0, - sqft_min=1000, - sqft_max=2500 -) - -# Filter by price range -properties = scrape_property( - location="Phoenix, AZ", - listing_type="for_sale", - price_min=200000, - price_max=500000 -) - -# Filter by year built -properties = scrape_property( - location="Seattle, WA", - listing_type="for_sale", + sqft_min=1500, sqft_max=3000, + price_min=300000, price_max=800000, year_built_min=2000, - beds_min=3 -) - -# Combine multiple filters -properties = scrape_property( - location="Denver, CO", - listing_type="for_sale", - beds_min=3, - baths_min=2.0, - sqft_min=1500, - price_min=300000, - price_max=600000, - 
year_built_min=1990, lot_sqft_min=5000 ) ``` -#### Sorting Results +#### Sorting & Listing Types ```py -# Sort by price (cheapest first) +# Sort options: list_price, list_date, sqft, beds, baths, last_update_date +# Listing types: "for_sale", "for_rent", "sold", "pending", list, or None (all) properties = scrape_property( location="Miami, FL", - listing_type="for_sale", - sort_by="list_price", - sort_direction="asc", - limit=100 -) - -# Sort by newest listings -properties = scrape_property( - location="Boston, MA", - listing_type="for_sale", - sort_by="list_date", - sort_direction="desc" -) - -# Sort by square footage (largest first) -properties = scrape_property( - location="Los Angeles, CA", - listing_type="for_sale", - sort_by="sqft", - sort_direction="desc" -) - -# Sort by most recently updated -properties = scrape_property( - location="New York, NY", - listing_type="for_sale", - sort_by="last_update_date", - sort_direction="desc" -) -``` - -#### Multiple Listing Types -```py -# Get both for_sale and pending properties -properties = scrape_property( - location="Austin, TX", - listing_type=["for_sale", "pending"], # Returns properties matching ANY status - limit=100 -) - -# Get all listing types -properties = scrape_property( - location="Seattle, WA", - listing_type=None, # Returns for_sale, for_rent, sold, pending, etc. 
- limit=100 -) -``` - -#### Filter by Last Update Date -```py -from datetime import datetime, timedelta - -# Get properties updated in the last 24 hours -properties = scrape_property( - location="Miami, FL", - listing_type="for_sale", - updated_in_past_hours=24, - sort_by="last_update_date", - sort_direction="desc" -) - -# Get properties updated since a specific date/time -properties = scrape_property( - location="Chicago, IL", - listing_type="for_sale", - updated_since=datetime(2025, 11, 10, 9, 0), # datetime object - limit=100 -) - -# Or use ISO string -properties = scrape_property( - location="Portland, OR", - listing_type="for_sale", - updated_since="2025-11-10T09:00:00", # ISO string - limit=100 -) -``` - -#### Pythonic Time Filtering -```py -from datetime import datetime, timedelta - -# Use timedelta objects for more readable code -properties = scrape_property( - location="Denver, CO", - listing_type="for_sale", - past_hours=timedelta(hours=6), # More Pythonic than past_hours=6 - limit=100 -) - -# Use datetime objects for precise time ranges -properties = scrape_property( - location="Phoenix, AZ", - listing_type="for_sale", - date_from=datetime.now() - timedelta(days=7), # datetime object - hour precision - date_to=datetime.now(), + listing_type=["for_sale", "pending"], # Single string, list, or None + sort_by="list_price", # Sort field + sort_direction="asc", # "asc" or "desc" limit=100 ) ```