mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-04 19:44:29 -08:00
- readme
This commit is contained in:
204
README.md
204
README.md
@@ -7,9 +7,13 @@
|
||||
|
||||
## HomeHarvest Features
|
||||
|
||||
- **Source**: Fetches properties directly from **Realtor.com**.
|
||||
- **Data Format**: Structures data to resemble MLS listings.
|
||||
- **Export Flexibility**: Options to save as either CSV or Excel.
|
||||
- **Source**: Fetches properties directly from **Realtor.com**
|
||||
- **Data Format**: Structures data to resemble MLS listings
|
||||
- **Export Options**: Save as CSV, Excel, or return as Pandas/Pydantic/Raw
|
||||
- **Flexible Filtering**: Filter by beds, baths, price, sqft, lot size, year built
|
||||
- **Time-Based Queries**: Search by hours, days, or specific date ranges
|
||||
- **Multiple Listing Types**: Query for_sale, for_rent, sold, pending, or all at once
|
||||
- **Sorting**: Sort results by price, date, size, or last update
|
||||
|
||||

|
||||
|
||||
@@ -26,212 +30,66 @@ pip install -U homeharvest
|
||||
|
||||
```py
|
||||
from homeharvest import scrape_property
|
||||
from datetime import datetime
|
||||
|
||||
# Generate filename based on current timestamp
|
||||
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"HomeHarvest_{current_timestamp}.csv"
|
||||
|
||||
properties = scrape_property(
|
||||
location="San Diego, CA",
|
||||
listing_type="sold", # or for_sale, for_rent, pending, ["for_sale", "sold"], None (all types)
|
||||
past_days=30, # sold within the last 30 days; for for_sale/for_rent, listed within the last 30 days
|
||||
|
||||
# property_type=['single_family','multi_family'],
|
||||
# date_from="2023-05-01", # alternative to past_days
|
||||
# date_to="2023-05-28",
|
||||
# foreclosure=True,
|
||||
# mls_only=True, # only fetch MLS listings
|
||||
# updated_in_past_hours=24, # filter by last_update_date
|
||||
# sort_by="last_update_date", # sort by last update
|
||||
listing_type="sold", # for_sale, for_rent, pending
|
||||
past_days=30
|
||||
)
|
||||
print(f"Number of properties: {len(properties)}")
|
||||
|
||||
# Export to csv
|
||||
properties.to_csv(filename, index=False)
|
||||
print(properties.head())
|
||||
properties.to_csv("results.csv", index=False)
|
||||
print(f"Found {len(properties)} properties")
|
||||
```
|
||||
|
||||
### Flexible Location Formats
|
||||
```py
|
||||
# HomeHarvest supports any of these location formats:
|
||||
properties = scrape_property(location="92104") # Just zip code
|
||||
properties = scrape_property(location="San Diego") # Just city
|
||||
properties = scrape_property(location="San Diego, CA") # City, state
|
||||
properties = scrape_property(location="San Diego, California") # Full state name
|
||||
properties = scrape_property(location="1234 Main St, San Diego, CA 92104") # Full address
|
||||
|
||||
# You can also search for properties within a radius of a specific address
|
||||
# Accepts: zip code, city, "city, state", full address, etc.
|
||||
properties = scrape_property(
|
||||
location="1234 Main St, San Diego, CA 92104",
|
||||
radius=5.0 # 5 mile radius
|
||||
location="San Diego, CA", # or "92104", "San Diego", "1234 Main St, San Diego, CA 92104"
|
||||
radius=5.0 # Optional: search within radius (miles) of address
|
||||
)
|
||||
```
|
||||
|
||||
### Advanced Filtering Examples
|
||||
|
||||
#### Hour-Based Filtering
|
||||
#### Time-Based Filtering
|
||||
```py
|
||||
# Get properties listed in the last 24 hours
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Filter by hours or use datetime/timedelta objects
|
||||
properties = scrape_property(
|
||||
location="Austin, TX",
|
||||
listing_type="for_sale",
|
||||
past_hours=24
|
||||
)
|
||||
|
||||
# Get properties listed during specific hours (e.g., business hours)
|
||||
properties = scrape_property(
|
||||
location="Dallas, TX",
|
||||
listing_type="for_sale",
|
||||
date_from="2025-01-20T09:00:00", # Hour precision automatically detected
|
||||
date_to="2025-01-20T17:00:00"
|
||||
past_hours=24, # or timedelta(hours=24) for Pythonic approach
|
||||
# date_from=datetime.now() - timedelta(days=7), # Alternative: datetime objects
|
||||
# date_to=datetime.now(), # Automatic hour precision detection
|
||||
)
|
||||
```
|
||||
|
||||
#### Property Filters
|
||||
```py
|
||||
# Filter by bedrooms, bathrooms, and square footage
|
||||
# Combine any filters: beds, baths, sqft, price, lot_sqft, year_built
|
||||
properties = scrape_property(
|
||||
location="San Francisco, CA",
|
||||
listing_type="for_sale",
|
||||
beds_min=2,
|
||||
beds_max=4,
|
||||
beds_min=3, beds_max=5,
|
||||
baths_min=2.0,
|
||||
sqft_min=1000,
|
||||
sqft_max=2500
|
||||
)
|
||||
|
||||
# Filter by price range
|
||||
properties = scrape_property(
|
||||
location="Phoenix, AZ",
|
||||
listing_type="for_sale",
|
||||
price_min=200000,
|
||||
price_max=500000
|
||||
)
|
||||
|
||||
# Filter by year built
|
||||
properties = scrape_property(
|
||||
location="Seattle, WA",
|
||||
listing_type="for_sale",
|
||||
sqft_min=1500, sqft_max=3000,
|
||||
price_min=300000, price_max=800000,
|
||||
year_built_min=2000,
|
||||
beds_min=3
|
||||
)
|
||||
|
||||
# Combine multiple filters
|
||||
properties = scrape_property(
|
||||
location="Denver, CO",
|
||||
listing_type="for_sale",
|
||||
beds_min=3,
|
||||
baths_min=2.0,
|
||||
sqft_min=1500,
|
||||
price_min=300000,
|
||||
price_max=600000,
|
||||
year_built_min=1990,
|
||||
lot_sqft_min=5000
|
||||
)
|
||||
```
|
||||
|
||||
#### Sorting Results
|
||||
#### Sorting & Listing Types
|
||||
```py
|
||||
# Sort by price (cheapest first)
|
||||
# Sort options: list_price, list_date, sqft, beds, baths, last_update_date
|
||||
# Listing types: "for_sale", "for_rent", "sold", "pending", list, or None (all)
|
||||
properties = scrape_property(
|
||||
location="Miami, FL",
|
||||
listing_type="for_sale",
|
||||
sort_by="list_price",
|
||||
sort_direction="asc",
|
||||
limit=100
|
||||
)
|
||||
|
||||
# Sort by newest listings
|
||||
properties = scrape_property(
|
||||
location="Boston, MA",
|
||||
listing_type="for_sale",
|
||||
sort_by="list_date",
|
||||
sort_direction="desc"
|
||||
)
|
||||
|
||||
# Sort by square footage (largest first)
|
||||
properties = scrape_property(
|
||||
location="Los Angeles, CA",
|
||||
listing_type="for_sale",
|
||||
sort_by="sqft",
|
||||
sort_direction="desc"
|
||||
)
|
||||
|
||||
# Sort by most recently updated
|
||||
properties = scrape_property(
|
||||
location="New York, NY",
|
||||
listing_type="for_sale",
|
||||
sort_by="last_update_date",
|
||||
sort_direction="desc"
|
||||
)
|
||||
```
|
||||
|
||||
#### Multiple Listing Types
|
||||
```py
|
||||
# Get both for_sale and pending properties
|
||||
properties = scrape_property(
|
||||
location="Austin, TX",
|
||||
listing_type=["for_sale", "pending"], # Returns properties matching ANY status
|
||||
limit=100
|
||||
)
|
||||
|
||||
# Get all listing types
|
||||
properties = scrape_property(
|
||||
location="Seattle, WA",
|
||||
listing_type=None, # Returns for_sale, for_rent, sold, pending, etc.
|
||||
limit=100
|
||||
)
|
||||
```
|
||||
|
||||
#### Filter by Last Update Date
|
||||
```py
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Get properties updated in the last 24 hours
|
||||
properties = scrape_property(
|
||||
location="Miami, FL",
|
||||
listing_type="for_sale",
|
||||
updated_in_past_hours=24,
|
||||
sort_by="last_update_date",
|
||||
sort_direction="desc"
|
||||
)
|
||||
|
||||
# Get properties updated since a specific date/time
|
||||
properties = scrape_property(
|
||||
location="Chicago, IL",
|
||||
listing_type="for_sale",
|
||||
updated_since=datetime(2025, 11, 10, 9, 0), # datetime object
|
||||
limit=100
|
||||
)
|
||||
|
||||
# Or use ISO string
|
||||
properties = scrape_property(
|
||||
location="Portland, OR",
|
||||
listing_type="for_sale",
|
||||
updated_since="2025-11-10T09:00:00", # ISO string
|
||||
limit=100
|
||||
)
|
||||
```
|
||||
|
||||
#### Pythonic Time Filtering
|
||||
```py
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Use timedelta objects for more readable code
|
||||
properties = scrape_property(
|
||||
location="Denver, CO",
|
||||
listing_type="for_sale",
|
||||
past_hours=timedelta(hours=6), # More Pythonic than past_hours=6
|
||||
limit=100
|
||||
)
|
||||
|
||||
# Use datetime objects for precise time ranges
|
||||
properties = scrape_property(
|
||||
location="Phoenix, AZ",
|
||||
listing_type="for_sale",
|
||||
date_from=datetime.now() - timedelta(days=7), # datetime object - hour precision
|
||||
date_to=datetime.now(),
|
||||
listing_type=["for_sale", "pending"], # Single string, list, or None
|
||||
sort_by="list_price", # Sort field
|
||||
sort_direction="asc", # "asc" or "desc"
|
||||
limit=100
|
||||
)
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user