mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-04 11:34:32 -08:00
- readme
This commit is contained in:
206
README.md
206
README.md
@@ -7,9 +7,13 @@
|
|||||||
|
|
||||||
## HomeHarvest Features
|
## HomeHarvest Features
|
||||||
|
|
||||||
- **Source**: Fetches properties directly from **Realtor.com**.
|
- **Source**: Fetches properties directly from **Realtor.com**
|
||||||
- **Data Format**: Structures data to resemble MLS listings.
|
- **Data Format**: Structures data to resemble MLS listings
|
||||||
- **Export Flexibility**: Options to save as either CSV or Excel.
|
- **Export Options**: Save as CSV, Excel, or return as Pandas/Pydantic/Raw
|
||||||
|
- **Flexible Filtering**: Filter by beds, baths, price, sqft, lot size, year built
|
||||||
|
- **Time-Based Queries**: Search by hours, days, or specific date ranges
|
||||||
|
- **Multiple Listing Types**: Query for_sale, for_rent, sold, pending, or all at once
|
||||||
|
- **Sorting**: Sort results by price, date, size, or last update
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -26,212 +30,66 @@ pip install -U homeharvest
|
|||||||
|
|
||||||
```py
|
```py
|
||||||
from homeharvest import scrape_property
|
from homeharvest import scrape_property
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# Generate filename based on current timestamp
|
|
||||||
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
||||||
filename = f"HomeHarvest_{current_timestamp}.csv"
|
|
||||||
|
|
||||||
properties = scrape_property(
|
properties = scrape_property(
|
||||||
location="San Diego, CA",
|
location="San Diego, CA",
|
||||||
listing_type="sold", # or for_sale, for_rent, pending, ["for_sale", "sold"], None (all types)
|
listing_type="sold", # for_sale, for_rent, pending
|
||||||
past_days=30, # sold in the last 30 days (for for_sale/for_rent: listed in the last 30 days)
|
past_days=30
|
||||||
|
|
||||||
# property_type=['single_family','multi_family'],
|
|
||||||
# date_from="2023-05-01", # alternative to past_days
|
|
||||||
# date_to="2023-05-28",
|
|
||||||
# foreclosure=True
|
|
||||||
# mls_only=True, # only fetch MLS listings
|
|
||||||
# updated_in_past_hours=24, # filter by last_update_date
|
|
||||||
# sort_by="last_update_date", # sort by last update
|
|
||||||
)
|
)
|
||||||
print(f"Number of properties: {len(properties)}")
|
|
||||||
|
|
||||||
# Export to csv
|
properties.to_csv("results.csv", index=False)
|
||||||
properties.to_csv(filename, index=False)
|
print(f"Found {len(properties)} properties")
|
||||||
print(properties.head())
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Flexible Location Formats
|
### Flexible Location Formats
|
||||||
```py
|
```py
|
||||||
# HomeHarvest supports any of these location formats:
|
# Accepts: zip code, city, "city, state", full address, etc.
|
||||||
properties = scrape_property(location="92104") # Just zip code
|
|
||||||
properties = scrape_property(location="San Diego") # Just city
|
|
||||||
properties = scrape_property(location="San Diego, CA") # City, state
|
|
||||||
properties = scrape_property(location="San Diego, California") # Full state name
|
|
||||||
properties = scrape_property(location="1234 Main St, San Diego, CA 92104") # Full address
|
|
||||||
|
|
||||||
# You can also search for properties within a radius of a specific address
|
|
||||||
properties = scrape_property(
|
properties = scrape_property(
|
||||||
location="1234 Main St, San Diego, CA 92104",
|
location="San Diego, CA", # or "92104", "San Diego", "1234 Main St, San Diego, CA 92104"
|
||||||
radius=5.0 # 5 mile radius
|
radius=5.0 # Optional: search within radius (miles) of address
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Advanced Filtering Examples
|
### Advanced Filtering Examples
|
||||||
|
|
||||||
#### Hour-Based Filtering
|
#### Time-Based Filtering
|
||||||
```py
|
```py
|
||||||
# Get properties listed in the last 24 hours
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
# Filter by hours or use datetime/timedelta objects
|
||||||
properties = scrape_property(
|
properties = scrape_property(
|
||||||
location="Austin, TX",
|
location="Austin, TX",
|
||||||
listing_type="for_sale",
|
listing_type="for_sale",
|
||||||
past_hours=24
|
past_hours=24, # or timedelta(hours=24) for a more Pythonic approach
|
||||||
)
|
# date_from=datetime.now() - timedelta(days=7), # Alternative: datetime objects
|
||||||
|
# date_to=datetime.now(), # Automatic hour precision detection
|
||||||
# Get properties listed during specific hours (e.g., business hours)
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Dallas, TX",
|
|
||||||
listing_type="for_sale",
|
|
||||||
date_from="2025-01-20T09:00:00", # Hour precision automatically detected
|
|
||||||
date_to="2025-01-20T17:00:00"
|
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Property Filters
|
#### Property Filters
|
||||||
```py
|
```py
|
||||||
# Filter by bedrooms, bathrooms, and square footage
|
# Combine any filters: beds, baths, sqft, price, lot_sqft, year_built
|
||||||
properties = scrape_property(
|
properties = scrape_property(
|
||||||
location="San Francisco, CA",
|
location="San Francisco, CA",
|
||||||
listing_type="for_sale",
|
listing_type="for_sale",
|
||||||
beds_min=2,
|
beds_min=3, beds_max=5,
|
||||||
beds_max=4,
|
|
||||||
baths_min=2.0,
|
baths_min=2.0,
|
||||||
sqft_min=1000,
|
sqft_min=1500, sqft_max=3000,
|
||||||
sqft_max=2500
|
price_min=300000, price_max=800000,
|
||||||
)
|
|
||||||
|
|
||||||
# Filter by price range
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Phoenix, AZ",
|
|
||||||
listing_type="for_sale",
|
|
||||||
price_min=200000,
|
|
||||||
price_max=500000
|
|
||||||
)
|
|
||||||
|
|
||||||
# Filter by year built
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Seattle, WA",
|
|
||||||
listing_type="for_sale",
|
|
||||||
year_built_min=2000,
|
year_built_min=2000,
|
||||||
beds_min=3
|
|
||||||
)
|
|
||||||
|
|
||||||
# Combine multiple filters
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Denver, CO",
|
|
||||||
listing_type="for_sale",
|
|
||||||
beds_min=3,
|
|
||||||
baths_min=2.0,
|
|
||||||
sqft_min=1500,
|
|
||||||
price_min=300000,
|
|
||||||
price_max=600000,
|
|
||||||
year_built_min=1990,
|
|
||||||
lot_sqft_min=5000
|
lot_sqft_min=5000
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Sorting Results
|
#### Sorting & Listing Types
|
||||||
```py
|
```py
|
||||||
# Sort by price (cheapest first)
|
# Sort options: list_price, list_date, sqft, beds, baths, last_update_date
|
||||||
|
# Listing types: "for_sale", "for_rent", "sold", "pending", list, or None (all)
|
||||||
properties = scrape_property(
|
properties = scrape_property(
|
||||||
location="Miami, FL",
|
location="Miami, FL",
|
||||||
listing_type="for_sale",
|
listing_type=["for_sale", "pending"], # Single string, list, or None
|
||||||
sort_by="list_price",
|
sort_by="list_price", # Sort field
|
||||||
sort_direction="asc",
|
sort_direction="asc", # "asc" or "desc"
|
||||||
limit=100
|
|
||||||
)
|
|
||||||
|
|
||||||
# Sort by newest listings
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Boston, MA",
|
|
||||||
listing_type="for_sale",
|
|
||||||
sort_by="list_date",
|
|
||||||
sort_direction="desc"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Sort by square footage (largest first)
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Los Angeles, CA",
|
|
||||||
listing_type="for_sale",
|
|
||||||
sort_by="sqft",
|
|
||||||
sort_direction="desc"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Sort by most recently updated
|
|
||||||
properties = scrape_property(
|
|
||||||
location="New York, NY",
|
|
||||||
listing_type="for_sale",
|
|
||||||
sort_by="last_update_date",
|
|
||||||
sort_direction="desc"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Multiple Listing Types
|
|
||||||
```py
|
|
||||||
# Get both for_sale and pending properties
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Austin, TX",
|
|
||||||
listing_type=["for_sale", "pending"], # Returns properties matching ANY status
|
|
||||||
limit=100
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get all listing types
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Seattle, WA",
|
|
||||||
listing_type=None, # Returns for_sale, for_rent, sold, pending, etc.
|
|
||||||
limit=100
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Filter by Last Update Date
|
|
||||||
```py
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
# Get properties updated in the last 24 hours
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Miami, FL",
|
|
||||||
listing_type="for_sale",
|
|
||||||
updated_in_past_hours=24,
|
|
||||||
sort_by="last_update_date",
|
|
||||||
sort_direction="desc"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get properties updated since a specific date/time
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Chicago, IL",
|
|
||||||
listing_type="for_sale",
|
|
||||||
updated_since=datetime(2025, 11, 10, 9, 0), # datetime object
|
|
||||||
limit=100
|
|
||||||
)
|
|
||||||
|
|
||||||
# Or use ISO string
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Portland, OR",
|
|
||||||
listing_type="for_sale",
|
|
||||||
updated_since="2025-11-10T09:00:00", # ISO string
|
|
||||||
limit=100
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Pythonic Time Filtering
|
|
||||||
```py
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
# Use timedelta objects for more readable code
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Denver, CO",
|
|
||||||
listing_type="for_sale",
|
|
||||||
past_hours=timedelta(hours=6), # More Pythonic than past_hours=6
|
|
||||||
limit=100
|
|
||||||
)
|
|
||||||
|
|
||||||
# Use datetime objects for precise time ranges
|
|
||||||
properties = scrape_property(
|
|
||||||
location="Phoenix, AZ",
|
|
||||||
listing_type="for_sale",
|
|
||||||
date_from=datetime.now() - timedelta(days=7), # datetime object - hour precision
|
|
||||||
date_to=datetime.now(),
|
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|||||||
Reference in New Issue
Block a user