parent
6bb68766fc
commit
de692faae2
11
README.md
11
README.md
|
@ -61,7 +61,7 @@ options:
|
||||||
> homeharvest "San Francisco, CA" -l for_rent -o excel -f HomeHarvest
|
> homeharvest "San Francisco, CA" -l for_rent -o excel -f HomeHarvest
|
||||||
```
|
```
|
||||||
|
|
||||||
### Python
|
### Python
|
||||||
|
|
||||||
```py
|
```py
|
||||||
from homeharvest import scrape_property
|
from homeharvest import scrape_property
|
||||||
|
@ -72,10 +72,10 @@ current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
filename = f"output/{current_timestamp}.csv"
|
filename = f"output/{current_timestamp}.csv"
|
||||||
|
|
||||||
properties = scrape_property(
|
properties = scrape_property(
|
||||||
location="San Diego, CA",
|
location="San Diego, CA",
|
||||||
listing_type="sold", # or (for_sale, for_rent)
|
listing_type="sold", # or (for_sale, for_rent)
|
||||||
last_x_days=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
|
property_younger_than=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
|
||||||
mls_only=True, # only fetch MLS listings
|
mls_only=True, # only fetch MLS listings
|
||||||
)
|
)
|
||||||
print(f"Number of properties: {len(properties)}")
|
print(f"Number of properties: {len(properties)}")
|
||||||
|
|
||||||
|
@ -84,7 +84,6 @@ properties.to_csv(filename, index=False)
|
||||||
print(properties.head())
|
print(properties.head())
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Output
|
## Output
|
||||||
```plaintext
|
```plaintext
|
||||||
>>> properties.head()
|
>>> properties.head()
|
||||||
|
|
|
@ -8,7 +8,7 @@ filename = f"output/{current_timestamp}.csv"
|
||||||
properties = scrape_property(
|
properties = scrape_property(
|
||||||
location="San Diego, CA",
|
location="San Diego, CA",
|
||||||
listing_type="sold", # for_sale, for_rent
|
listing_type="sold", # for_sale, for_rent
|
||||||
last_x_days=30, # sold/listed in last 30 days
|
property_younger_than=30, # sold/listed in last 30 days
|
||||||
mls_only=True, # only fetch MLS listings
|
mls_only=True, # only fetch MLS listings
|
||||||
)
|
)
|
||||||
print(f"Number of properties: {len(properties)}")
|
print(f"Number of properties: {len(properties)}")
|
||||||
|
|
|
@ -12,11 +12,17 @@ def scrape_property(
|
||||||
listing_type: str = "for_sale",
|
listing_type: str = "for_sale",
|
||||||
radius: float = None,
|
radius: float = None,
|
||||||
mls_only: bool = False,
|
mls_only: bool = False,
|
||||||
last_x_days: int = None,
|
property_younger_than: int = None,
|
||||||
proxy: str = None,
|
proxy: str = None,
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
Scrape properties from Realtor.com based on a given location and listing type.
|
Scrape properties from Realtor.com based on a given location and listing type.
|
||||||
|
:param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way")
|
||||||
|
:param listing_type: Listing Type (for_sale, for_rent, sold)
|
||||||
|
:param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses.
|
||||||
|
:param mls_only: If set, fetches only listings with MLS IDs.
|
||||||
|
:param property_younger_than: Get properties sold/listed in last _ days.
|
||||||
|
:param proxy: Proxy to use for scraping
|
||||||
"""
|
"""
|
||||||
validate_input(listing_type)
|
validate_input(listing_type)
|
||||||
|
|
||||||
|
@ -26,7 +32,7 @@ def scrape_property(
|
||||||
proxy=proxy,
|
proxy=proxy,
|
||||||
radius=radius,
|
radius=radius,
|
||||||
mls_only=mls_only,
|
mls_only=mls_only,
|
||||||
last_x_days=last_x_days,
|
last_x_days=property_younger_than,
|
||||||
)
|
)
|
||||||
|
|
||||||
site = RealtorScraper(scraper_input)
|
site = RealtorScraper(scraper_input)
|
||||||
|
|
|
@ -68,7 +68,7 @@ def main():
|
||||||
radius=args.radius,
|
radius=args.radius,
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
mls_only=args.mls_only,
|
mls_only=args.mls_only,
|
||||||
last_x_days=args.days,
|
property_younger_than=args.days,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not args.filename:
|
if not args.filename:
|
||||||
|
|
|
@ -9,7 +9,7 @@ def test_realtor_comps():
|
||||||
result = scrape_property(
|
result = scrape_property(
|
||||||
location="2530 Al Lipscomb Way",
|
location="2530 Al Lipscomb Way",
|
||||||
radius=0.5,
|
radius=0.5,
|
||||||
last_x_days=180,
|
property_younger_than=180,
|
||||||
listing_type="sold",
|
listing_type="sold",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -18,11 +18,11 @@ def test_realtor_comps():
|
||||||
|
|
||||||
def test_realtor_last_x_days_sold():
|
def test_realtor_last_x_days_sold():
|
||||||
days_result_30 = scrape_property(
|
days_result_30 = scrape_property(
|
||||||
location="Dallas, TX", listing_type="sold", last_x_days=30
|
location="Dallas, TX", listing_type="sold", property_younger_than=30
|
||||||
)
|
)
|
||||||
|
|
||||||
days_result_10 = scrape_property(
|
days_result_10 = scrape_property(
|
||||||
location="Dallas, TX", listing_type="sold", last_x_days=10
|
location="Dallas, TX", listing_type="sold", property_younger_than=10
|
||||||
)
|
)
|
||||||
|
|
||||||
assert all(
|
assert all(
|
||||||
|
|
Loading…
Reference in New Issue