From 1f1ca8068ff563d36a4844f73cc63e506279c78c Mon Sep 17 00:00:00 2001 From: Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com> Date: Mon, 2 Oct 2023 10:28:13 -0700 Subject: [PATCH] - realtor.com default --- README.md | 30 +++++++++++++++--------------- homeharvest/__init__.py | 8 +++++--- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 74fade3..87d9819 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,21 @@ pip install homeharvest ## Usage +### Python + +```py +from homeharvest import scrape_property +import pandas as pd + +properties: pd.DataFrame = scrape_property( + location="85281", + listing_type="for_rent" # for_sale / sold +) + +#: Note, to export to CSV or Excel, use properties.to_csv() or properties.to_excel(). +print(properties) +``` + ### CLI ```bash @@ -44,21 +59,6 @@ By default: - If `-f` or `--filename` is left blank, the default is `HomeHarvest_`. - If `-p` or `--proxy` is not provided, the scraper uses the local IP. - Use `-k` or `--keep_duplicates` to keep duplicate properties based on address. If not provided, duplicates will be removed. -### Python - -```py -from homeharvest import scrape_property -import pandas as pd - -properties: pd.DataFrame = scrape_property( - site_name=["zillow", "realtor.com", "redfin"], - location="85281", - listing_type="for_rent" # for_sale / sold -) - -#: Note, to export to CSV or Excel, use properties.to_csv() or properties.to_excel(). 
-print(properties) -``` ## Output ```py diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index 2b60e3b..8fe7d0d 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -132,7 +132,7 @@ def _scrape_single_site(location: str, site_name: str, listing_type: str, proxy: def scrape_property( location: str, - site_name: Union[str, list[str]] = None, + site_name: Union[str, list[str]] = "realtor.com", listing_type: str = "for_sale", proxy: str = None, keep_duplicates: bool = False @@ -140,12 +140,14 @@ def scrape_property( """ Scrape property from various sites from a given location and listing type. - :returns: pd.DataFrame + :param keep_duplicates: If True, keep duplicate properties (matched by address); otherwise duplicates are removed + :param proxy: Proxy to use for requests; if not provided, the scraper uses the local IP :param location: US Location (e.g. 'San Francisco, CA', 'Cook County, IL', '85281', '2530 Al Lipscomb Way') :param site_name: Site name or list of site names (e.g. ['realtor.com', 'zillow'], 'redfin') :param listing_type: Listing type (e.g. 'for_sale', 'for_rent', 'sold') - :return: pd.DataFrame containing properties + :returns: pd.DataFrame containing properties """ + if site_name is None: site_name = list(_scrapers.keys())