- realtor.com default
parent
8388d47f73
commit
1f1ca8068f
30
README.md
30
README.md
|
@ -29,6 +29,21 @@ pip install homeharvest
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
### Python
|
||||||
|
|
||||||
|
```py
|
||||||
|
from homeharvest import scrape_property
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
properties: pd.DataFrame = scrape_property(
|
||||||
|
location="85281",
|
||||||
|
listing_type="for_rent" # for_sale / sold
|
||||||
|
)
|
||||||
|
|
||||||
|
# Note: to export to CSV or Excel, use properties.to_csv() or properties.to_excel().
|
||||||
|
print(properties)
|
||||||
|
```
|
||||||
|
|
||||||
### CLI
|
### CLI
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
@ -44,21 +59,6 @@ By default:
|
||||||
- If `-f` or `--filename` is left blank, the default is `HomeHarvest_<current_timestamp>`.
|
- If `-f` or `--filename` is left blank, the default is `HomeHarvest_<current_timestamp>`.
|
||||||
- If `-p` or `--proxy` is not provided, the scraper uses the local IP.
|
- If `-p` or `--proxy` is not provided, the scraper uses the local IP.
|
||||||
- Use `-k` or `--keep_duplicates` to keep duplicate properties based on address. If not provided, duplicates will be removed.
|
- Use `-k` or `--keep_duplicates` to keep duplicate properties based on address. If not provided, duplicates will be removed.
|
||||||
### Python
|
|
||||||
|
|
||||||
```py
|
|
||||||
from homeharvest import scrape_property
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
properties: pd.DataFrame = scrape_property(
|
|
||||||
site_name=["zillow", "realtor.com", "redfin"],
|
|
||||||
location="85281",
|
|
||||||
listing_type="for_rent" # for_sale / sold
|
|
||||||
)
|
|
||||||
|
|
||||||
# Note: to export to CSV or Excel, use properties.to_csv() or properties.to_excel().
|
|
||||||
print(properties)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Output
|
## Output
|
||||||
```py
|
```py
|
||||||
|
|
|
@ -132,7 +132,7 @@ def _scrape_single_site(location: str, site_name: str, listing_type: str, proxy:
|
||||||
|
|
||||||
def scrape_property(
|
def scrape_property(
|
||||||
location: str,
|
location: str,
|
||||||
site_name: Union[str, list[str]] = None,
|
site_name: Union[str, list[str]] = "realtor.com",
|
||||||
listing_type: str = "for_sale",
|
listing_type: str = "for_sale",
|
||||||
proxy: str = None,
|
proxy: str = None,
|
||||||
keep_duplicates: bool = False
|
keep_duplicates: bool = False
|
||||||
|
@ -140,12 +140,14 @@ def scrape_property(
|
||||||
"""
|
"""
|
||||||
Scrape property from various sites from a given location and listing type.
|
Scrape property from various sites from a given location and listing type.
|
||||||
|
|
||||||
:returns: pd.DataFrame
|
:param keep_duplicates:
|
||||||
|
:param proxy:
|
||||||
:param location: US Location (e.g. 'San Francisco, CA', 'Cook County, IL', '85281', '2530 Al Lipscomb Way')
|
:param location: US Location (e.g. 'San Francisco, CA', 'Cook County, IL', '85281', '2530 Al Lipscomb Way')
|
||||||
:param site_name: Site name or list of site names (e.g. ['realtor.com', 'zillow'], 'redfin')
|
:param site_name: Site name or list of site names (e.g. ['realtor.com', 'zillow'], 'redfin')
|
||||||
:param listing_type: Listing type (e.g. 'for_sale', 'for_rent', 'sold')
|
:param listing_type: Listing type (e.g. 'for_sale', 'for_rent', 'sold')
|
||||||
:return: pd.DataFrame containing properties
|
:returns: pd.DataFrame containing properties
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if site_name is None:
|
if site_name is None:
|
||||||
site_name = list(_scrapers.keys())
|
site_name = list(_scrapers.keys())
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue