docs: add logo
parent
b01162161d
commit
2eec389838
12
README.md
12
README.md
|
@ -1,14 +1,14 @@
|
|||
# HomeHarvest
|
||||
<img src="https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/d1a2bf8b-09f5-4c57-b33a-0ada8a34f12d" width="400">
|
||||
|
||||
**HomeHarvest** is a simple but comprehensive real estate scraping library.
|
||||
**HomeHarvest** is a simple, yet comprehensive, real estate scraping library.
|
||||
|
||||
[![Try with Replit](https://replit.com/badge?caption=Try%20with%20Replit)](https://replit.com/@ZacharyHampton/HomeHarvestDemo)
|
||||
|
||||
|
||||
*Looking to build a data-focused software product?* **[Book a call](https://calendly.com/zachary-products/15min)** *to work with us.*
|
||||
## Features
|
||||
|
||||
|
||||
|
||||
- Scrapes properties from **Zillow**, **Realtor.com** & **Redfin** simultaneously
|
||||
- Aggregates the properties into a pandas DataFrame
|
||||
|
||||
|
@ -32,12 +32,10 @@ properties: pd.DataFrame = scrape_property(
|
|||
|
||||
# Note: to export to CSV or Excel, use properties.to_csv() or properties.to_excel().
|
||||
print(properties)
|
||||
|
||||
|
||||
```
|
||||
## Output
|
||||
```py
|
||||
>> properties.head()
|
||||
>>> properties.head()
|
||||
street city ... mls_id description
|
||||
0 420 N Scottsdale Rd Tempe ... NaN NaN
|
||||
1 1255 E University Dr Tempe ... NaN NaN
|
||||
|
@ -118,7 +116,7 @@ The following exceptions may be raised when using HomeHarvest:
|
|||
- `InvalidSite` - valid options: `zillow`, `redfin`, `realtor.com`
|
||||
- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
|
||||
- `NoResultsFound` - no properties found from your input
|
||||
- `GeoCoordsNotFound` - if Zillow scraper is not able to find the geo-coordinates from the `location`
|
||||
- `GeoCoordsNotFound` - if the Zillow scraper is unable to derive geo-coordinates from the `location` you input
|
||||
|
||||
## Frequently Asked Questions
|
||||
|
||||
|
|
|
@ -17,7 +17,6 @@ _scrapers = {
|
|||
"zillow": ZillowScraper,
|
||||
}
|
||||
|
||||
|
||||
def validate_input(site_name: str, listing_type: str) -> None:
|
||||
if site_name.lower() not in _scrapers:
|
||||
raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
|
||||
|
@ -27,7 +26,6 @@ def validate_input(site_name: str, listing_type: str) -> None:
|
|||
f"Provided listing type, '{listing_type}', does not exist."
|
||||
)
|
||||
|
||||
|
||||
def get_ordered_properties(result: Property) -> list[str]:
|
||||
return [
|
||||
"property_url",
|
||||
|
@ -67,7 +65,6 @@ def get_ordered_properties(result: Property) -> list[str]:
|
|||
"longitude",
|
||||
]
|
||||
|
||||
|
||||
def process_result(result: Property) -> pd.DataFrame:
|
||||
prop_data = result.__dict__
|
||||
|
||||
|
@ -93,7 +90,6 @@ def process_result(result: Property) -> pd.DataFrame:
|
|||
|
||||
return properties_df
|
||||
|
||||
|
||||
def _scrape_single_site(
|
||||
location: str, site_name: str, listing_type: str
|
||||
) -> pd.DataFrame:
|
||||
|
@ -112,6 +108,7 @@ def _scrape_single_site(
|
|||
results = site.search()
|
||||
|
||||
properties_dfs = [process_result(result) for result in results]
|
||||
properties_dfs = [df.dropna(axis=1, how='all') for df in properties_dfs if not df.empty]
|
||||
if not properties_dfs:
|
||||
return pd.DataFrame()
|
||||
|
||||
|
@ -154,6 +151,8 @@ def scrape_property(
|
|||
result = future.result()
|
||||
results.append(result)
|
||||
|
||||
results = [df for df in results if not df.empty and not df.isna().all().all()]
|
||||
|
||||
if not results:
|
||||
return pd.DataFrame()
|
||||
|
||||
|
|
Loading…
Reference in New Issue