From 2eec389838d3179dd863d90697e1165a69bf1346 Mon Sep 17 00:00:00 2001 From: Cullen Watson Date: Mon, 18 Sep 2023 21:02:12 -0500 Subject: [PATCH] docs: add logo --- README.md | 14 ++++++-------- homeharvest/__init__.py | 7 +++---- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 7d1e66f..757f074 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ -# HomeHarvest + -**HomeHarvest** is a simple but comprehensive real estate scraping library. +**HomeHarvest** is a simple, yet comprehensive, real estate scraping library. [![Try with Replit](https://replit.com/badge?caption=Try%20with%20Replit)](https://replit.com/@ZacharyHampton/HomeHarvestDemo) - *Looking to build a data-focused software product?* **[Book a call](https://calendly.com/zachary-products/15min)** *to work with us.* ## Features + - Scrapes properties from **Zillow**, **Realtor.com** & **Redfin** simultaneously - Aggregates the properties in a Pandas DataFrame @@ -32,13 +32,11 @@ properties: pd.DataFrame = scrape_property( #: Note, to export to CSV or Excel, use properties.to_csv() or properties.to_excel(). print(properties) - - ``` ## Output ```py ->> properties.head() - street city ... mls_id description +>>> properties.head() + street city ... mls_id description 0 420 N Scottsdale Rd Tempe ... NaN NaN 1 1255 E University Dr Tempe ... NaN NaN 2 1979 E Rio Salado Pkwy Tempe ... 
NaN NaN @@ -118,7 +116,7 @@ The following exceptions may be raised when using HomeHarvest: - `InvalidSite` - valid options: `zillow`, `redfin`, `realtor.com` - `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold` - `NoResultsFound` - no properties found from your input -- `GeoCoordsNotFound` - if Zillow scraper is not able to find the geo-coordinates from the `location` +- `GeoCoordsNotFound` - if the Zillow scraper is unable to derive geo-coordinates from the location you provide ## Frequently Asked Questions diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index ca6d7eb..e2f7f2a 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -17,7 +17,6 @@ _scrapers = { "zillow": ZillowScraper, } - def validate_input(site_name: str, listing_type: str) -> None: if site_name.lower() not in _scrapers: raise InvalidSite(f"Provided site, '{site_name}', does not exist.") @@ -27,7 +26,6 @@ def validate_input(site_name: str, listing_type: str) -> None: f"Provided listing type, '{listing_type}', does not exist." ) - def get_ordered_properties(result: Property) -> list[str]: return [ "property_url", @@ -67,7 +65,6 @@ def get_ordered_properties(result: Property) -> list[str]: "longitude", ] - def process_result(result: Property) -> pd.DataFrame: prop_data = result.__dict__ @@ -93,7 +90,6 @@ def process_result(result: Property) -> pd.DataFrame: return properties_df - def _scrape_single_site( location: str, site_name: str, listing_type: str ) -> pd.DataFrame: @@ -112,6 +108,7 @@ def _scrape_single_site( results = site.search() properties_dfs = [process_result(result) for result in results] + properties_dfs = [df.dropna(axis=1, how='all') for df in properties_dfs if not df.empty] if not properties_dfs: return pd.DataFrame() @@ -154,6 +151,8 @@ def scrape_property( result = future.result() results.append(result) + results = [df for df in results if not df.empty and not df.isna().all().all()] + if not results: return pd.DataFrame()