From cc76e067b23873725baee47acb3753db1b79d021 Mon Sep 17 00:00:00 2001
From: Cullen Watson <cullen@cullenwatson.com>
Date: Mon, 18 Sep 2023 20:01:55 -0500
Subject: [PATCH 1/3] fix: lat/long KeyError

---
 README.md                                     | 2 +-
 homeharvest/core/scrapers/realtor/__init__.py | 4 ++--
 homeharvest/core/scrapers/redfin/__init__.py  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 1790f38..5395dbd 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ properties.to_csv('props.csv', index=False)
 ## Output
 ```py
 >>> properties.head()
-                           street   city  ... mls_id description
+                           street        city  ... mls_id description
 0                 420 N  Scottsdale Rd  Tempe  ...    NaN         NaN
 1                1255 E  University Dr  Tempe  ...    NaN         NaN
 2              1979 E  Rio Salado Pkwy  Tempe  ...    NaN         NaN
diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py
index 0ee873e..f6cd68d 100644
--- a/homeharvest/core/scrapers/realtor/__init__.py
+++ b/homeharvest/core/scrapers/realtor/__init__.py
@@ -249,8 +249,8 @@ class RealtorScraper(Scraper):
                     unit=parse_unit(result["location"]["address"]["unit"]),
                     country="USA",
                 ),
-                latitude=result["location"]["address"]["coordinate"]["lat"],
-                longitude=result["location"]["address"]["coordinate"]["lon"],
+                latitude=result["location"]["address"]["coordinate"]["lat"] if result and result.get("location") and result["location"].get("address") and result["location"]["address"].get("coordinate") and "lat" in result["location"]["address"]["coordinate"] else None,
+                longitude=result["location"]["address"]["coordinate"]["lon"] if result and result.get("location") and result["location"].get("address") and result["location"]["address"].get("coordinate") and "lon" in result["location"]["address"]["coordinate"] else None,
                 site_name=self.site_name,
                 property_url="https://www.realtor.com/realestateandhomes-detail/"
                 + result["property_id"],
diff --git a/homeharvest/core/scrapers/redfin/__init__.py b/homeharvest/core/scrapers/redfin/__init__.py
index a3e9e18..d701ff2 100644
--- a/homeharvest/core/scrapers/redfin/__init__.py
+++ b/homeharvest/core/scrapers/redfin/__init__.py
@@ -94,8 +94,8 @@ class RedfinScraper(Scraper):
             price_per_sqft=get_value("pricePerSqFt"),
             price=get_value("price"),
             mls_id=get_value("mlsId"),
-            latitude=home["latLong"]["latitude"] if "latLong" in home else None,
-            longitude=home["latLong"]["longitude"] if "latLong" in home else None,
+            latitude=home["latLong"]["latitude"] if "latLong" in home and "latitude" in home["latLong"] else None,
+            longitude=home["latLong"]["longitude"] if "latLong" in home and "longitude" in home["latLong"] else None,
         )
 
     def _parse_building(self, building: dict) -> Property:

From b01162161d6664b5a20c8737f30ee2d40c128208 Mon Sep 17 00:00:00 2001
From: Cullen Watson <cullen@cullenwatson.com>
Date: Mon, 18 Sep 2023 20:09:28 -0500
Subject: [PATCH 2/3] chore: merge

---
 HomeHarvest_Demo.ipynb | 49 ++++++++++++++++++++++++++++++++++++++++--
 README.md              |  6 +++---
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/HomeHarvest_Demo.ipynb b/HomeHarvest_Demo.ipynb
index fc0dceb..a9e8f12 100644
--- a/HomeHarvest_Demo.ipynb
+++ b/HomeHarvest_Demo.ipynb
@@ -31,11 +31,33 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# scrapes all 3 sites by default\n",
     "scrape_property(\n",
-    "    location=\"dallas\", site_name=\"zillow\", listing_type=\"for_sale\"\n",
+    "    location=\"dallas\",\n",
+    "    listing_type=\"for_sale\"\n",
     ")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aaf86093",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# search a specific address\n",
+    "scrape_property(\n",
+    "    location=\"2530 Al Lipscomb Way\",\n",
+    "    site_name=\"zillow\",\n",
+    "    listing_type=\"for_sale\"\n",
+    ")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -43,8 +65,31 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# check rentals\n",
     "scrape_property(\n",
-    "    location=\"dallas\", site_name=\"redfin\", listing_type=\"for_sale\"\n",
+    "    location=\"chicago\",\n",
+    "    site_name=[\"redfin\", \"realtor.com\"],\n",
+    "    listing_type=\"for_rent\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "af280cd3",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# check sold properties\n",
+    "scrape_property(\n",
+    "    location=\"chicago, illinois\",\n",
+    "    site_name=[\"redfin\"],\n",
+    "    listing_type=\"sold\"\n",
     ")"
    ]
   }
diff --git a/README.md b/README.md
index d7a9285..7d1e66f 100644
--- a/README.md
+++ b/README.md
@@ -37,8 +37,8 @@ print(properties)
 ```
 ## Output
 ```py
->>> properties.head()
-                           street        city  ... mls_id description
+>> properties.head()
+                                street   city  ... mls_id description
 0                 420 N  Scottsdale Rd  Tempe  ...    NaN         NaN
 1                1255 E  University Dr  Tempe  ...    NaN         NaN
 2              1979 E  Rio Salado Pkwy  Tempe  ...    NaN         NaN
@@ -118,7 +118,7 @@ The following exceptions may be raised when using HomeHarvest:
 - `InvalidSite` - valid options: `zillow`, `redfin`, `realtor.com`
 - `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
 - `NoResultsFound` - no properties found from your input
-- `GeoCoordsNotFound` - if Zillow scraper is not able to create geo-coordinates from the location you input
+- `GeoCoordsNotFound` - if Zillow scraper is not able to find the geo-coordinates from the `location`
 
 ## Frequently Asked Questions
 

From 2eec389838d3179dd863d90697e1165a69bf1346 Mon Sep 17 00:00:00 2001
From: Cullen Watson <cullen@cullenwatson.com>
Date: Mon, 18 Sep 2023 21:02:12 -0500
Subject: [PATCH 3/3] docs: add logo; drop empty/all-NaN result frames

---
 README.md               | 14 ++++++--------
 homeharvest/__init__.py |  7 +++----
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 7d1e66f..757f074 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,14 @@
-# HomeHarvest
+<img src="https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/d1a2bf8b-09f5-4c57-b33a-0ada8a34f12d" width="400">
 
-**HomeHarvest** is a simple but comprehensive real estate scraping library.
+**HomeHarvest** is a simple, yet comprehensive, real estate scraping library.
 
 [![Try with Replit](https://replit.com/badge?caption=Try%20with%20Replit)](https://replit.com/@ZacharyHampton/HomeHarvestDemo)
 
-
 *Looking to build a data-focused software product?* **[Book a call](https://calendly.com/zachary-products/15min)** *to work with us.*
 ## Features
 
 
+
 - Scrapes properties from **Zillow**, **Realtor.com** & **Redfin** simultaneously
 - Aggregates the properties in a Pandas DataFrame
 
@@ -32,13 +32,11 @@ properties: pd.DataFrame = scrape_property(
 
 #: Note, to export to CSV or Excel, use properties.to_csv() or properties.to_excel().
 print(properties)
-
-
 ```
 ## Output
 ```py
->> properties.head()
-                                street   city  ... mls_id description
+>>> properties.head()
+                           street   city  ... mls_id description
 0                 420 N  Scottsdale Rd  Tempe  ...    NaN         NaN
 1                1255 E  University Dr  Tempe  ...    NaN         NaN
 2              1979 E  Rio Salado Pkwy  Tempe  ...    NaN         NaN
@@ -118,7 +116,7 @@ The following exceptions may be raised when using HomeHarvest:
 - `InvalidSite` - valid options: `zillow`, `redfin`, `realtor.com`
 - `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
 - `NoResultsFound` - no properties found from your input
-- `GeoCoordsNotFound` - if Zillow scraper is not able to find the geo-coordinates from the `location`
+- `GeoCoordsNotFound` - if Zillow scraper is not able to create geo-coordinates from the location you input
 
 ## Frequently Asked Questions
 
diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py
index ca6d7eb..e2f7f2a 100644
--- a/homeharvest/__init__.py
+++ b/homeharvest/__init__.py
@@ -17,7 +17,6 @@ _scrapers = {
     "zillow": ZillowScraper,
 }
 
-
 def validate_input(site_name: str, listing_type: str) -> None:
     if site_name.lower() not in _scrapers:
         raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
@@ -27,7 +26,6 @@ def validate_input(site_name: str, listing_type: str) -> None:
             f"Provided listing type, '{listing_type}', does not exist."
         )
 
-
 def get_ordered_properties(result: Property) -> list[str]:
     return [
         "property_url",
@@ -67,7 +65,6 @@ def get_ordered_properties(result: Property) -> list[str]:
         "longitude",
     ]
 
-
 def process_result(result: Property) -> pd.DataFrame:
     prop_data = result.__dict__
 
@@ -93,7 +90,6 @@ def process_result(result: Property) -> pd.DataFrame:
 
     return properties_df
 
-
 def _scrape_single_site(
     location: str, site_name: str, listing_type: str
 ) -> pd.DataFrame:
@@ -112,6 +108,7 @@ def _scrape_single_site(
     results = site.search()
 
     properties_dfs = [process_result(result) for result in results]
+    properties_dfs = [df.dropna(axis=1, how='all') for df in properties_dfs if not df.empty]
     if not properties_dfs:
         return pd.DataFrame()
 
@@ -154,6 +151,8 @@ def scrape_property(
                 result = future.result()
                 results.append(result)
 
+    results = [df for df in results if not df.empty and not df.isna().all().all()]
+
     if not results:
         return pd.DataFrame()