Compare commits

..

10 Commits

Author SHA1 Message Date
Zachary Hampton
00537329cf - version bump 2023-10-04 21:35:21 -07:00
Zachary Hampton
a9225b532f - rename days variable 2023-10-04 21:35:14 -07:00
Zachary Hampton
ba7ad069c9 Merge pull request #32 from ZacharyHampton/key_error
[fix] keyerror on style
2023-10-04 20:35:05 -07:00
Cullen Watson
22bda972b0 [chore] version number 2023-10-04 22:34:52 -05:00
Cullen Watson
6f5bbf79a4 [fix] keyerror on style 2023-10-04 22:33:21 -05:00
Cullen Watson
608cceba34 [docs] reorder 2023-10-04 22:12:16 -05:00
Cullen Watson
3609586995 [docs]: add contingent to example 2023-10-04 22:11:38 -05:00
Cullen Watson
68c7e411e4 [docs] pending / contingent searches 2023-10-04 22:07:51 -05:00
Cullen Watson
5e825601a7 [docs] update example 2023-10-04 21:50:54 -05:00
Cullen Watson
ce3f94d0af [docs] update example 2023-10-04 21:50:16 -05:00
7 changed files with 106 additions and 54 deletions

View File

@@ -17,8 +17,9 @@ Check out another project we wrote: ***[JobSpy](https://github.com/cullenwatson/
- **Data Format**: Structures data to resemble MLS listings.
- **Export Flexibility**: Options to save as either CSV or Excel.
- **Usage Modes**:
- **CLI**: For users who prefer command-line operations.
- **Python**: For those who'd like to integrate scraping into their Python scripts.
- **CLI**: For users who prefer command-line operations.
[Video Guide for HomeHarvest](https://youtu.be/JnV7eR2Ve2o) - _updated for release v0.2.7_
@@ -33,10 +34,35 @@ pip install homeharvest
## Usage
### Python
```py
from homeharvest import scrape_property
from datetime import datetime
# Generate filename based on current timestamp
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"HomeHarvest_{current_timestamp}.csv"
properties = scrape_property(
location="San Diego, CA",
listing_type="sold", # or (for_sale, for_rent)
past_days=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
# pending_or_contingent=True # use on for_sale listings to find pending / contingent listings
# mls_only=True, # only fetch MLS listings
# proxy="http://user:pass@host:port" # use a proxy to change your IP address
)
print(f"Number of properties: {len(properties)}")
# Export to csv
properties.to_csv(filename, index=False)
print(properties.head())
```
### CLI
```
usage: homeharvest [-l {for_sale,for_rent,sold}] [-o {excel,csv}] [-f FILENAME] [-p PROXY] [-d DAYS] [-r RADIUS] [-m] location
usage: homeharvest [-l {for_sale,for_rent,sold}] [-o {excel,csv}] [-f FILENAME] [-p PROXY] [-d DAYS] [-r RADIUS] [-m] [-c] location
Home Harvest Property Scraper
@@ -54,35 +80,16 @@ options:
Proxy to use for scraping
-d DAYS, --days DAYS Sold/listed in last _ days filter.
-r RADIUS, --radius RADIUS
Get comparable properties within _ (eg. 0.0) miles. Only applicable for individual addresses.
-m, --mls_only If set, fetches only MLS listings.
Get comparable properties within _ (e.g., 0.0) miles. Only applicable for individual addresses.
-m, --mls_only If set, fetches only MLS listings.
-c, --pending_or_contingent
If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches.
```
```bash
> homeharvest "San Francisco, CA" -l for_rent -o excel -f HomeHarvest
```
### Python
```py
from homeharvest import scrape_property
from datetime import datetime
# Generate filename based on current timestamp
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"output/{current_timestamp}.csv"
properties = scrape_property(
location="San Diego, CA",
listing_type="sold", # or (for_sale, for_rent)
property_younger_than=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
mls_only=True, # only fetch MLS listings
)
print(f"Number of properties: {len(properties)}")
# Export to csv
properties.to_csv(filename, index=False)
print(properties.head())
```
## Output
```plaintext
@@ -111,6 +118,8 @@ Optional
├── property_younger_than (integer): Number of past days to filter properties. Utilizes 'last_sold_date' for 'sold' listing types, and 'list_date' for others (for_rent, for_sale).
│ Example: 30 (fetches properties listed/sold in the last 30 days)
|
├── pending_or_contingent (True/False): If set, fetches only pending or contingent listings. Only applicable for `for_sale` listings from general area searches.
├── mls_only (True/False): If set, fetches only MLS listings (mainly applicable to 'sold' listings)

View File

@@ -3,16 +3,18 @@ from datetime import datetime
# Generate filename based on current timestamp
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"output/{current_timestamp}.csv"
filename = f"HomeHarvest_{current_timestamp}.csv"
properties = scrape_property(
location="San Diego, CA",
listing_type="sold", # for_sale, for_rent
property_younger_than=30, # sold/listed in last 30 days
mls_only=True, # only fetch MLS listings
listing_type="sold", # or (for_sale, for_rent)
past_days=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
# pending_or_contingent=True # use on for_sale listings to find pending / contingent listings
# mls_only=True, # only fetch MLS listings
# proxy="http://user:pass@host:port" # use a proxy to change your IP address
)
print(f"Number of properties: {len(properties)}")
# Export to csv
properties.to_csv(filename, index=False)
print(properties.head())
print(properties.head())

View File

@@ -12,7 +12,7 @@ def scrape_property(
listing_type: str = "for_sale",
radius: float = None,
mls_only: bool = False,
property_younger_than: int = None,
past_days: int = None,
pending_or_contingent: bool = False,
proxy: str = None,
) -> pd.DataFrame:
@@ -22,7 +22,7 @@ def scrape_property(
:param listing_type: Listing Type (for_sale, for_rent, sold)
:param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses.
:param mls_only: If set, fetches only listings with MLS IDs.
:param property_younger_than: Get properties sold/listed in last _ days.
:param past_days: Get properties sold or listed (dependent on your listing_type) in the last _ days.
:param pending_or_contingent: If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches.
:param proxy: Proxy to use for scraping
"""
@@ -34,7 +34,7 @@ def scrape_property(
proxy=proxy,
radius=radius,
mls_only=mls_only,
last_x_days=property_younger_than,
last_x_days=past_days,
pending_or_contingent=pending_or_contingent,
)

View File

@@ -60,6 +60,13 @@ def main():
help="If set, fetches only MLS listings.",
)
parser.add_argument(
"-c",
"--pending_or_contingent",
action="store_true",
help="If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches.",
)
args = parser.parse_args()
result = scrape_property(
@@ -68,7 +75,8 @@ def main():
radius=args.radius,
proxy=args.proxy,
mls_only=args.mls_only,
property_younger_than=args.days,
past_days=args.days,
pending_or_contingent=args.pending_or_contingent,
)
if not args.filename:

View File

@@ -105,10 +105,10 @@ class RealtorScraper(Scraper):
)
able_to_get_lat_long = (
property_info
and property_info.get("address")
and property_info["address"].get("location")
and property_info["address"]["location"].get("coordinate")
property_info
and property_info.get("address")
and property_info["address"].get("location")
and property_info["address"]["location"].get("coordinate")
)
listing = Property(
@@ -122,8 +122,10 @@ class RealtorScraper(Scraper):
list_date=property_info["basic"]["list_date"].split("T")[0]
if property_info["basic"].get("list_date")
else None,
prc_sqft=property_info["basic"].get("price") / property_info["basic"].get("sqft")
if property_info["basic"].get("price") and property_info["basic"].get("sqft")
prc_sqft=property_info["basic"].get("price")
/ property_info["basic"].get("sqft")
if property_info["basic"].get("price")
and property_info["basic"].get("sqft")
else None,
last_sold_date=property_info["basic"]["sold_date"].split("T")[0]
if property_info["basic"].get("sold_date")
@@ -146,7 +148,7 @@ class RealtorScraper(Scraper):
year_built=property_info["details"].get("year_built"),
garage=property_info["details"].get("garage"),
stories=property_info["details"].get("stories"),
)
),
)
return [listing]
@@ -175,7 +177,10 @@ class RealtorScraper(Scraper):
if property_info["listings"] is None:
return None
primary_listing = next((listing for listing in property_info["listings"] if listing["primary"]), None)
primary_listing = next(
(listing for listing in property_info["listings"] if listing["primary"]),
None,
)
if primary_listing:
return primary_listing["listing_id"]
else:
@@ -328,7 +333,11 @@ class RealtorScraper(Scraper):
else "sort: [{ field: list_date, direction: desc }]"
)
pending_or_contingent_param = "or_filters: { contingent: true, pending: true }" if self.pending_or_contingent else ""
pending_or_contingent_param = (
"or_filters: { contingent: true, pending: true }"
if self.pending_or_contingent
else ""
)
if search_type == "comps": #: comps search, came from an address
query = """query Property_search(
@@ -384,7 +393,7 @@ class RealtorScraper(Scraper):
)
else: #: general search, came from an address
query = (
"""query Property_search(
"""query Property_search(
$property_id: [ID]!
$offset: Int!,
) {
@@ -394,7 +403,9 @@ class RealtorScraper(Scraper):
}
limit: 1
offset: $offset
) %s""" % results_query)
) %s"""
% results_query
)
payload = {
"query": query,
@@ -477,13 +488,21 @@ class RealtorScraper(Scraper):
"offset": 0,
}
search_type = "comps" if self.radius and location_type == "address" else "address" if location_type == "address" and not self.radius else "area"
search_type = (
"comps"
if self.radius and location_type == "address"
else "address"
if location_type == "address" and not self.radius
else "area"
)
if location_type == "address":
if not self.radius: #: single address search, non comps
property_id = location_info["mpr_id"]
search_variables |= {"property_id": property_id}
gql_results = self.general_search(search_variables, search_type=search_type)
gql_results = self.general_search(
search_variables, search_type=search_type
)
if gql_results["total"] == 0:
listing_id = self.get_latest_listing_id(property_id)
if listing_id is None:
@@ -561,8 +580,17 @@ class RealtorScraper(Scraper):
@staticmethod
def _parse_description(result: dict) -> Description:
description_data = result.get("description", {})
if description_data is None or not isinstance(description_data, dict):
print("Warning: description_data is invalid!")
description_data = {}
style = description_data.get("type", "")
if style is not None:
style = style.upper()
return Description(
style=description_data.get("type", "").upper(),
style=style,
beds=description_data.get("beds"),
baths_full=description_data.get("baths_full"),
baths_half=description_data.get("baths_half"),

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "homeharvest"
version = "0.3.0"
version = "0.3.2"
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
homepage = "https://github.com/ZacharyHampton/HomeHarvest"

View File

@@ -16,7 +16,12 @@ def test_realtor_pending_or_contingent():
pending_or_contingent=False,
)
assert all([result is not None for result in [pending_or_contingent_result, regular_result]])
assert all(
[
result is not None
for result in [pending_or_contingent_result, regular_result]
]
)
assert len(pending_or_contingent_result) != len(regular_result)
@@ -24,7 +29,7 @@ def test_realtor_comps():
result = scrape_property(
location="2530 Al Lipscomb Way",
radius=0.5,
property_younger_than=180,
past_days=180,
listing_type="sold",
)
@@ -33,11 +38,11 @@ def test_realtor_comps():
def test_realtor_last_x_days_sold():
days_result_30 = scrape_property(
location="Dallas, TX", listing_type="sold", property_younger_than=30
location="Dallas, TX", listing_type="sold", past_days=30
)
days_result_10 = scrape_property(
location="Dallas, TX", listing_type="sold", property_younger_than=10
location="Dallas, TX", listing_type="sold", past_days=10
)
assert all(