From 1464b4f7d47d43f065a7e435476fd5856f2f6964 Mon Sep 17 00:00:00 2001
From: Cullen Watson <cullen@cullenwatson.com>
Date: Tue, 3 Oct 2023 21:16:38 -0500
Subject: [PATCH] [enh]: clean data

---
 homeharvest/__init__.py                       | 123 ++-----
 homeharvest/cli.py                            |  18 +-
 homeharvest/core/scrapers/__init__.py         |  31 +-
 homeharvest/core/scrapers/models.py           | 102 ++----
 homeharvest/core/scrapers/realtor/__init__.py | 231 +++++++-----
 homeharvest/core/scrapers/redfin/__init__.py  | 246 -------------
 homeharvest/core/scrapers/zillow/__init__.py  | 335 ------------------
 homeharvest/exceptions.py                     |   4 +
 homeharvest/utils.py                          |  98 +++--
 tests/test_realtor.py                         |   8 +-
 tests/test_redfin.py                          |  24 +-
 tests/test_utils.py                           |   8 +-
 tests/test_zillow.py                          |  22 +-
 13 files changed, 353 insertions(+), 897 deletions(-)
 delete mode 100644 homeharvest/core/scrapers/redfin/__init__.py
 delete mode 100644 homeharvest/core/scrapers/zillow/__init__.py

diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py
index 2b60e3b..d2f34e0 100644
--- a/homeharvest/__init__.py
+++ b/homeharvest/__init__.py
@@ -3,139 +3,60 @@ from typing import Union
 import concurrent.futures
 from concurrent.futures import ThreadPoolExecutor
 
+from .utils import process_result, ordered_properties
 from .core.scrapers import ScraperInput
-from .core.scrapers.redfin import RedfinScraper
 from .core.scrapers.realtor import RealtorScraper
-from .core.scrapers.zillow import ZillowScraper
-from .core.scrapers.models import ListingType, Property, SiteName
+from .core.scrapers.models import Status, Property, SiteName
 from .exceptions import InvalidSite, InvalidListingType
 
 
 _scrapers = {
-    "redfin": RedfinScraper,
     "realtor.com": RealtorScraper,
-    "zillow": ZillowScraper,
 }
 
 
-def _validate_input(site_name: str, listing_type: str) -> None:
+def _validate_input(site_name: str, status: str) -> None:
     if site_name.lower() not in _scrapers:
         raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
 
-    if listing_type.upper() not in ListingType.__members__:
-        raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.")
+    if status.upper() not in Status.__members__:
+        raise InvalidListingType(f"Provided listing type, '{status}', does not exist.")
 
 
-def _get_ordered_properties(result: Property) -> list[str]:
-    return [
-        "property_url",
-        "site_name",
-        "listing_type",
-        "property_type",
-        "status_text",
-        "baths_min",
-        "baths_max",
-        "beds_min",
-        "beds_max",
-        "sqft_min",
-        "sqft_max",
-        "price_min",
-        "price_max",
-        "unit_count",
-        "tax_assessed_value",
-        "price_per_sqft",
-        "lot_area_value",
-        "lot_area_unit",
-        "address_one",
-        "address_two",
-        "city",
-        "state",
-        "zip_code",
-        "posted_time",
-        "area_min",
-        "bldg_name",
-        "stories",
-        "year_built",
-        "agent_name",
-        "agent_phone",
-        "agent_email",
-        "days_on_market",
-        "sold_date",
-        "mls_id",
-        "img_src",
-        "latitude",
-        "longitude",
-        "description",
-    ]
-
-
-def _process_result(result: Property) -> pd.DataFrame:
-    prop_data = result.__dict__
-
-    prop_data["site_name"] = prop_data["site_name"].value
-    prop_data["listing_type"] = prop_data["listing_type"].value.lower()
-    if "property_type" in prop_data and prop_data["property_type"] is not None:
-        prop_data["property_type"] = prop_data["property_type"].value.lower()
-    else:
-        prop_data["property_type"] = None
-    if "address" in prop_data:
-        address_data = prop_data["address"]
-        prop_data["address_one"] = address_data.address_one
-        prop_data["address_two"] = address_data.address_two
-        prop_data["city"] = address_data.city
-        prop_data["state"] = address_data.state
-        prop_data["zip_code"] = address_data.zip_code
-
-        del prop_data["address"]
-
-    if "agent" in prop_data and prop_data["agent"] is not None:
-        agent_data = prop_data["agent"]
-        prop_data["agent_name"] = agent_data.name
-        prop_data["agent_phone"] = agent_data.phone
-        prop_data["agent_email"] = agent_data.email
-
-        del prop_data["agent"]
-    else:
-        prop_data["agent_name"] = None
-        prop_data["agent_phone"] = None
-        prop_data["agent_email"] = None
-
-    properties_df = pd.DataFrame([prop_data])
-    properties_df = properties_df[_get_ordered_properties(result)]
-
-    return properties_df
-
-
-def _scrape_single_site(location: str, site_name: str, listing_type: str, proxy: str = None) -> pd.DataFrame:
+def _scrape_single_site(
+    location: str, site_name: str, status: str, proxy: str = None, timeframe: str = None
+) -> pd.DataFrame:
     """
     Helper function to scrape a single site.
     """
-    _validate_input(site_name, listing_type)
+    # Reject unknown sites/statuses up front; status is matched case-insensitively.
+    _validate_input(site_name, status)
 
     scraper_input = ScraperInput(
         location=location,
-        listing_type=ListingType[listing_type.upper()],
+        status=status.lower(),  # ScraperInput only accepts the lowercase VALID_STATUSES
         site_name=SiteName.get_by_value(site_name.lower()),
         proxy=proxy,
+        timeframe=timeframe,
     )
 
     site = _scrapers[site_name.lower()](scraper_input)
     results = site.search()
+    # An empty result list is handled below by returning an empty DataFrame.
 
-    properties_dfs = [_process_result(result) for result in results]
-    properties_dfs = [df.dropna(axis=1, how="all") for df in properties_dfs if not df.empty]
+    properties_dfs = [process_result(result) for result in results]
     if not properties_dfs:
         return pd.DataFrame()
 
-    return pd.concat(properties_dfs, ignore_index=True)
+    return pd.concat(properties_dfs, ignore_index=True, axis=0)[ordered_properties]
 
 
 def scrape_property(
     location: str,
+    timeframe: str,
     site_name: Union[str, list[str]] = None,
-    listing_type: str = "for_sale",
+    status: str = "for_sale",  # must be a Status member name; "sale" fails _validate_input
     proxy: str = None,
-    keep_duplicates: bool = False
 ) -> pd.DataFrame:
     """
     Scrape property from various sites from a given location and listing type.
@@ -155,12 +76,14 @@ def scrape_property(
     results = []
 
     if len(site_name) == 1:
-        final_df = _scrape_single_site(location, site_name[0], listing_type, proxy)
+        final_df = _scrape_single_site(location, site_name[0], status, proxy, timeframe)
         results.append(final_df)
     else:
         with ThreadPoolExecutor() as executor:
             futures = {
-                executor.submit(_scrape_single_site, location, s_name, listing_type, proxy): s_name
+                executor.submit(
+                    _scrape_single_site, location, s_name, status, proxy, timeframe
+                ): s_name
                 for s_name in site_name
             }
 
@@ -175,13 +98,11 @@ def scrape_property(
 
     final_df = pd.concat(results, ignore_index=True)
 
-    columns_to_track = ["address_one", "address_two", "city"]
+    columns_to_track = ["Street", "Unit", "Zip"]
 
     #: validate they exist, otherwise create them
     for col in columns_to_track:
         if col not in final_df.columns:
             final_df[col] = None
 
-    if not keep_duplicates:
-        final_df = final_df.drop_duplicates(subset=columns_to_track, keep="first")
     return final_df
diff --git a/homeharvest/cli.py b/homeharvest/cli.py
index c9deae8..d239310 100644
--- a/homeharvest/cli.py
+++ b/homeharvest/cli.py
@@ -5,7 +5,9 @@ from homeharvest import scrape_property
 
 def main():
     parser = argparse.ArgumentParser(description="Home Harvest Property Scraper")
-    parser.add_argument("location", type=str, help="Location to scrape (e.g., San Francisco, CA)")
+    parser.add_argument(
+        "location", type=str, help="Location to scrape (e.g., San Francisco, CA)"
+    )
 
     parser.add_argument(
         "-s",
@@ -46,14 +48,22 @@ def main():
         "-k",
         "--keep_duplicates",
         action="store_true",
-        help="Keep duplicate properties based on address"
+        help="Keep duplicate properties based on address",
     )
 
-    parser.add_argument("-p", "--proxy", type=str, default=None, help="Proxy to use for scraping")
+    parser.add_argument(
+        "-p", "--proxy", type=str, default=None, help="Proxy to use for scraping"
+    )
 
     args = parser.parse_args()
 
-    result = scrape_property(args.location, args.site_name, args.listing_type, proxy=args.proxy, keep_duplicates=args.keep_duplicates)
+    result = scrape_property(
+        args.location,
+        timeframe=None,  # NOTE(review): no timeframe flag visible in this CLI - confirm
+        site_name=args.site_name,
+        status=args.listing_type,
+        proxy=args.proxy,
+    )
 
     if not args.filename:
         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py
index e900dbe..79f4e93 100644
--- a/homeharvest/core/scrapers/__init__.py
+++ b/homeharvest/core/scrapers/__init__.py
@@ -1,21 +1,38 @@
 from dataclasses import dataclass
 import requests
 import tls_client
-from .models import Property, ListingType, SiteName
+from typing import Optional
+from .models import Property, SiteName, Status
+from ...exceptions import InvalidTimeFrame, InvalidListingType
+
+VALID_TIMEFRAMES = ["1W", "1M", "3M", "6M", "1Y"]
+VALID_STATUSES = ["sold", "for_sale", "for_rent"]
 
 
 @dataclass
 class ScraperInput:
     location: str
-    listing_type: ListingType
-    site_name: SiteName
-    proxy: str | None = None
+    status: str  # expected to be one of VALID_STATUSES (lowercase)
+    site_name: SiteName
+    proxy: Optional[str] = None
+    timeframe: Optional[str] = None  # one of VALID_TIMEFRAMES, or None
+
+    def __post_init__(self):
+        if self.timeframe and self.timeframe not in VALID_TIMEFRAMES:
+            raise InvalidTimeFrame(f"Invalid timeframe provided: {self.timeframe}")
+        if self.status and self.status not in VALID_STATUSES:
+            raise InvalidListingType(f"Invalid status provided: {self.status}")
 
 
 class Scraper:
-    def __init__(self, scraper_input: ScraperInput, session: requests.Session | tls_client.Session = None):
+    def __init__(
+        self,
+        scraper_input: ScraperInput,
+        session: requests.Session | tls_client.Session = None,
+    ):
         self.location = scraper_input.location
-        self.listing_type = scraper_input.listing_type
+        self.status = scraper_input.status
+        self.timeframe = scraper_input.timeframe
 
         if not session:
             self.session = requests.Session()
@@ -27,7 +44,7 @@ class Scraper:
             proxies = {"http": proxy_url, "https": proxy_url}
             self.session.proxies.update(proxies)
 
-        self.listing_type = scraper_input.listing_type
+        self.listing_type = scraper_input.status
         self.site_name = scraper_input.site_name
 
     def search(self) -> list[Property]:
diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py
index ed75999..4f20094 100644
--- a/homeharvest/core/scrapers/models.py
+++ b/homeharvest/core/scrapers/models.py
@@ -1,7 +1,6 @@
 from dataclasses import dataclass
 from enum import Enum
-from typing import Tuple
-from datetime import datetime
+from typing import Optional
 
 
 class SiteName(Enum):
@@ -17,52 +16,19 @@ class SiteName(Enum):
         raise ValueError(f"{value} not found in {cls}")
 
 
-class ListingType(Enum):
+class Status(Enum):
     FOR_SALE = "FOR_SALE"
     FOR_RENT = "FOR_RENT"
     SOLD = "SOLD"
 
 
-class PropertyType(Enum):
-    HOUSE = "HOUSE"
-    BUILDING = "BUILDING"
-    CONDO = "CONDO"
-    TOWNHOUSE = "TOWNHOUSE"
-    SINGLE_FAMILY = "SINGLE_FAMILY"
-    MULTI_FAMILY = "MULTI_FAMILY"
-    MANUFACTURED = "MANUFACTURED"
-    NEW_CONSTRUCTION = "NEW_CONSTRUCTION"
-    APARTMENT = "APARTMENT"
-    APARTMENTS = "APARTMENTS"
-    LAND = "LAND"
-    LOT = "LOT"
-    OTHER = "OTHER"
-
-    BLANK = "BLANK"
-
-    @classmethod
-    def from_int_code(cls, code):
-        mapping = {
-            1: cls.HOUSE,
-            2: cls.CONDO,
-            3: cls.TOWNHOUSE,
-            4: cls.MULTI_FAMILY,
-            5: cls.LAND,
-            6: cls.OTHER,
-            8: cls.SINGLE_FAMILY,
-            13: cls.SINGLE_FAMILY,
-        }
-
-        return mapping.get(code, cls.BLANK)
-
-
 @dataclass
 class Address:
-    address_one: str | None = None
-    address_two: str | None = "#"
+    street: str | None = None
+    unit: str | None = None
     city: str | None = None
     state: str | None = None
-    zip_code: str | None = None
+    zip: str | None = None
 
 
 @dataclass
@@ -74,47 +40,31 @@ class Agent:
 
 @dataclass
 class Property:
-    property_url: str
-    site_name: SiteName
-    listing_type: ListingType
-    address: Address
-    property_type: PropertyType | None = None
-
-    # house for sale
-    tax_assessed_value: int | None = None
-    lot_area_value: float | None = None
-    lot_area_unit: str | None = None
-    stories: int | None = None
-    year_built: int | None = None
-    price_per_sqft: int | None = None
+    property_url: str | None = None
+    mls: str | None = None
     mls_id: str | None = None
+    status: str | None = None
+    style: str | None = None
 
-    agent: Agent | None = None
-    img_src: str | None = None
-    description: str | None = None
-    status_text: str | None = None
-    posted_time: datetime | None = None
+    beds: int | None = None
+    baths_full: int | None = None
+    baths_half: int | None = None
+    list_price: int | None = None
+    list_date: str | None = None
+    sold_price: int | None = None
+    last_sold_date: str | None = None
+    prc_sqft: float | None = None
+    est_sf: int | None = None
+    lot_sf: int | None = None
+    hoa_fee: int | None = None
 
-    # building for sale
-    bldg_name: str | None = None
-    area_min: int | None = None
-
-    beds_min: int | None = None
-    beds_max: int | None = None
-
-    baths_min: float | None = None
-    baths_max: float | None = None
-
-    sqft_min: int | None = None
-    sqft_max: int | None = None
-
-    price_min: int | None = None
-    price_max: int | None = None
-
-    unit_count: int | None = None
+    address: Address | None = None
 
+    yr_blt: int | None = None
     latitude: float | None = None
     longitude: float | None = None
 
-    sold_date: datetime | None = None
-    days_on_market: int | None = None
+    stories: int | None = None
+    prkg_gar: float | None = None
+
+    neighborhoods: Optional[str] = None
diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py
index 78ecc84..d0e3352 100644
--- a/homeharvest/core/scrapers/realtor/__init__.py
+++ b/homeharvest/core/scrapers/realtor/__init__.py
@@ -4,10 +4,10 @@ homeharvest.realtor.__init__
 
 This module implements the scraper for relator.com
 """
+from typing import Dict, Union
 from ..models import Property, Address
 from .. import Scraper
 from ....exceptions import NoResultsFound
-from ....utils import parse_address_one, parse_address_two
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
 
@@ -15,9 +15,7 @@ class RealtorScraper(Scraper):
     def __init__(self, scraper_input):
         self.counter = 1
         super().__init__(scraper_input)
-        self.search_url = (
-            "https://www.realtor.com/api/v1/rdc_search_srp?client_id=rdc-search-new-communities&schema=vesta"
-        )
+        self.endpoint = "https://www.realtor.com/api/v1/rdc_search_srp?client_id=rdc-search-new-communities&schema=vesta"
 
     def handle_location(self):
         headers = {
@@ -37,7 +35,7 @@ class RealtorScraper(Scraper):
 
         params = {
             "input": self.location,
-            "client_id": self.listing_type.value.lower().replace("_", "-"),
+            "client_id": self.listing_type.lower().replace("_", "-"),
             "limit": "1",
             "area_types": "city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park",
         }
@@ -68,7 +66,6 @@ class RealtorScraper(Scraper):
                             garage
                             permalink
                             year_built
-                            stories
                         }
                         address {
                             address_validation_code
@@ -100,7 +97,6 @@ class RealtorScraper(Scraper):
                         public_record {
                             lot_size
                             sqft
-                            stories
                             units
                             year_built
                         }
@@ -114,56 +110,48 @@ class RealtorScraper(Scraper):
             "variables": variables,
         }
 
-        response = self.session.post(self.search_url, json=payload)
+        response = self.session.post(self.endpoint, json=payload)
         response_json = response.json()
 
         property_info = response_json["data"]["property"]
-        address_one, address_two = parse_address_one(property_info["address"]["line"])
 
         return [
             Property(
-                site_name=self.site_name,
-                address=Address(
-                    address_one=address_one,
-                    address_two=address_two,
-                    city=property_info["address"]["city"],
-                    state=property_info["address"]["state_code"],
-                    zip_code=property_info["address"]["postal_code"],
-                ),
                 property_url="https://www.realtor.com/realestateandhomes-detail/"
                 + property_info["details"]["permalink"],
-                stories=property_info["details"]["stories"],
-                year_built=property_info["details"]["year_built"],
-                price_per_sqft=property_info["basic"]["price"] // property_info["basic"]["sqft"]
-                if property_info["basic"]["sqft"] is not None and property_info["basic"]["price"] is not None
+                address=Address(
+                    street=f"{property_info['address']['street_number']} {property_info['address']['street_name']} {property_info['address']['street_suffix']}",
+                    unit=property_info["address"]["unit_value"],
+                    city=property_info["address"]["city"],
+                    state=property_info["address"]["state_code"],
+                    zip=property_info["address"]["postal_code"],
+                ),
+                yr_blt=property_info["details"]["year_built"],
+                prc_sqft=property_info["basic"]["price"]
+                // property_info["basic"]["sqft"]
+                if property_info["basic"]["sqft"] is not None
+                and property_info["basic"]["price"] is not None
                 else None,
-                mls_id=property_id,
-                listing_type=self.listing_type,
-                lot_area_value=property_info["public_record"]["lot_size"]
-                if property_info["public_record"] is not None
-                else None,
-                beds_min=property_info["basic"]["beds"],
-                beds_max=property_info["basic"]["beds"],
-                baths_min=property_info["basic"]["baths"],
-                baths_max=property_info["basic"]["baths"],
-                sqft_min=property_info["basic"]["sqft"],
-                sqft_max=property_info["basic"]["sqft"],
-                price_min=property_info["basic"]["price"],
-                price_max=property_info["basic"]["price"],
+                status=self.status.upper(),
+                beds=property_info["basic"]["beds"],
+                baths_full=property_info["basic"]["baths"],
+                lot_sf=property_info["basic"]["lot_sqft"],
+                est_sf=property_info["basic"]["sqft"],
+                list_price=property_info["basic"]["price"],
+                sold_price=property_info["basic"]["sold_price"],
             )
         ]
 
-    def handle_area(self, variables: dict, return_total: bool = False) -> list[Property] | int:
+    def handle_area(self, variables: dict) -> Dict[str, Union[int, list[Property]]]:
         """
         Handles a location area & returns a list of properties
         """
-        query = (
-            """query Home_search(
+        query = """query Home_search(
                             $city: String,
                             $county: [String],
                             $state_code: String,
-                            $postal_code: String
-                            $offset: Int,
+                            $postal_code: String,
+                            $offset: Int
                         ) {
                             home_search(
                                 query: {
@@ -172,26 +160,66 @@ class RealtorScraper(Scraper):
                                     postal_code: $postal_code
                                     state_code: $state_code
                                     status: %s
+                                    sold_date: {
+                                        min: %s
+                                    }
                                 }
                                 limit: 200
                                 offset: $offset
+                                sort: [
+                        {
+                            field: sold_date,
+                            direction: desc 
+                        }
+                    ]
                             ) {
                                 count
                                 total
                                 results {
                                     property_id
+                                    list_date
+                                    status
+                                    last_sold_price
+                                    last_sold_date
+                                    hoa {
+                                    fee
+                                    }
                                     description {
-                                        baths
+                                        baths_full
+                                        baths_half
                                         beds
                                         lot_sqft
                                         sqft
-                                        text
                                         sold_price
-                                        stories
                                         year_built
                                         garage
-                                        unit_number
-                                        floor_number
+                                        sold_price
+                                        type
+                                        sub_type
+                                        name
+                                        stories
+                                    }
+                                    source {
+                                        raw {
+                                            area
+                                            status
+                                            style
+                                        }
+                                        last_update_date
+                                        contract_date
+                                        id
+                                        listing_id
+                                        name
+                                        type
+                                        listing_href
+                                        community_id
+                                        management_id
+                                        corporation_id
+                                        subdivision_status
+                                        spec_id
+                                        plan_id
+                                        tier_rank
+                                        feed_type
                                     }
                                     location {
                                         address {
@@ -201,41 +229,44 @@ class RealtorScraper(Scraper):
                                             postal_code
                                             state_code
                                             state
+                                            coordinate {
+                                                lon
+                                                lat
+                                            }
                                             street_direction
                                             street_name
                                             street_number
                                             street_post_direction
                                             street_suffix
                                             unit
-                                            coordinate {
-                                                lon
-                                                lat
-                                            }
+                                        }
+                                        neighborhoods {
+                                        name
                                         }
                                     }
                                     list_price
                                     price_per_sqft
+                                                                        style_category_tags {
+                                                                        exterior}
+
                                     source {
                                         id
                                     }
                                 }
                             }
-                        }"""
-            % self.listing_type.value.lower()
+                        }""" % (
+            self.status,
+            f'"$nowUTC-{self.timeframe}"',
         )
 
         payload = {
             "query": query,
             "variables": variables,
         }
-
-        response = self.session.post(self.search_url, json=payload)
+        response = self.session.post(self.endpoint, json=payload)
         response.raise_for_status()
         response_json = response.json()
 
-        if return_total:
-            return response_json["data"]["home_search"]["total"]
-
         properties: list[Property] = []
 
         if (
@@ -246,19 +277,67 @@ class RealtorScraper(Scraper):
             or response_json["data"]["home_search"] is None
             or "results" not in response_json["data"]["home_search"]
         ):
-            return []
+            return {"total": 0, "properties": []}
 
         for result in response_json["data"]["home_search"]["results"]:
             self.counter += 1
-            address_one, _ = parse_address_one(result["location"]["address"]["line"])
+            mls = (
+                result["source"].get("id")
+                if "source" in result and isinstance(result["source"], dict)
+                else None
+            )
+            mls_id = (
+                result["source"].get("listing_id")
+                if "source" in result and isinstance(result["source"], dict)
+                else None
+            )
+
+            if not mls_id:
+                continue
+                # not type
+
+            neighborhoods_list = []
+            neighborhoods = result["location"].get("neighborhoods", [])
+
+            if neighborhoods:
+                for neighborhood in neighborhoods:
+                    name = neighborhood.get("name")
+                    if name:
+                        neighborhoods_list.append(name)
+
+            neighborhoods_str = (
+                ", ".join(neighborhoods_list) if neighborhoods_list else None
+            )
+
             realty_property = Property(
+                property_url="https://www.realtor.com/realestateandhomes-detail/"
+                + result["property_id"],
+                mls=mls,
+                mls_id=mls_id,
+                # Guard string methods against null fields, as already done for hoa/coordinate.
+                status=result["status"].upper() if result["status"] else None,
+                style=result["description"]["type"].upper() if result["description"]["type"] else None,
+                beds=result["description"]["beds"],
+                baths_full=result["description"]["baths_full"],
+                baths_half=result["description"]["baths_half"],
+                est_sf=result["description"]["sqft"],
+                lot_sf=result["description"]["lot_sqft"],
+                list_price=result["list_price"],
+                list_date=result["list_date"].split("T")[0] if result["list_date"] else None,
+                sold_price=result["description"]["sold_price"],
+                prc_sqft=result["price_per_sqft"],
+                last_sold_date=result["last_sold_date"],
+                hoa_fee=result["hoa"]["fee"]
+                if result.get("hoa") and isinstance(result["hoa"], dict)
+                else None,
                 address=Address(
-                    address_one=address_one,
+                    street=f"{result['location']['address']['street_number']} {result['location']['address']['street_name']} {result['location']['address']['street_suffix']}",
+                    unit=result["location"]["address"]["unit"],
                     city=result["location"]["address"]["city"],
                     state=result["location"]["address"]["state_code"],
-                    zip_code=result["location"]["address"]["postal_code"],
-                    address_two=parse_address_two(result["location"]["address"]["unit"]),
+                    zip=result["location"]["address"]["postal_code"],
                 ),
+                yr_blt=result["description"]["year_built"],
                 latitude=result["location"]["address"]["coordinate"]["lat"]
                 if result
                 and result.get("location")
@@ -273,26 +352,16 @@ class RealtorScraper(Scraper):
                 and result["location"]["address"].get("coordinate")
                 and "lon" in result["location"]["address"]["coordinate"]
                 else None,
-                site_name=self.site_name,
-                property_url="https://www.realtor.com/realestateandhomes-detail/" + result["property_id"],
+                prkg_gar=result["description"]["garage"],
                 stories=result["description"]["stories"],
-                year_built=result["description"]["year_built"],
-                price_per_sqft=result["price_per_sqft"],
-                mls_id=result["property_id"],
-                listing_type=self.listing_type,
-                lot_area_value=result["description"]["lot_sqft"],
-                beds_min=result["description"]["beds"],
-                beds_max=result["description"]["beds"],
-                baths_min=result["description"]["baths"],
-                baths_max=result["description"]["baths"],
-                sqft_min=result["description"]["sqft"],
-                sqft_max=result["description"]["sqft"],
-                price_min=result["list_price"],
-                price_max=result["list_price"],
+                neighborhoods=neighborhoods_str,
             )
             properties.append(realty_property)
 
-        return properties
+        return {
+            "total": response_json["data"]["home_search"]["total"],
+            "properties": properties,
+        }
 
     def search(self):
         location_info = self.handle_location()
@@ -311,20 +380,20 @@ class RealtorScraper(Scraper):
             "offset": offset,
         }
 
-        total = self.handle_area(search_variables, return_total=True)
+        result = self.handle_area(search_variables)
+        total = result["total"]
+        homes = result["properties"]
 
-        homes = []
         with ThreadPoolExecutor(max_workers=10) as executor:
             futures = [
                 executor.submit(
                     self.handle_area,
                     variables=search_variables | {"offset": i},
-                    return_total=False,
                 )
-                for i in range(0, total, 200)
+                for i in range(200, min(total, 10000), 200)
             ]
 
             for future in as_completed(futures):
-                homes.extend(future.result())
+                homes.extend(future.result()["properties"])
 
         return homes
diff --git a/homeharvest/core/scrapers/redfin/__init__.py b/homeharvest/core/scrapers/redfin/__init__.py
deleted file mode 100644
index 80b91f8..0000000
--- a/homeharvest/core/scrapers/redfin/__init__.py
+++ /dev/null
@@ -1,246 +0,0 @@
-"""
-homeharvest.redfin.__init__
-~~~~~~~~~~~~
-
-This module implements the scraper for redfin.com
-"""
-import json
-from typing import Any
-from .. import Scraper
-from ....utils import parse_address_two, parse_address_one
-from ..models import Property, Address, PropertyType, ListingType, SiteName, Agent
-from ....exceptions import NoResultsFound, SearchTooBroad
-from datetime import datetime
-
-
-class RedfinScraper(Scraper):
-    def __init__(self, scraper_input):
-        super().__init__(scraper_input)
-        self.listing_type = scraper_input.listing_type
-
-    def _handle_location(self):
-        url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(self.location)
-
-        response = self.session.get(url)
-        response_json = json.loads(response.text.replace("{}&&", ""))
-
-        def get_region_type(match_type: str):
-            if match_type == "4":
-                return "2"  #: zip
-            elif match_type == "2":
-                return "6"  #: city
-            elif match_type == "1":
-                return "address"  #: address, needs to be handled differently
-            elif match_type == "11":
-                return "state"
-
-        if "exactMatch" not in response_json["payload"]:
-            raise NoResultsFound("No results found for location: {}".format(self.location))
-
-        if response_json["payload"]["exactMatch"] is not None:
-            target = response_json["payload"]["exactMatch"]
-        else:
-            target = response_json["payload"]["sections"][0]["rows"][0]
-
-        return target["id"].split("_")[1], get_region_type(target["type"])
-
-    def _parse_home(self, home: dict, single_search: bool = False) -> Property:
-        def get_value(key: str) -> Any | None:
-            if key in home and "value" in home[key]:
-                return home[key]["value"]
-
-        if not single_search:
-            address = Address(
-                address_one=parse_address_one(get_value("streetLine"))[0],
-                address_two=parse_address_one(get_value("streetLine"))[1],
-                city=home.get("city"),
-                state=home.get("state"),
-                zip_code=home.get("zip"),
-            )
-        else:
-            address_info = home.get("streetAddress")
-            address_one, address_two = parse_address_one(address_info.get("assembledAddress"))
-
-            address = Address(
-                address_one=address_one,
-                address_two=address_two,
-                city=home.get("city"),
-                state=home.get("state"),
-                zip_code=home.get("zip"),
-            )
-
-        url = "https://www.redfin.com{}".format(home["url"])
-        lot_size_data = home.get("lotSize")
-
-        if not isinstance(lot_size_data, int):
-            lot_size = lot_size_data.get("value", None) if isinstance(lot_size_data, dict) else None
-        else:
-            lot_size = lot_size_data
-
-        lat_long = get_value("latLong")
-
-        return Property(
-            site_name=self.site_name,
-            listing_type=self.listing_type,
-            address=address,
-            property_url=url,
-            beds_min=home["beds"] if "beds" in home else None,
-            beds_max=home["beds"] if "beds" in home else None,
-            baths_min=home["baths"] if "baths" in home else None,
-            baths_max=home["baths"] if "baths" in home else None,
-            price_min=get_value("price"),
-            price_max=get_value("price"),
-            sqft_min=get_value("sqFt"),
-            sqft_max=get_value("sqFt"),
-            stories=home["stories"] if "stories" in home else None,
-            agent=Agent(  #: listingAgent, some have sellingAgent as well
-                name=home['listingAgent'].get('name') if 'listingAgent' in home else None,
-                phone=home['listingAgent'].get('phone') if 'listingAgent' in home else None,
-            ),
-            description=home["listingRemarks"] if "listingRemarks" in home else None,
-            year_built=get_value("yearBuilt") if not single_search else home.get("yearBuilt"),
-            lot_area_value=lot_size,
-            property_type=PropertyType.from_int_code(home.get("propertyType")),
-            price_per_sqft=get_value("pricePerSqFt") if type(home.get("pricePerSqFt")) != int else home.get("pricePerSqFt"),
-            mls_id=get_value("mlsId"),
-            latitude=lat_long.get('latitude') if lat_long else None,
-            longitude=lat_long.get('longitude') if lat_long else None,
-            sold_date=datetime.fromtimestamp(home['soldDate'] / 1000) if 'soldDate' in home else None,
-            days_on_market=get_value("dom")
-        )
-
-    def _handle_rentals(self, region_id, region_type):
-        url = f"https://www.redfin.com/stingray/api/v1/search/rentals?al=1&isRentals=true&region_id={region_id}&region_type={region_type}&num_homes=100000"
-
-        response = self.session.get(url)
-        response.raise_for_status()
-        homes = response.json()
-
-        properties_list = []
-
-        for home in homes["homes"]:
-            home_data = home["homeData"]
-            rental_data = home["rentalExtension"]
-
-            property_url = f"https://www.redfin.com{home_data.get('url', '')}"
-            address_info = home_data.get("addressInfo", {})
-            centroid = address_info.get("centroid", {}).get("centroid", {})
-            address = Address(
-                address_one=parse_address_one(address_info.get("formattedStreetLine"))[0],
-                city=address_info.get("city"),
-                state=address_info.get("state"),
-                zip_code=address_info.get("zip"),
-            )
-
-            price_range = rental_data.get("rentPriceRange", {"min": None, "max": None})
-            bed_range = rental_data.get("bedRange", {"min": None, "max": None})
-            bath_range = rental_data.get("bathRange", {"min": None, "max": None})
-            sqft_range = rental_data.get("sqftRange", {"min": None, "max": None})
-
-            property_ = Property(
-                property_url=property_url,
-                site_name=SiteName.REDFIN,
-                listing_type=ListingType.FOR_RENT,
-                address=address,
-                description=rental_data.get("description"),
-                latitude=centroid.get("latitude"),
-                longitude=centroid.get("longitude"),
-                baths_min=bath_range.get("min"),
-                baths_max=bath_range.get("max"),
-                beds_min=bed_range.get("min"),
-                beds_max=bed_range.get("max"),
-                price_min=price_range.get("min"),
-                price_max=price_range.get("max"),
-                sqft_min=sqft_range.get("min"),
-                sqft_max=sqft_range.get("max"),
-                img_src=home_data.get("staticMapUrl"),
-                posted_time=rental_data.get("lastUpdated"),
-                bldg_name=rental_data.get("propertyName"),
-            )
-
-            properties_list.append(property_)
-
-        if not properties_list:
-            raise NoResultsFound("No rentals found for the given location.")
-
-        return properties_list
-
-    def _parse_building(self, building: dict) -> Property:
-        street_address = " ".join(
-            [
-                building["address"]["streetNumber"],
-                building["address"]["directionalPrefix"],
-                building["address"]["streetName"],
-                building["address"]["streetType"],
-            ]
-        )
-        return Property(
-            site_name=self.site_name,
-            property_type=PropertyType("BUILDING"),
-            address=Address(
-                address_one=parse_address_one(street_address)[0],
-                city=building["address"]["city"],
-                state=building["address"]["stateOrProvinceCode"],
-                zip_code=building["address"]["postalCode"],
-                address_two=parse_address_two(
-                    " ".join(
-                        [
-                            building["address"]["unitType"],
-                            building["address"]["unitValue"],
-                        ]
-                    )
-                ),
-            ),
-            property_url="https://www.redfin.com{}".format(building["url"]),
-            listing_type=self.listing_type,
-            unit_count=building.get("numUnitsForSale"),
-        )
-
-    def handle_address(self, home_id: str):
-        """
-        EPs:
-        https://www.redfin.com/stingray/api/home/details/initialInfo?al=1&path=/TX/Austin/70-Rainey-St-78701/unit-1608/home/147337694
-        https://www.redfin.com/stingray/api/home/details/mainHouseInfoPanelInfo?propertyId=147337694&accessLevel=3
-        https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId=147337694&accessLevel=3
-        https://www.redfin.com/stingray/api/home/details/belowTheFold?propertyId=147337694&accessLevel=3
-        """
-        url = "https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId={}&accessLevel=3".format(
-            home_id
-        )
-
-        response = self.session.get(url)
-        response_json = json.loads(response.text.replace("{}&&", ""))
-
-        parsed_home = self._parse_home(response_json["payload"]["addressSectionInfo"], single_search=True)
-        return [parsed_home]
-
-    def search(self):
-        region_id, region_type = self._handle_location()
-
-        if region_type == "state":
-            raise SearchTooBroad("State searches are not supported, please use a more specific location.")
-
-        if region_type == "address":
-            home_id = region_id
-            return self.handle_address(home_id)
-
-        if self.listing_type == ListingType.FOR_RENT:
-            return self._handle_rentals(region_id, region_type)
-        else:
-            if self.listing_type == ListingType.FOR_SALE:
-                url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}&num_homes=100000"
-            else:
-                url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}&sold_within_days=30&num_homes=100000"
-            response = self.session.get(url)
-            response_json = json.loads(response.text.replace("{}&&", ""))
-
-            if "payload" in response_json:
-                homes_list = response_json["payload"].get("homes", [])
-                buildings_list = response_json["payload"].get("buildings", {}).values()
-
-                homes = [self._parse_home(home) for home in homes_list] + [
-                    self._parse_building(building) for building in buildings_list
-                ]
-                return homes
-            else:
-                return []
diff --git a/homeharvest/core/scrapers/zillow/__init__.py b/homeharvest/core/scrapers/zillow/__init__.py
deleted file mode 100644
index ba55a01..0000000
--- a/homeharvest/core/scrapers/zillow/__init__.py
+++ /dev/null
@@ -1,335 +0,0 @@
-"""
-homeharvest.zillow.__init__
-~~~~~~~~~~~~
-
-This module implements the scraper for zillow.com
-"""
-import re
-import json
-
-import tls_client
-
-from .. import Scraper
-from requests.exceptions import HTTPError
-from ....utils import parse_address_one, parse_address_two
-from ....exceptions import GeoCoordsNotFound, NoResultsFound
-from ..models import Property, Address, ListingType, PropertyType, Agent
-import urllib.parse
-from datetime import datetime, timedelta
-
-
-class ZillowScraper(Scraper):
-    def __init__(self, scraper_input):
-        session = tls_client.Session(
-            client_identifier="chrome112", random_tls_extension_order=True
-        )
-
-        super().__init__(scraper_input, session)
-
-        self.session.headers.update({
-            'authority': 'www.zillow.com',
-            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
-            'accept-language': 'en-US,en;q=0.9',
-            'cache-control': 'max-age=0',
-            'sec-fetch-dest': 'document',
-            'sec-fetch-mode': 'navigate',
-            'sec-fetch-site': 'same-origin',
-            'sec-fetch-user': '?1',
-            'upgrade-insecure-requests': '1',
-            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
-        })
-
-        if not self.is_plausible_location(self.location):
-            raise NoResultsFound("Invalid location input: {}".format(self.location))
-
-        listing_type_to_url_path = {
-            ListingType.FOR_SALE: "for_sale",
-            ListingType.FOR_RENT: "for_rent",
-            ListingType.SOLD: "recently_sold",
-        }
-
-        self.url = f"https://www.zillow.com/homes/{listing_type_to_url_path[self.listing_type]}/{self.location}_rb/"
-
-    def is_plausible_location(self, location: str) -> bool:
-        url = (
-            "https://www.zillowstatic.com/autocomplete/v3/suggestions?q={"
-            "}&abKey=6666272a-4b99-474c-b857-110ec438732b&clientId=homepage-render"
-        ).format(urllib.parse.quote(location))
-
-        resp = self.session.get(url)
-
-        return resp.json()["results"] != []
-
-    def search(self):
-        resp = self.session.get(self.url)
-        if resp.status_code != 200:
-            raise HTTPError(
-                f"bad response status code: {resp.status_code}"
-            )
-        content = resp.text
-
-        match = re.search(
-            r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
-            content,
-            re.DOTALL,
-        )
-        if not match:
-            raise NoResultsFound("No results were found for Zillow with the given Location.")
-
-        json_str = match.group(1)
-        data = json.loads(json_str)
-
-        if "searchPageState" in data["props"]["pageProps"]:
-            pattern = r'window\.mapBounds = \{\s*"west":\s*(-?\d+\.\d+),\s*"east":\s*(-?\d+\.\d+),\s*"south":\s*(-?\d+\.\d+),\s*"north":\s*(-?\d+\.\d+)\s*\};'
-
-            match = re.search(pattern, content)
-
-            if match:
-                coords = [float(coord) for coord in match.groups()]
-                return self._fetch_properties_backend(coords)
-
-            else:
-                raise GeoCoordsNotFound("Box bounds could not be located.")
-
-        elif "gdpClientCache" in data["props"]["pageProps"]:
-            gdp_client_cache = json.loads(data["props"]["pageProps"]["gdpClientCache"])
-            main_key = list(gdp_client_cache.keys())[0]
-
-            property_data = gdp_client_cache[main_key]["property"]
-            property = self._get_single_property_page(property_data)
-
-            return [property]
-        raise NoResultsFound("Specific property data not found in the response.")
-
-    def _fetch_properties_backend(self, coords):
-        url = "https://www.zillow.com/async-create-search-page-state"
-
-        filter_state_for_sale = {
-            "sortSelection": {
-                # "value": "globalrelevanceex"
-                "value": "days"
-            },
-            "isAllHomes": {"value": True},
-        }
-
-        filter_state_for_rent = {
-            "isForRent": {"value": True},
-            "isForSaleByAgent": {"value": False},
-            "isForSaleByOwner": {"value": False},
-            "isNewConstruction": {"value": False},
-            "isComingSoon": {"value": False},
-            "isAuction": {"value": False},
-            "isForSaleForeclosure": {"value": False},
-            "isAllHomes": {"value": True},
-        }
-
-        filter_state_sold = {
-            "isRecentlySold": {"value": True},
-            "isForSaleByAgent": {"value": False},
-            "isForSaleByOwner": {"value": False},
-            "isNewConstruction": {"value": False},
-            "isComingSoon": {"value": False},
-            "isAuction": {"value": False},
-            "isForSaleForeclosure": {"value": False},
-            "isAllHomes": {"value": True},
-        }
-
-        selected_filter = (
-            filter_state_for_rent
-            if self.listing_type == ListingType.FOR_RENT
-            else filter_state_for_sale
-            if self.listing_type == ListingType.FOR_SALE
-            else filter_state_sold
-        )
-
-        payload = {
-            "searchQueryState": {
-                "pagination": {},
-                "isMapVisible": True,
-                "mapBounds": {
-                    "west": coords[0],
-                    "east": coords[1],
-                    "south": coords[2],
-                    "north": coords[3],
-                },
-                "filterState": selected_filter,
-                "isListVisible": True,
-                "mapZoom": 11,
-            },
-            "wants": {"cat1": ["mapResults"]},
-            "isDebugRequest": False,
-        }
-        resp = self.session.put(url, json=payload)
-        if resp.status_code != 200:
-            raise HTTPError(
-                f"bad response status code: {resp.status_code}"
-            )
-        return self._parse_properties(resp.json())
-
-    @staticmethod
-    def parse_posted_time(time: str) -> datetime:
-        int_time = int(time.split(" ")[0])
-
-        if "hour" in time:
-            return datetime.now() - timedelta(hours=int_time)
-
-        if "day" in time:
-            return datetime.now() - timedelta(days=int_time)
-
-    def _parse_properties(self, property_data: dict):
-        mapresults = property_data["cat1"]["searchResults"]["mapResults"]
-
-        properties_list = []
-
-        for result in mapresults:
-            if "hdpData" in result:
-                home_info = result["hdpData"]["homeInfo"]
-                address_data = {
-                    "address_one": parse_address_one(home_info.get("streetAddress"))[0],
-                    "address_two": parse_address_two(home_info["unit"]) if "unit" in home_info else "#",
-                    "city": home_info.get("city"),
-                    "state": home_info.get("state"),
-                    "zip_code": home_info.get("zipcode"),
-                }
-                property_obj = Property(
-                    site_name=self.site_name,
-                    address=Address(**address_data),
-                    property_url=f"https://www.zillow.com{result['detailUrl']}",
-                    tax_assessed_value=int(home_info["taxAssessedValue"]) if "taxAssessedValue" in home_info else None,
-                    property_type=PropertyType(home_info.get("homeType")),
-                    listing_type=ListingType(
-                        home_info["statusType"] if "statusType" in home_info else self.listing_type
-                    ),
-                    status_text=result.get("statusText"),
-                    posted_time=self.parse_posted_time(result["variableData"]["text"])
-                    if "variableData" in result
-                       and "text" in result["variableData"]
-                       and result["variableData"]["type"] == "TIME_ON_INFO"
-                    else None,
-                    price_min=home_info.get("price"),
-                    price_max=home_info.get("price"),
-                    beds_min=int(home_info["bedrooms"]) if "bedrooms" in home_info else None,
-                    beds_max=int(home_info["bedrooms"]) if "bedrooms" in home_info else None,
-                    baths_min=home_info.get("bathrooms"),
-                    baths_max=home_info.get("bathrooms"),
-                    sqft_min=int(home_info["livingArea"]) if "livingArea" in home_info else None,
-                    sqft_max=int(home_info["livingArea"]) if "livingArea" in home_info else None,
-                    price_per_sqft=int(home_info["price"] // home_info["livingArea"])
-                    if "livingArea" in home_info and home_info["livingArea"] != 0 and "price" in home_info
-                    else None,
-                    latitude=result["latLong"]["latitude"],
-                    longitude=result["latLong"]["longitude"],
-                    lot_area_value=round(home_info["lotAreaValue"], 2) if "lotAreaValue" in home_info else None,
-                    lot_area_unit=home_info.get("lotAreaUnit"),
-                    img_src=result.get("imgSrc"),
-                )
-
-                properties_list.append(property_obj)
-
-            elif "isBuilding" in result:
-                price_string = result["price"].replace("$", "").replace(",", "").replace("+/mo", "")
-
-                match = re.search(r"(\d+)", price_string)
-                price_value = int(match.group(1)) if match else None
-                building_obj = Property(
-                    property_url=f"https://www.zillow.com{result['detailUrl']}",
-                    site_name=self.site_name,
-                    property_type=PropertyType("BUILDING"),
-                    listing_type=ListingType(result["statusType"]),
-                    img_src=result.get("imgSrc"),
-                    address=self._extract_address(result["address"]),
-                    baths_min=result.get("minBaths"),
-                    area_min=result.get("minArea"),
-                    bldg_name=result.get("communityName"),
-                    status_text=result.get("statusText"),
-                    price_min=price_value if "+/mo" in result.get("price") else None,
-                    price_max=price_value if "+/mo" in result.get("price") else None,
-                    latitude=result.get("latLong", {}).get("latitude"),
-                    longitude=result.get("latLong", {}).get("longitude"),
-                    unit_count=result.get("unitCount"),
-                )
-
-                properties_list.append(building_obj)
-
-        return properties_list
-
-    def _get_single_property_page(self, property_data: dict):
-        """
-        This method is used when a user enters the exact location & zillow returns just one property
-        """
-        url = (
-            f"https://www.zillow.com{property_data['hdpUrl']}"
-            if "zillow.com" not in property_data["hdpUrl"]
-            else property_data["hdpUrl"]
-        )
-        address_data = property_data["address"]
-        address_one, address_two = parse_address_one(address_data["streetAddress"])
-        address = Address(
-            address_one=address_one,
-            address_two=address_two if address_two else "#",
-            city=address_data["city"],
-            state=address_data["state"],
-            zip_code=address_data["zipcode"],
-        )
-        property_type = property_data.get("homeType", None)
-        return Property(
-            site_name=self.site_name,
-            property_url=url,
-            property_type=PropertyType(property_type) if property_type in PropertyType.__members__ else None,
-            listing_type=self.listing_type,
-            address=address,
-            year_built=property_data.get("yearBuilt"),
-            tax_assessed_value=property_data.get("taxAssessedValue"),
-            lot_area_value=property_data.get("lotAreaValue"),
-            lot_area_unit=property_data["lotAreaUnits"].lower() if "lotAreaUnits" in property_data else None,
-            agent=Agent(
-                name=property_data.get("attributionInfo", {}).get("agentName")
-            ),
-            stories=property_data.get("resoFacts", {}).get("stories"),
-            mls_id=property_data.get("attributionInfo", {}).get("mlsId"),
-            beds_min=property_data.get("bedrooms"),
-            beds_max=property_data.get("bedrooms"),
-            baths_min=property_data.get("bathrooms"),
-            baths_max=property_data.get("bathrooms"),
-            price_min=property_data.get("price"),
-            price_max=property_data.get("price"),
-            sqft_min=property_data.get("livingArea"),
-            sqft_max=property_data.get("livingArea"),
-            price_per_sqft=property_data.get("resoFacts", {}).get("pricePerSquareFoot"),
-            latitude=property_data.get("latitude"),
-            longitude=property_data.get("longitude"),
-            img_src=property_data.get("streetViewTileImageUrlMediumAddress"),
-            description=property_data.get("description"),
-        )
-
-    def _extract_address(self, address_str):
-        """
-        Extract address components from a string formatted like '555 Wedglea Dr, Dallas, TX',
-        and return an Address object.
-        """
-        parts = address_str.split(", ")
-
-        if len(parts) != 3:
-            raise ValueError(f"Unexpected address format: {address_str}")
-
-        address_one = parts[0].strip()
-        city = parts[1].strip()
-        state_zip = parts[2].split(" ")
-
-        if len(state_zip) == 1:
-            state = state_zip[0].strip()
-            zip_code = None
-        elif len(state_zip) == 2:
-            state = state_zip[0].strip()
-            zip_code = state_zip[1].strip()
-        else:
-            raise ValueError(f"Unexpected state/zip format in address: {address_str}")
-
-        address_one, address_two = parse_address_one(address_one)
-        return Address(
-            address_one=address_one,
-            address_two=address_two if address_two else "#",
-            city=city,
-            state=state,
-            zip_code=zip_code,
-        )
diff --git a/homeharvest/exceptions.py b/homeharvest/exceptions.py
index 95eedbc..55a5dca 100644
--- a/homeharvest/exceptions.py
+++ b/homeharvest/exceptions.py
@@ -16,3 +16,7 @@ class GeoCoordsNotFound(Exception):
 
 class SearchTooBroad(Exception):
     """Raised when the search is too broad"""
+
+
+class InvalidTimeFrame(Exception):
+    """Raised when the time frame is invalid"""
diff --git a/homeharvest/utils.py b/homeharvest/utils.py
index 2aeedee..603f9b4 100644
--- a/homeharvest/utils.py
+++ b/homeharvest/utils.py
@@ -1,38 +1,76 @@
-import re
+from .core.scrapers.models import Property
+import pandas as pd
+
+ordered_properties = [
+    "PropertyURL",
+    "MLS",
+    "MLS #",
+    "Status",
+    "Style",
+    "Street",
+    "Unit",
+    "City",
+    "State",
+    "Zip",
+    "Beds",
+    "FB",
+    "NumHB",
+    "EstSF",
+    "YrBlt",
+    "ListPrice",
+    "Lst Date",
+    "Sold Price",
+    "COEDate",
+    "LotSFApx",
+    "PrcSqft",
+    "LATITUDE",
+    "LONGITUDE",
+    "Stories",
+    "HOAFee",
+    "PrkgGar",
+    "Community",
+]
 
 
-def parse_address_one(street_address: str) -> tuple:
-    if not street_address:
-        return street_address, "#"
+def process_result(result: Property) -> pd.DataFrame:
+    prop_data = {prop: None for prop in ordered_properties}
+    prop_data.update(result.__dict__)
+    prop_data["PropertyURL"] = prop_data["property_url"]
+    prop_data["MLS"] = prop_data["mls"]
+    prop_data["MLS #"] = prop_data["mls_id"]
+    prop_data["Status"] = prop_data["status"]
+    prop_data["Style"] = prop_data["style"]
 
-    apt_match = re.search(
-        r"(APT\s*[\dA-Z]+|#[\dA-Z]+|UNIT\s*[\dA-Z]+|LOT\s*[\dA-Z]+|SUITE\s*[\dA-Z]+)$",
-        street_address,
-        re.I,
-    )
+    if "address" in prop_data:
+        address_data = prop_data["address"]
+        prop_data["Street"] = address_data.street
+        prop_data["Unit"] = address_data.unit
+        prop_data["City"] = address_data.city
+        prop_data["State"] = address_data.state
+        prop_data["Zip"] = address_data.zip
 
-    if apt_match:
-        apt_str = apt_match.group().strip()
-        cleaned_apt_str = re.sub(r"(APT\s*|UNIT\s*|LOT\s*|SUITE\s*)", "#", apt_str, flags=re.I)
+    prop_data["Community"] = prop_data["neighborhoods"]
+    prop_data["Beds"] = prop_data["beds"]
+    prop_data["FB"] = prop_data["baths_full"]
+    prop_data["NumHB"] = prop_data["baths_half"]
+    prop_data["EstSF"] = prop_data["est_sf"]
+    prop_data["ListPrice"] = prop_data["list_price"]
+    prop_data["Lst Date"] = prop_data["list_date"]
+    prop_data["Sold Price"] = prop_data["sold_price"]
+    prop_data["COEDate"] = prop_data["last_sold_date"]
+    prop_data["LotSFApx"] = prop_data["lot_sf"]
+    prop_data["HOAFee"] = prop_data["hoa_fee"]
 
-        main_address = street_address.replace(apt_str, "").strip()
-        return main_address, cleaned_apt_str
-    else:
-        return street_address, "#"
+    if prop_data.get("prc_sqft") is not None:
+        prop_data["PrcSqft"] = round(prop_data["prc_sqft"], 2)
 
+    prop_data["YrBlt"] = prop_data["yr_blt"]
+    prop_data["LATITUDE"] = prop_data["latitude"]
+    prop_data["LONGITUDE"] = prop_data["longitude"]
+    prop_data["Stories"] = prop_data["stories"]
+    prop_data["PrkgGar"] = prop_data["prkg_gar"]
 
-def parse_address_two(street_address: str):
-    if not street_address:
-        return "#"
-    apt_match = re.search(
-        r"(APT\s*[\dA-Z]+|#[\dA-Z]+|UNIT\s*[\dA-Z]+|LOT\s*[\dA-Z]+|SUITE\s*[\dA-Z]+)$",
-        street_address,
-        re.I,
-    )
+    properties_df = pd.DataFrame([prop_data])
+    properties_df = properties_df.reindex(columns=ordered_properties)
 
-    if apt_match:
-        apt_str = apt_match.group().strip()
-        apt_str = re.sub(r"(APT\s*|UNIT\s*|LOT\s*|SUITE\s*)", "#", apt_str, flags=re.I)
-        return apt_str
-    else:
-        return "#"
+    return properties_df[ordered_properties]
diff --git a/tests/test_realtor.py b/tests/test_realtor.py
index 3b23529..557ca64 100644
--- a/tests/test_realtor.py
+++ b/tests/test_realtor.py
@@ -12,13 +12,13 @@ def test_realtor():
         scrape_property(
             location="2530 Al Lipscomb Way",
             site_name="realtor.com",
-            listing_type="for_sale",
+            status="for_sale",
         ),
         scrape_property(
-            location="Phoenix, AZ", site_name=["realtor.com"], listing_type="for_rent"
+            location="Phoenix, AZ", site_name=["realtor.com"], status="for_rent"
         ),  #: does not support "city, state, USA" format
         scrape_property(
-            location="Dallas, TX", site_name="realtor.com", listing_type="sold"
+            location="Dallas, TX", site_name="realtor.com", status="sold"
         ),  #: does not support "city, state, USA" format
         scrape_property(location="85281", site_name="realtor.com"),
     ]
@@ -31,7 +31,7 @@ def test_realtor():
             scrape_property(
                 location="abceefg ju098ot498hh9",
                 site_name="realtor.com",
-                listing_type="for_sale",
+                status="sale",
             )
         ]
     except (InvalidSite, InvalidListingType, NoResultsFound, GeoCoordsNotFound):
diff --git a/tests/test_redfin.py b/tests/test_redfin.py
index 6904499..421a7f0 100644
--- a/tests/test_redfin.py
+++ b/tests/test_redfin.py
@@ -10,10 +10,14 @@ from homeharvest.exceptions import (
 
 def test_redfin():
     results = [
-        scrape_property(location="San Diego", site_name="redfin", listing_type="for_sale"),
-        scrape_property(location="2530 Al Lipscomb Way", site_name="redfin", listing_type="for_sale"),
-        scrape_property(location="Phoenix, AZ, USA", site_name=["redfin"], listing_type="for_rent"),
-        scrape_property(location="Dallas, TX, USA", site_name="redfin", listing_type="sold"),
+        scrape_property(location="San Diego", site_name="redfin", status="sale"),
+        scrape_property(
+            location="2530 Al Lipscomb Way", site_name="redfin", status="sale"
+        ),
+        scrape_property(
+            location="Phoenix, AZ, USA", site_name=["redfin"], status="rent"
+        ),
+        scrape_property(location="Dallas, TX, USA", site_name="redfin", status="sold"),
         scrape_property(location="85281", site_name="redfin"),
     ]
 
@@ -25,11 +29,17 @@ def test_redfin():
             scrape_property(
                 location="abceefg ju098ot498hh9",
                 site_name="redfin",
-                listing_type="for_sale",
+                status="sale",
             ),
-            scrape_property(location="Florida", site_name="redfin", listing_type="for_rent"),
+            scrape_property(location="Florida", site_name="redfin", status="for_rent"),
         ]
-    except (InvalidSite, InvalidListingType, NoResultsFound, GeoCoordsNotFound, SearchTooBroad):
+    except (
+        InvalidSite,
+        InvalidListingType,
+        NoResultsFound,
+        GeoCoordsNotFound,
+        SearchTooBroad,
+    ):
         assert True
 
     assert all([result is None for result in bad_results])
diff --git a/tests/test_utils.py b/tests/test_utils.py
index d21ee77..ed9f0ef 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -17,7 +17,13 @@ def test_parse_address_one():
 
 
 def test_parse_address_two():
-    test_data = [("Apt 126", "#126"), ("apt 2B", "#2B"), ("UNIT 3A", "#3A"), ("unit 3A", "#3A"), ("SuIte 3A", "#3A")]
+    test_data = [
+        ("Apt 126", "#126"),
+        ("apt 2B", "#2B"),
+        ("UNIT 3A", "#3A"),
+        ("unit 3A", "#3A"),
+        ("SuIte 3A", "#3A"),
+    ]
 
     for input_data, expected in test_data:
         output = parse_address_two(input_data)
diff --git a/tests/test_zillow.py b/tests/test_zillow.py
index dfcc55d..6f8e27d 100644
--- a/tests/test_zillow.py
+++ b/tests/test_zillow.py
@@ -9,12 +9,24 @@ from homeharvest.exceptions import (
 
 def test_zillow():
     results = [
-        scrape_property(location="2530 Al Lipscomb Way", site_name="zillow", listing_type="for_sale"),
-        scrape_property(location="Phoenix, AZ, USA", site_name=["zillow"], listing_type="for_rent"),
-        scrape_property(location="Surprise, AZ", site_name=["zillow"], listing_type="for_sale"),
-        scrape_property(location="Dallas, TX, USA", site_name="zillow", listing_type="sold"),
+        scrape_property(
+            location="2530 Al Lipscomb Way", site_name="zillow", listing_type="for_sale"
+        ),
+        scrape_property(
+            location="Phoenix, AZ, USA", site_name=["zillow"], listing_type="for_rent"
+        ),
+        scrape_property(
+            location="Surprise, AZ", site_name=["zillow"], listing_type="for_sale"
+        ),
+        scrape_property(
+            location="Dallas, TX, USA", site_name="zillow", listing_type="sold"
+        ),
         scrape_property(location="85281", site_name="zillow"),
-        scrape_property(location="3268 88th st s, Lakewood", site_name="zillow", listing_type="for_rent"),
+        scrape_property(
+            location="3268 88th st s, Lakewood",
+            site_name="zillow",
+            listing_type="for_rent",
+        ),
     ]
 
     assert all([result is not None for result in results])