From 7037dc9b15bfe3b5199a8b0db1993961df621cf9 Mon Sep 17 00:00:00 2001
From: Cullen <cullen@cullenwatson.com>
Date: Tue, 16 Apr 2024 15:07:50 -0500
Subject: [PATCH] enh: add agent

---
 .pre-commit-config.yaml                       |  21 ++
 homeharvest/core/scrapers/realtor/__init__.py | 207 ++++++++++--------
 2 files changed, 135 insertions(+), 93 deletions(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..95af30d
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,21 @@
+---
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.2.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-added-large-files
+      - id: check-yaml
+  - repo: https://github.com/adrienverge/yamllint
+    rev: v1.29.0
+    hooks:
+      - id: yamllint
+        verbose: true # create awareness of linter findings
+        args: ["-d", "{extends: relaxed, rules: {line-length: {max: 120}}}"]
+  - repo: https://github.com/psf/black
+    rev: 24.2.0
+    hooks:
+    - id: black
+      language_version: python
+      args: [--line-length=120, --quiet]
diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py
index e298ecc..3f22e8b 100644
--- a/homeharvest/core/scrapers/realtor/__init__.py
+++ b/homeharvest/core/scrapers/realtor/__init__.py
@@ -4,18 +4,21 @@ homeharvest.realtor.__init__
 
 This module implements the scraper for realtor.com
 """
+
 from datetime import datetime
 from typing import Dict, Union, Optional
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
 from .. import Scraper
-from ..models import Property, Address, ListingType, Description, PropertyType
+from ..models import Property, Address, ListingType, Description, PropertyType, Agent
 
 
 class RealtorScraper(Scraper):
     SEARCH_GQL_URL = "https://www.realtor.com/api/v1/rdc_search_srp?client_id=rdc-search-new-communities&schema=vesta"
     PROPERTY_URL = "https://www.realtor.com/realestateandhomes-detail/"
+    PROPERTY_GQL = "https://graph.realtor.com/graphql"
     ADDRESS_AUTOCOMPLETE_URL = "https://parser-external.geo.moveaws.com/suggest"
+    NUM_PROPERTY_WORKERS = 20
 
     def __init__(self, scraper_input):
         super().__init__(scraper_input)
@@ -110,15 +113,17 @@ class RealtorScraper(Scraper):
         )
 
         able_to_get_lat_long = (
-                property_info
-                and property_info.get("address")
-                and property_info["address"].get("location")
-                and property_info["address"]["location"].get("coordinate")
+            property_info
+            and property_info.get("address")
+            and property_info["address"].get("location")
+            and property_info["address"]["location"].get("coordinate")
+        )
+        list_date_str = (
+            property_info["basic"]["list_date"].split("T")[0] if property_info["basic"].get("list_date") else None
+        )
+        last_sold_date_str = (
+            property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get("sold_date") else None
         )
-        list_date_str = property_info["basic"]["list_date"].split("T")[0] if property_info["basic"].get(
-            "list_date") else None
-        last_sold_date_str = property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get(
-            "sold_date") else None
         pending_date_str = property_info["pending_date"].split("T")[0] if property_info.get("pending_date") else None
 
         list_date = datetime.strptime(list_date_str, "%Y-%m-%d") if list_date_str else None
@@ -131,33 +136,33 @@ class RealtorScraper(Scraper):
         if list_date:
             if status == "sold" and last_sold_date:
                 days_on_mls = (last_sold_date - list_date).days
-            elif status in ('for_sale', 'for_rent'):
+            elif status in ("for_sale", "for_rent"):
                 days_on_mls = (today - list_date).days
             if days_on_mls and days_on_mls < 0:
                 days_on_mls = None
 
+        property_id = property_info["details"]["permalink"]
+        agents = self.get_agents(property_id)
         listing = Property(
             mls=mls,
-            mls_id=property_info["source"].get("listing_id")
-            if "source" in property_info and isinstance(property_info["source"], dict)
-            else None,
-            property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}",
+            mls_id=(
+                property_info["source"].get("listing_id")
+                if "source" in property_info and isinstance(property_info["source"], dict)
+                else None
+            ),
+            property_url=f"{self.PROPERTY_URL}{property_id}",
             status=property_info["basic"]["status"].upper(),
             list_price=property_info["basic"]["price"],
             list_date=list_date,
-            prc_sqft=property_info["basic"].get("price")
-                     / property_info["basic"].get("sqft")
-            if property_info["basic"].get("price")
-               and property_info["basic"].get("sqft")
-            else None,
+            prc_sqft=(
+                property_info["basic"].get("price") / property_info["basic"].get("sqft")
+                if property_info["basic"].get("price") and property_info["basic"].get("sqft")
+                else None
+            ),
             last_sold_date=last_sold_date,
             pending_date=pending_date,
-            latitude=property_info["address"]["location"]["coordinate"].get("lat")
-            if able_to_get_lat_long
-            else None,
-            longitude=property_info["address"]["location"]["coordinate"].get("lon")
-            if able_to_get_lat_long
-            else None,
+            latitude=property_info["address"]["location"]["coordinate"].get("lat") if able_to_get_lat_long else None,
+            longitude=property_info["address"]["location"]["coordinate"].get("lon") if able_to_get_lat_long else None,
             address=self._parse_address(property_info, search_type="handle_listing"),
             description=Description(
                 alt_photos=self.process_alt_photos(property_info.get("media", {}).get("photos", [])),
@@ -172,7 +177,8 @@ class RealtorScraper(Scraper):
                 garage=property_info["details"].get("garage"),
                 stories=property_info["details"].get("stories"),
             ),
-            days_on_mls=days_on_mls
+            days_on_mls=days_on_mls,
+            agents=agents,
         )
 
         return [listing]
@@ -266,6 +272,7 @@ class RealtorScraper(Scraper):
                 }"""
 
         variables = {"property_id": property_id}
+        agents = self.get_agents(property_id)
 
         payload = {
             "query": query,
@@ -281,16 +288,13 @@ class RealtorScraper(Scraper):
             Property(
                 mls_id=property_id,
                 property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}",
-                address=self._parse_address(
-                    property_info, search_type="handle_address"
-                ),
+                address=self._parse_address(property_info, search_type="handle_address"),
                 description=self._parse_description(property_info),
+                agents=agents,
             )
         ]
 
-    def general_search(
-            self, variables: dict, search_type: str
-    ) -> Dict[str, Union[int, list[Property]]]:
+    def general_search(self, variables: dict, search_type: str) -> Dict[str, Union[int, list[Property]]]:
         """
         Handles a location area & returns a list of properties
         """
@@ -380,17 +384,15 @@ class RealtorScraper(Scraper):
         )
 
         pending_or_contingent_param = (
-            "or_filters: { contingent: true, pending: true }"
-            if self.listing_type == ListingType.PENDING
-            else ""
+            "or_filters: { contingent: true, pending: true }" if self.listing_type == ListingType.PENDING else ""
         )
 
         listing_type = ListingType.FOR_SALE if self.listing_type == ListingType.PENDING else self.listing_type
         is_foreclosure = ""
 
-        if variables.get('foreclosure') is True:
+        if variables.get("foreclosure") is True:
             is_foreclosure = "foreclosure: true"
-        elif variables.get('foreclosure') is False:
+        elif variables.get("foreclosure") is False:
             is_foreclosure = "foreclosure: false"
 
         if search_type == "comps":  #: comps search, came from an address
@@ -400,11 +402,11 @@ class RealtorScraper(Scraper):
                     $offset: Int!,
                     ) {
                         home_search(
-                            query: { 
+                            query: {
                                 %s
                                 nearby: {
                                     coordinates: $coordinates
-                                    radius: $radius 
+                                    radius: $radius
                                 }
                                 status: %s
                                 %s
@@ -453,7 +455,7 @@ class RealtorScraper(Scraper):
             )
         else:  #: general search, came from an address
             query = (
-                    """query Property_search(
+                """query Property_search(
                         $property_id: [ID]!
                         $offset: Int!,
                     ) {
@@ -464,7 +466,7 @@ class RealtorScraper(Scraper):
                             limit: 1
                             offset: $offset
                         ) %s"""
-                    % results_query
+                % results_query
             )
 
         payload = {
@@ -480,64 +482,72 @@ class RealtorScraper(Scraper):
         properties: list[Property] = []
 
         if (
-                response_json is None
-                or "data" not in response_json
-                or response_json["data"] is None
-                or search_key not in response_json["data"]
-                or response_json["data"][search_key] is None
-                or "results" not in response_json["data"][search_key]
+            response_json is None
+            or "data" not in response_json
+            or response_json["data"] is None
+            or search_key not in response_json["data"]
+            or response_json["data"][search_key] is None
+            or "results" not in response_json["data"][search_key]
         ):
             return {"total": 0, "properties": []}
 
-        for result in response_json["data"][search_key]["results"]:
-            mls = (
-                result["source"].get("id")
-                if "source" in result and isinstance(result["source"], dict)
-                else None
-            )
+        def process_property(result: dict) -> Property | None:
+            mls = result["source"].get("id") if "source" in result and isinstance(result["source"], dict) else None
 
             if not mls and self.mls_only:
-                continue
+                return
 
             able_to_get_lat_long = (
-                    result
-                    and result.get("location")
-                    and result["location"].get("address")
-                    and result["location"]["address"].get("coordinate")
+                result
+                and result.get("location")
+                and result["location"].get("address")
+                and result["location"]["address"].get("coordinate")
             )
 
             is_pending = result["flags"].get("is_pending") or result["flags"].get("is_contingent")
 
             if is_pending and self.listing_type != ListingType.PENDING:
-                continue
+                return
+
+            property_id = result["property_id"]
+            agents = self.get_agents(property_id)
 
             realty_property = Property(
                 mls=mls,
-                mls_id=result["source"].get("listing_id")
-                if "source" in result and isinstance(result["source"], dict)
-                else None,
-                property_url=f"{self.PROPERTY_URL}{result['property_id']}" if self.listing_type != ListingType.FOR_RENT else f"{self.PROPERTY_URL}M{result['property_id']}?listing_status=rental",
+                mls_id=(
+                    result["source"].get("listing_id")
+                    if "source" in result and isinstance(result["source"], dict)
+                    else None
+                ),
+                property_url=(
+                    f"{self.PROPERTY_URL}{property_id}"
+                    if self.listing_type != ListingType.FOR_RENT
+                    else f"{self.PROPERTY_URL}M{property_id}?listing_status=rental"
+                ),
                 status="PENDING" if is_pending else result["status"].upper(),
                 list_price=result["list_price"],
-                list_date=result["list_date"].split("T")[0]
-                if result.get("list_date")
-                else None,
+                list_date=result["list_date"].split("T")[0] if result.get("list_date") else None,
                 prc_sqft=result.get("price_per_sqft"),
                 last_sold_date=result.get("last_sold_date"),
-                hoa_fee=result["hoa"]["fee"]
-                if result.get("hoa") and isinstance(result["hoa"], dict)
-                else None,
-                latitude=result["location"]["address"]["coordinate"].get("lat")
-                if able_to_get_lat_long
-                else None,
-                longitude=result["location"]["address"]["coordinate"].get("lon")
-                if able_to_get_lat_long
-                else None,
+                hoa_fee=result["hoa"]["fee"] if result.get("hoa") and isinstance(result["hoa"], dict) else None,
+                latitude=result["location"]["address"]["coordinate"].get("lat") if able_to_get_lat_long else None,
+                longitude=result["location"]["address"]["coordinate"].get("lon") if able_to_get_lat_long else None,
                 address=self._parse_address(result, search_type="general_search"),
                 description=self._parse_description(result),
-                days_on_mls=self.calculate_days_on_mls(result)
+                days_on_mls=self.calculate_days_on_mls(result),
+                agents=agents,
             )
-            properties.append(realty_property)
+            return realty_property
+
+        with ThreadPoolExecutor(max_workers=self.NUM_PROPERTY_WORKERS) as executor:
+            futures = [
+                executor.submit(process_property, result) for result in response_json["data"][search_key]["results"]
+            ]
+
+            for future in as_completed(futures):
+                result = future.result()
+                if result:
+                    properties.append(result)
 
         return {
             "total": response_json["data"][search_key]["total"],
@@ -558,18 +568,14 @@ class RealtorScraper(Scraper):
         search_type = (
             "comps"
             if self.radius and location_type == "address"
-            else "address"
-            if location_type == "address" and not self.radius
-            else "area"
+            else "address" if location_type == "address" and not self.radius else "area"
         )
         if location_type == "address":
             if not self.radius:  #: single address search, non comps
                 property_id = location_info["mpr_id"]
                 search_variables |= {"property_id": property_id}
 
-                gql_results = self.general_search(
-                    search_variables, search_type=search_type
-                )
+                gql_results = self.general_search(search_variables, search_type=search_type)
                 if gql_results["total"] == 0:
                     listing_id = self.get_latest_listing_id(property_id)
                     if listing_id is None:
@@ -603,7 +609,7 @@ class RealtorScraper(Scraper):
             }
 
         if self.foreclosure:
-            search_variables['foreclosure'] = self.foreclosure
+            search_variables["foreclosure"] = self.foreclosure
 
         result = self.general_search(search_variables, search_type=search_type)
         total = result["total"]
@@ -624,6 +630,19 @@ class RealtorScraper(Scraper):
 
         return homes
 
+    def get_agents(self, property_id: str) -> list[Agent]:
+        payload = f'{{"query":"query GetHome($property_id: ID!) {{\\n  home(property_id: $property_id) {{\\n    __typename\\n\\n    consumerAdvertisers: consumer_advertisers {{\\n      __typename\\n      type\\n      advertiserId: advertiser_id\\n      name\\n      phone\\n      type\\n      href\\n      slogan\\n      photo {{\\n        __typename\\n        href\\n      }}\\n      showRealtorLogo: show_realtor_logo\\n      hours\\n    }}\\n\\n\\n  }}\\n}}\\n","variables":{{"property_id":"{property_id}"}}}}'
+        response = self.session.post(self.PROPERTY_GQL, data=payload)
+
+        data = response.json()
+        try:
+            ads = data["data"]["home"]["consumerAdvertisers"]
+        except (KeyError, TypeError):
+            return []
+
+        agents = [Agent(name=ad["name"], phone=ad["phone"]) for ad in ads]
+        return agents
+
     @staticmethod
     def _parse_neighborhoods(result: dict) -> Optional[str]:
         neighborhoods_list = []
@@ -646,17 +665,19 @@ class RealtorScraper(Scraper):
 
     def _parse_address(self, result: dict, search_type):
         if search_type == "general_search":
-            address = result['location']['address']
+            address = result["location"]["address"]
         else:
             address = result["address"]
 
         return Address(
-            street=" ".join([
-                self.handle_none_safely(address.get('street_number')),
-                self.handle_none_safely(address.get('street_direction')),
-                self.handle_none_safely(address.get('street_name')),
-                self.handle_none_safely(address.get('street_suffix')),
-            ]).strip(),
+            street=" ".join(
+                [
+                    self.handle_none_safely(address.get("street_number")),
+                    self.handle_none_safely(address.get("street_direction")),
+                    self.handle_none_safely(address.get("street_name")),
+                    self.handle_none_safely(address.get("street_suffix")),
+                ]
+            ).strip(),
             unit=address["unit"],
             city=address["city"],
             state=address["state_code"],
@@ -705,12 +726,12 @@ class RealtorScraper(Scraper):
         today = datetime.now()
 
         if list_date:
-            if result["status"] == 'sold':
+            if result["status"] == "sold":
                 if last_sold_date:
                     days = (last_sold_date - list_date).days
                     if days >= 0:
                         return days
-            elif result["status"] in ('for_sale', 'for_rent'):
+            elif result["status"] in ("for_sale", "for_rent"):
                 days = (today - list_date).days
                 if days >= 0:
                     return days