From 0b283e18bdb571ecb8834c6193ec15c1f1229dde Mon Sep 17 00:00:00 2001 From: Zachary Hampton Date: Thu, 4 Dec 2025 18:56:10 -0800 Subject: [PATCH 1/3] Fix 403 error from Realtor.com API changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update GraphQL endpoint to api.frontdoor.realtor.com - Update HTTP headers with newer Chrome version and correct client name/version - Improve error handling in handle_home method - Fix response validation for missing/null data 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- homeharvest/core/scrapers/__init__.py | 30 +++++++------- homeharvest/core/scrapers/realtor/__init__.py | 41 +++++++++++-------- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index c9b4f3b..b91ed8a 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -81,21 +81,21 @@ class Scraper: Scraper.session.mount("https://", adapter) Scraper.session.headers.update( { - "accept": "application/json, text/javascript", - "accept-language": "en-US,en;q=0.9", - "cache-control": "no-cache", - "content-type": "application/json", - "origin": "https://www.realtor.com", - "pragma": "no-cache", - "priority": "u=1, i", - "rdc-ab-tests": "commute_travel_time_variation:v1", - "sec-ch-ua": '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"', - "sec-ch-ua-mobile": "?0", - "sec-ch-ua-platform": '"Windows"', - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36", + 'sec-ch-ua-platform': '"macOS"', + 'rdc-client-name': 'rdc-search-for-sale-desktop', + 'sec-ch-ua': '"Chromium";v="142", "Google Chrome";v="142", "Not_A Brand";v="99"', + 'sec-ch-ua-mobile': '?0', + 'rdc-client-version': '0.1.0', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36', + 'accept': 'application/json', + 'content-type': 'application/json', + 'origin': 'https://www.realtor.com', + 'sec-fetch-site': 'same-site', + 'sec-fetch-mode': 'cors', + 'sec-fetch-dest': 'empty', + 'referer': 'https://www.realtor.com/', + 'accept-language': 'en-US,en;q=0.9', + 'priority': 'u=1, i', } ) diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index 39ffbdc..b609c0f 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -35,9 +35,7 @@ from .processors import ( class RealtorScraper(Scraper): - SEARCH_GQL_URL = "https://www.realtor.com/api/v1/rdc_search_srp?client_id=rdc-search-new-communities&schema=vesta" - PROPERTY_URL = "https://www.realtor.com/realestateandhomes-detail/" - PROPERTY_GQL = "https://graph.realtor.com/graphql" + SEARCH_GQL_URL = "https://api.frontdoor.realtor.com/graphql" ADDRESS_AUTOCOMPLETE_URL = "https://parser-external.geo.moveaws.com/suggest" NUM_PROPERTY_WORKERS = 20 DEFAULT_PAGE_SIZE = 200 @@ -108,6 +106,7 @@ class RealtorScraper(Scraper): return property_info["listings"][0]["listing_id"] def handle_home(self, property_id: str) -> list[Property]: + """Fetch single home with proper error handling.""" query = ( """query Home($property_id: ID!) { home(property_id: $property_id) %s @@ -116,23 +115,33 @@ class RealtorScraper(Scraper): ) variables = {"property_id": property_id} - payload = { - "query": query, - "variables": variables, - } + payload = {"query": query, "variables": variables} - response = self.session.post(self.SEARCH_GQL_URL, json=payload) - response_json = response.json() + try: + response = self.session.post(self.SEARCH_GQL_URL, json=payload) + data = response.json() - property_info = response_json["data"]["home"] + # Check for errors or missing data + if "errors" in data or "data" not in data: + return [] - if self.return_type != ReturnType.raw: - return [process_property(property_info, self.mls_only, self.extra_property_data, - self.exclude_pending, self.listing_type, get_key, process_extra_property_details)] - else: - return [property_info] + if data["data"] is None or "home" not in data["data"]: + return [] + property_info = data["data"]["home"] + if property_info is None: + return [] + # Process based on return type + if self.return_type != ReturnType.raw: + return [process_property(property_info, self.mls_only, self.extra_property_data, + self.exclude_pending, self.listing_type, get_key, + process_extra_property_details)] + else: + return [property_info] + + except Exception: + return [] def general_search(self, variables: dict, search_type: str) -> Dict[str, Union[int, Union[list[Property], list[dict]]]]: """ @@ -366,7 +375,7 @@ class RealtorScraper(Scraper): $city: String, $county: [String], $state_code: String, - $postal_code: String + $postal_code: String, $offset: Int, ) { home_search( From a8c9d0fd66e5f0289429646d338c3888e4f43663 Mon Sep 17 00:00:00 2001 From: zacharyhampton Date: Thu, 4 Dec 2025 21:08:01 -0800 Subject: [PATCH 2/3] Replace REST autocomplete with GraphQL Search_suggestions query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace /suggest REST endpoint with GraphQL Search_suggestions query - Use search_location field instead of individual city/county/state/postal_code fields - Fix coordinate order to [lon, lat] (GeoJSON standard) for radius searches - Extract mpr_id from addr: prefix for single address lookups 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- homeharvest/core/scrapers/realtor/__init__.py | 104 +++++++++++------- 1 file changed, 63 insertions(+), 41 deletions(-) diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index b609c0f..da743af 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -36,7 +36,6 @@ from .processors import ( class RealtorScraper(Scraper): SEARCH_GQL_URL = "https://api.frontdoor.realtor.com/graphql" - ADDRESS_AUTOCOMPLETE_URL = "https://parser-external.geo.moveaws.com/suggest" NUM_PROPERTY_WORKERS = 20 DEFAULT_PAGE_SIZE = 200 @@ -44,33 +43,70 @@ class RealtorScraper(Scraper): super().__init__(scraper_input) def handle_location(self): - # Get client_id from listing_type - if self.listing_type is None: - client_id = "for-sale" - elif isinstance(self.listing_type, list): - client_id = self.listing_type[0].value.lower().replace("_", "-") if self.listing_type else "for-sale" - else: - client_id = self.listing_type.value.lower().replace("_", "-") + query = """query Search_suggestions($searchInput: SearchSuggestionsInput!) { + search_suggestions(search_input: $searchInput) { + geo_results { + type + text + geo { + _id + area_type + city + state_code + postal_code + county + centroid { lat lon } + slug_id + geo_id + } + } + } + }""" - params = { - "input": self.location, - "client_id": client_id, - "limit": "1", - "area_types": "city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park", + variables = { + "searchInput": { + "search_term": self.location + } } - response = self.session.get( - self.ADDRESS_AUTOCOMPLETE_URL, - params=params, - ) + payload = { + "query": query, + "variables": variables, + } + + response = self.session.post(self.SEARCH_GQL_URL, json=payload) response_json = response.json() - result = response_json["autocomplete"] - - if not result: + if ( + response_json is None + or "data" not in response_json + or response_json["data"] is None + or "search_suggestions" not in response_json["data"] + or response_json["data"]["search_suggestions"] is None + or "geo_results" not in response_json["data"]["search_suggestions"] + or not response_json["data"]["search_suggestions"]["geo_results"] + ): return None - return result[0] + geo_result = response_json["data"]["search_suggestions"]["geo_results"][0] + geo = geo_result.get("geo", {}) + + result = { + "text": geo_result.get("text"), + "area_type": geo.get("area_type"), + "city": geo.get("city"), + "state_code": geo.get("state_code"), + "postal_code": geo.get("postal_code"), + "county": geo.get("county"), + "centroid": geo.get("centroid"), + } + + if geo.get("area_type") == "address": + geo_id = geo.get("_id", "") + if geo_id.startswith("addr:"): + result["mpr_id"] = geo_id.replace("addr:", "") + + return result def get_latest_listing_id(self, property_id: str) -> str | None: query = """query Property($property_id: ID!) { @@ -372,19 +408,13 @@ class RealtorScraper(Scraper): ) elif search_type == "area": #: general search, came from a general location query = """query Home_search( - $city: String, - $county: [String], - $state_code: String, - $postal_code: String, + $search_location: SearchLocation, $offset: Int, ) { home_search( query: { %s - city: $city - county: $county - postal_code: $postal_code - state_code: $state_code + search_location: $search_location %s %s %s @@ -520,24 +550,16 @@ class RealtorScraper(Scraper): if not location_info.get("centroid"): return [] - coordinates = list(location_info["centroid"].values()) + centroid = location_info["centroid"] + coordinates = [centroid["lon"], centroid["lat"]] # GeoJSON order: [lon, lat] search_variables |= { "coordinates": coordinates, "radius": "{}mi".format(self.radius), } - elif location_type == "postal_code": + else: #: general search (city, county, postal_code, etc.) search_variables |= { - "postal_code": location_info.get("postal_code"), - } - - else: #: general search, location - search_variables |= { - "city": location_info.get("city"), - "county": location_info.get("county"), - "state_code": location_info.get("state_code"), - "postal_code": location_info.get("postal_code"), - + "search_location": {"location": location_info.get("text")}, } if self.foreclosure: From 406ff97260946d960403819159aec5f29d158f3e Mon Sep 17 00:00:00 2001 From: zacharyhampton Date: Thu, 4 Dec 2025 23:08:37 -0800 Subject: [PATCH 3/3] - version bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6474b82..69bd99a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.8.5" +version = "0.8.6b" description = "Real estate scraping library" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/ZacharyHampton/HomeHarvest"