From a1c1bcc82269bf39e8e991ebf25a240a42d2cf60 Mon Sep 17 00:00:00 2001 From: zacharyhampton Date: Sun, 21 Dec 2025 16:03:57 -0700 Subject: [PATCH] Version bump to 0.8.15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- homeharvest/core/scrapers/__init__.py | 19 ++-- homeharvest/core/scrapers/realtor/__init__.py | 15 ++- homeharvest/core/scrapers/realtor/queries.py | 94 ++++++++++++++++++- pyproject.toml | 2 +- 4 files changed, 111 insertions(+), 19 deletions(-) diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index 9bffe94..a4de131 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -5,7 +5,6 @@ import requests from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry import uuid -import secrets from ...exceptions import AuthenticationError from .models import Property, ListingType, SiteName, SearchPropertyType, ReturnType import json @@ -83,15 +82,19 @@ class Scraper: Scraper.session.headers.update( { 'Content-Type': 'application/json', - 'apollographql-client-version': '26.11.1-26.11.1.1106489', 'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.9', - 'rdc-client-version': '26.11.1', - 'X-APOLLO-OPERATION-TYPE': 'query', - 'X-APOLLO-OPERATION-ID': secrets.token_hex(32), - 'rdc-client-name': 'RDC_NATIVE_MOBILE-iPhone-com.move.Realtor', - 'apollographql-client-name': 'com.move.Realtor-apollo-ios', - 'User-Agent': 'Realtor.com/26.11.1.1106489 CFNetwork/3860.200.71 Darwin/25.1.0', + 'Cache-Control': 'no-cache', + 'Pragma': 'no-cache', + 'rdc-client-name': 'rdc-home', + 'rdc-client-version': '2.68.0', + 'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"macOS"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-site', + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36', } ) diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index a6f90c5..9bb3dc9 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -29,7 +29,7 @@ from ..models import ( ListingType, ReturnType ) -from .queries import GENERAL_RESULTS_QUERY, SEARCH_HOMES_DATA, HOMES_DATA, HOME_FRAGMENT, SEARCH_RESULTS_FRAGMENT, LISTING_PHOTOS_FRAGMENT, MORPHEUS_SUGGESTIONS_QUERY +from .queries import GENERAL_RESULTS_QUERY, SEARCH_HOMES_DATA, HOMES_DATA, HOME_FRAGMENT, SEARCH_RESULTS_FRAGMENT, LISTING_PHOTOS_FRAGMENT, SEARCH_SUGGESTIONS_QUERY from .processors import ( process_property, process_extra_property_details, @@ -38,7 +38,7 @@ from .processors import ( class RealtorScraper(Scraper): - SEARCH_GQL_URL = "https://api.frontdoor.realtor.com/graphql" + SEARCH_GQL_URL = "https://www.realtor.com/frontdoor/graphql" NUM_PROPERTY_WORKERS = 20 DEFAULT_PAGE_SIZE = 200 @@ -53,21 +53,18 @@ class RealtorScraper(Scraper): def _graphql_post(self, query: str, variables: dict, operation_name: str) -> dict: """ - Execute a GraphQL query with operation-specific headers. + Execute a GraphQL query. Args: query: GraphQL query string (must include operationName matching operation_name param) variables: Query variables dictionary - operation_name: Name of the GraphQL operation for Apollo headers + operation_name: Name of the GraphQL operation Returns: Response JSON dictionary """ - # Set operation-specific header (must match query's operationName) - self.session.headers['X-APOLLO-OPERATION-NAME'] = operation_name - payload = { - "operationName": operation_name, # Include in payload + "operationName": operation_name, "query": self._minify_query(query), "variables": variables, } @@ -97,7 +94,7 @@ class RealtorScraper(Scraper): } } - response_json = self._graphql_post(MORPHEUS_SUGGESTIONS_QUERY, variables, "GetMorpheusSuggestions") + response_json = self._graphql_post(SEARCH_SUGGESTIONS_QUERY, variables, "Search_suggestions") if ( response_json is None diff --git a/homeharvest/core/scrapers/realtor/queries.py b/homeharvest/core/scrapers/realtor/queries.py index 31496e8..4dcee0f 100644 --- a/homeharvest/core/scrapers/realtor/queries.py +++ b/homeharvest/core/scrapers/realtor/queries.py @@ -712,4 +712,96 @@ fragment ListingPhotosFragment on SearchHome { } """ -MORPHEUS_SUGGESTIONS_QUERY = """query GetMorpheusSuggestions($searchInput: SearchSuggestionsInput!) { search_suggestions(search_input: $searchInput) { __typename geo_results { __typename type text geo { __typename _id _score mpr_id area_type city state_code postal_code country lat lon county counties { __typename name fips state_code } slug_id geo_id score name city_slug_id centroid { __typename lat lon } county_needed_for_uniq street line school school_id school_district school_district_id has_catchment university university_id neighborhood park } } no_matches has_results filter_criteria { __typename property_type { __typename type } price { __typename min max pattern } bed { __typename min max pattern } bath { __typename min max pattern } feature_tags { __typename tags } listing_status { __typename new_construction existing_homes foreclosures recently_sold fifty_five_plus open_house hide_new_construction hide_existing_homes hide_foreclosures hide_recently_sold hide_fifty_five_plus hide_open_house virtual_tour three_d_tour contingent hide_contingent pending hide_pending } keyword { __typename keywords } garage { __typename min max pattern } age { __typename min max pattern } stories { __typename min max pattern } lot_size { __typename min max pattern } square_feet { __typename min max pattern } home_size { __typename min max pattern } basement finished_basement pool waterfront fireplace detached_garage expand { __typename radius } hoa { __typename type fee } } message_data { __typename property_type pool waterfront fireplace basement finished_basement detached_garage listing_status { __typename new_construction existing_homes foreclosures recently_sold fifty_five_plus open_house hide_new_construction hide_existing_homes hide_foreclosures hide_recently_sold hide_fifty_five_plus hide_open_house } keywords price { __typename min max pattern } bed { __typename min max pattern } bath { __typename min max pattern } garage { __typename min max pattern } stories { __typename min max pattern } age { __typename min max pattern } lot_size { __typename min max pattern } square_feet { __typename min max pattern } } original_string morpheus_context } }""" +SEARCH_SUGGESTIONS_QUERY = """query Search_suggestions($searchInput: SearchSuggestionsInput!) { + search_suggestions(search_input: $searchInput) { + raw_input_parser_result + typeahead_results { + display_string + display_geo + geo { + _id + _score + area_type + city + state_code + state + postal_code + country + lat + lon + county + counties { + name + fips + state_code + } + slug_id + geo_id + score + name + city_slug_id + centroid { + lat + lon + } + county_needed_for_uniq + street + line + school + school_id + school_district + has_catchment + university + university_id + neighborhood + park + } + url + } + geo_results { + type + text + geo { + _id + _score + area_type + city + state_code + state + postal_code + country + lat + lon + county + counties { + name + fips + state_code + } + slug_id + geo_id + score + name + city_slug_id + centroid { + lat + lon + } + county_needed_for_uniq + street + line + school + school_id + school_district + has_catchment + university + university_id + neighborhood + park + } + } + no_matches + has_results + original_string + } +}""" diff --git a/pyproject.toml b/pyproject.toml index 518afcb..d918d17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.8.14" +version = "0.8.15" description = "Real estate scraping library" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/ZacharyHampton/HomeHarvest"