From fefacdd264658d25cd533535ba151dda3e24d03b Mon Sep 17 00:00:00 2001 From: zacharyhampton Date: Sun, 14 Dec 2025 17:32:06 -0800 Subject: [PATCH] Version bump to 0.8.8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- homeharvest/core/scrapers/__init__.py | 2 + homeharvest/core/scrapers/realtor/__init__.py | 81 ++++--- homeharvest/core/scrapers/realtor/queries.py | 212 +++++++++++++++++- pyproject.toml | 2 +- 4 files changed, 261 insertions(+), 36 deletions(-) diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index 5cdb9bc..5967864 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -91,6 +91,8 @@ class Scraper: 'X-APOLLO-OPERATION-TYPE': 'query', 'rdc-client-name': 'RDC_NATIVE_MOBILE-iPhone-com.move.Realtor', 'apollographql-client-name': 'com.move.Realtor-apollo-ios', + 'newrelic': '', + 'transparent': '', 'User-Agent': 'Realtor.com/26.9.25.0774600 CFNetwork/3860.200.71 Darwin/25.1.0', } ) diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index 6d223d3..086e66a 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -26,7 +26,7 @@ from ..models import ( ListingType, ReturnType ) -from .queries import GENERAL_RESULTS_QUERY, SEARCH_HOMES_DATA, HOMES_DATA, HOME_FRAGMENT +from .queries import GENERAL_RESULTS_QUERY, SEARCH_HOMES_DATA, HOMES_DATA, HOME_FRAGMENT, SEARCH_RESULTS_FRAGMENT from .processors import ( process_property, process_extra_property_details, @@ -72,22 +72,26 @@ class RealtorScraper(Scraper): stop=stop_after_attempt(3), ) def handle_location(self): - query = """query SearchSuggestions($searchInput: SearchSuggestionsInput!) { + query = """ + fragment SuggestionFragment on SearchSuggestionGeoResult { + type + text + geo { + _id + area_type + city + state_code + postal_code + county + centroid { lat lon } + slug_id + geo_id + } + } + query SearchSuggestions($searchInput: SearchSuggestionsInput!) { search_suggestions(search_input: $searchInput) { geo_results { - type - text - geo { - _id - area_type - city - state_code - postal_code - county - centroid { lat lon } - slug_id - geo_id - } + ...SuggestionFragment } } }""" @@ -137,11 +141,15 @@ class RealtorScraper(Scraper): return result def get_latest_listing_id(self, property_id: str) -> str | None: - query = """query GetPropertyListingId($property_id: ID!) { + query = """ + fragment ListingFragment on Listing { + listing_id + primary + } + query GetPropertyListingId($property_id: ID!) { property(id: $property_id) { listings { - listing_id - primary + ...ListingFragment } } } @@ -166,10 +174,13 @@ class RealtorScraper(Scraper): def handle_home(self, property_id: str) -> list[Property]: """Fetch single home with proper error handling.""" query = ( - """query GetHomeDetails($property_id: ID!) { - home(property_id: $property_id) %s + """%s + query GetHomeDetails($property_id: ID!) { + home(property_id: $property_id) { + ...HomeDetailsFragment + } }""" - % HOMES_DATA + % HOME_FRAGMENT ) variables = {"property_id": property_id} @@ -394,12 +405,13 @@ class RealtorScraper(Scraper): is_foreclosure = "foreclosure: false" if search_type == "comps": #: comps search, came from an address - query = """query GetHomeSearch( + query = """%s + query GetHomeSearch( $coordinates: [Float]! $radius: String! $offset: Int!, ) { - home_search( + homeSearch: home_search( query: { %s nearby: { @@ -417,6 +429,7 @@ class RealtorScraper(Scraper): offset: $offset ) %s }""" % ( + SEARCH_RESULTS_FRAGMENT, is_foreclosure, status_param, date_param, @@ -427,11 +440,12 @@ class RealtorScraper(Scraper): GENERAL_RESULTS_QUERY, ) elif search_type == "area": #: general search, came from a general location - query = """query GetHomeSearch( + query = """%s + query GetHomeSearch( $search_location: SearchLocation, $offset: Int, ) { - home_search( + homeSearch: home_search( query: { %s search_location: $search_location @@ -447,6 +461,7 @@ class RealtorScraper(Scraper): offset: $offset ) %s }""" % ( + SEARCH_RESULTS_FRAGMENT, is_foreclosure, status_param, date_param, @@ -459,11 +474,12 @@ class RealtorScraper(Scraper): ) else: #: general search, came from an address query = ( - """query GetHomeSearch( + """%s + query GetHomeSearch( $property_id: [ID]! $offset: Int!, ) { - home_search( + homeSearch: home_search( query: { property_id: $property_id } @@ -471,11 +487,11 @@ class RealtorScraper(Scraper): offset: $offset ) %s }""" - % GENERAL_RESULTS_QUERY + % (SEARCH_RESULTS_FRAGMENT, GENERAL_RESULTS_QUERY) ) response_json = self._graphql_post(query, variables, "GetHomeSearch") - search_key = "home_search" if "home_search" in query else "property_search" + search_key = "homeSearch" properties: list[Union[Property, dict]] = [] @@ -1109,18 +1125,17 @@ class RealtorScraper(Scraper): property_ids = list(set(property_ids)) - # Construct the bulk query fragments = "\n".join( - f'home_{property_id}: home(property_id: {property_id}) {{ ...HomeData }}' + f'home_{property_id}: home(property_id: {property_id}) {{ ...HomeDetailsFragment }}' for property_id in property_ids ) query = f"""{HOME_FRAGMENT} -query GetBulkPropertyDetails {{ +query GetHomeDetails {{ {fragments} }}""" - data = self._graphql_post(query, {}, "GetBulkPropertyDetails") + data = self._graphql_post(query, {}, "GetHomeDetails") if "data" not in data: # If we got a 400 error with "Required parameter is missing", raise to trigger retry diff --git a/homeharvest/core/scrapers/realtor/queries.py b/homeharvest/core/scrapers/realtor/queries.py index 2a421d9..0fb4e0e 100644 --- a/homeharvest/core/scrapers/realtor/queries.py +++ b/homeharvest/core/scrapers/realtor/queries.py @@ -1,3 +1,200 @@ +SEARCH_RESULTS_FRAGMENT = """ +fragment SearchFragment on SearchHome { + __typename + pending_date + listing_id + property_id + href + permalink + list_date + status + mls_status + last_sold_price + last_sold_date + last_status_change_date + last_update_date + list_price + list_price_max + list_price_min + price_per_sqft + tags + open_houses { + start_date + end_date + description + time_zone + dst + href + methods + } + details { + category + text + parent_category + } + pet_policy { + cats + dogs + dogs_small + dogs_large + __typename + } + units { + availability { + date + __typename + } + description { + baths_consolidated + baths + beds + sqft + __typename + } + photos(https: true) { + title + href + tags { + label + } + } + list_price + __typename + } + flags { + is_contingent + is_pending + is_new_construction + } + description { + type + sqft + beds + baths_full + baths_half + lot_sqft + year_built + garage + type + name + stories + text + } + source { + id + listing_id + } + hoa { + fee + } + location { + address { + street_direction + street_number + street_name + street_suffix + line + unit + city + state_code + postal_code + coordinate { + lon + lat + } + } + county { + name + fips_code + } + neighborhoods { + name + } + } + tax_record { + cl_id + public_record_id + last_update_date + apn + tax_parcel_id + } + primary_photo(https: true) { + href + } + photos(https: true) { + title + href + tags { + label + } + } + advertisers { + email + broker { + name + fulfillment_id + } + type + name + fulfillment_id + builder { + name + fulfillment_id + } + phones { + ext + primary + type + number + } + office { + name + email + fulfillment_id + href + phones { + number + type + primary + ext + } + mls_set + } + corporation { + specialties + name + bio + href + fulfillment_id + } + mls_set + nrds_id + state_license + rental_corporation { + fulfillment_id + } + rental_management { + name + href + fulfillment_id + } + } + current_estimates { + __typename + source { + __typename + type + name + } + estimate + estimateHigh: estimate_high + estimateLow: estimate_low + date + isBestHomeValue: isbest_homevalue + } +} +""" + _SEARCH_HOMES_DATA_BASE = """{ pending_date listing_id @@ -181,7 +378,7 @@ _SEARCH_HOMES_DATA_BASE = """{ HOME_FRAGMENT = """ -fragment HomeData on Home { +fragment HomeDetailsFragment on Home { property_id nearbySchools: nearby_schools(radius: 5.0, limit_per_level: 3) { __typename schools { district { __typename id name } } @@ -300,8 +497,19 @@ current_estimates { } }""" % _SEARCH_HOMES_DATA_BASE -GENERAL_RESULTS_QUERY = """{ +# Query body using inline fields (kept for backward compatibility) +GENERAL_RESULTS_QUERY_BODY = """{ count total results %s }""" % SEARCH_HOMES_DATA + +GENERAL_RESULTS_QUERY = """{ + __typename + count + total + results { + __typename + ...SearchFragment + } + }""" diff --git a/pyproject.toml b/pyproject.toml index baf9042..4200536 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.8.7" +version = "0.8.8" description = "Real estate scraping library" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/ZacharyHampton/HomeHarvest"