diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index a4de131..ba79253 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -85,9 +85,11 @@ class Scraper: 'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.9', 'Cache-Control': 'no-cache', + 'Origin': 'https://www.realtor.com', 'Pragma': 'no-cache', - 'rdc-client-name': 'rdc-home', - 'rdc-client-version': '2.68.0', + 'Referer': 'https://www.realtor.com/', + 'rdc-client-name': 'RDC_WEB_SRP_FS_PAGE', + 'rdc-client-version': '3.0.2515', 'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"', 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"macOS"', @@ -95,6 +97,7 @@ class Scraper: 'sec-fetch-mode': 'cors', 'sec-fetch-site': 'same-site', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36', + 'x-is-bot': 'false', } ) diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index 1fc95f8..e73ac49 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -29,7 +29,7 @@ from ..models import ( ListingType, ReturnType ) -from .queries import GENERAL_RESULTS_QUERY, SEARCH_HOMES_DATA, HOMES_DATA, HOME_FRAGMENT, SEARCH_RESULTS_FRAGMENT, LISTING_PHOTOS_FRAGMENT, SEARCH_SUGGESTIONS_QUERY +from .queries import GENERAL_RESULTS_QUERY, HOMES_DATA, SEARCH_SUGGESTIONS_QUERY from .processors import ( process_property, process_extra_property_details, @@ -171,13 +171,10 @@ class RealtorScraper(Scraper): def handle_home(self, property_id: str) -> list[Property]: """Fetch single home with proper error handling.""" query = ( - """%s - query GetHomeDetails($property_id: ID!) { - home(property_id: $property_id) { - ...SearchFragment - } + """query GetHomeDetails($property_id: ID!) { + home(property_id: $property_id) %s }""" - % HOME_FRAGMENT + % HOMES_DATA ) variables = {"property_id": property_id} @@ -424,9 +421,7 @@ class RealtorScraper(Scraper): limit: 200 offset: $offset ) %s - } - %s - %s""" % ( + }""" % ( is_foreclosure, status_param, date_param, @@ -435,13 +430,11 @@ class RealtorScraper(Scraper): pending_or_contingent_param, sort_param, GENERAL_RESULTS_QUERY, - SEARCH_RESULTS_FRAGMENT, - LISTING_PHOTOS_FRAGMENT, ) elif search_type == "area": #: general search, came from a general location query = """query GetHomeSearch( $search_location: SearchLocation, - $offset: Int, + $offset: Int ) { homeSearch: home_search( query: { @@ -458,9 +451,7 @@ class RealtorScraper(Scraper): limit: 200 offset: $offset ) %s - } - %s - %s""" % ( + }""" % ( is_foreclosure, status_param, date_param, @@ -470,8 +461,6 @@ class RealtorScraper(Scraper): bucket_param, sort_param, GENERAL_RESULTS_QUERY, - SEARCH_RESULTS_FRAGMENT, - LISTING_PHOTOS_FRAGMENT, ) else: #: general search, came from an address query = ( @@ -486,10 +475,8 @@ class RealtorScraper(Scraper): limit: 1 offset: $offset ) %s - } - %s - %s""" - % (GENERAL_RESULTS_QUERY, SEARCH_RESULTS_FRAGMENT, LISTING_PHOTOS_FRAGMENT) + }""" + % GENERAL_RESULTS_QUERY ) response_json = self._graphql_post(query, variables, "GetHomeSearch") @@ -1128,12 +1115,10 @@ class RealtorScraper(Scraper): property_ids = list(set(property_ids)) fragments = "\n".join( - f'home_{property_id}: home(property_id: {property_id}) {{ ...SearchFragment }}' + f'home_{property_id}: home(property_id: {property_id}) {HOMES_DATA}' for property_id in property_ids ) - query = f"""{HOME_FRAGMENT} - -query GetHome {{ + query = f"""query GetHome {{ {fragments} }}""" diff --git a/homeharvest/core/scrapers/realtor/queries.py b/homeharvest/core/scrapers/realtor/queries.py index 70bcf15..f2abcf4 100644 --- a/homeharvest/core/scrapers/realtor/queries.py +++ b/homeharvest/core/scrapers/realtor/queries.py @@ -1,5 +1,5 @@ SEARCH_RESULTS_FRAGMENT = """ -fragment SearchFragment on SearchHome { +fragment PropertyResult on SearchHome { __typename pending_date listing_id @@ -371,7 +371,7 @@ _SEARCH_HOMES_DATA_BASE = """{ HOME_FRAGMENT = """ -fragment SearchFragment on Home { +fragment PropertyResult on Home { __typename pending_date listing_id @@ -689,12 +689,8 @@ GENERAL_RESULTS_QUERY = """{ __typename count total - results { - __typename - ...SearchFragment - ...ListingPhotosFragment - } - }""" + results %s + }""" % SEARCH_HOMES_DATA LISTING_PHOTOS_FRAGMENT = """ fragment ListingPhotosFragment on SearchHome { diff --git a/pyproject.toml b/pyproject.toml index 558797f..e387a93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.8.16" +version = "0.8.17" description = "Real estate scraping library" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/ZacharyHampton/HomeHarvest"