from __future__ import annotations from typing import Union import requests import uuid from ...exceptions import AuthenticationError from .models import Property, ListingType, SiteName, SearchPropertyType, ReturnType import json from pydantic import BaseModel DEFAULT_HEADERS = { 'Content-Type': 'application/json', 'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.9', 'Cache-Control': 'no-cache', 'Origin': 'https://www.realtor.com', 'Pragma': 'no-cache', 'Referer': 'https://www.realtor.com/', 'rdc-client-name': 'RDC_WEB_SRP_FS_PAGE', 'rdc-client-version': '3.0.2515', 'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"', 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"macOS"', 'sec-fetch-dest': 'empty', 'sec-fetch-mode': 'cors', 'sec-fetch-site': 'same-site', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36', 'x-is-bot': 'false', } class ScraperInput(BaseModel): location: str listing_type: ListingType | list[ListingType] | None property_type: list[SearchPropertyType] | None = None radius: float | None = None mls_only: bool | None = False proxy: str | None = None last_x_days: int | None = None date_from: str | None = None date_to: str | None = None date_from_precision: str | None = None # "day" or "hour" date_to_precision: str | None = None # "day" or "hour" foreclosure: bool | None = False extra_property_data: bool | None = True exclude_pending: bool | None = False limit: int = 10000 offset: int = 0 return_type: ReturnType = ReturnType.pandas # New date/time filtering parameters past_hours: int | None = None # New last_update_date filtering parameters updated_since: str | None = None updated_in_past_hours: int | None = None # New property filtering parameters beds_min: int | None = None beds_max: int | None = None baths_min: float | None = None baths_max: float | None = None sqft_min: int | None = None sqft_max: int | None = None price_min: int | None = None price_max: int | None = None lot_sqft_min: int | None = None lot_sqft_max: int | None = None year_built_min: int | None = None year_built_max: int | None = None # New sorting parameters sort_by: str | None = None sort_direction: str = "desc" # Pagination control parallel: bool = True class Scraper: def __init__( self, scraper_input: ScraperInput, ): self.location = scraper_input.location self.listing_type = scraper_input.listing_type self.property_type = scraper_input.property_type self.proxy = scraper_input.proxy self.proxies = {"http": self.proxy, "https": self.proxy} if self.proxy else None self.listing_type = scraper_input.listing_type self.radius = scraper_input.radius self.last_x_days = scraper_input.last_x_days self.mls_only = scraper_input.mls_only self.date_from = scraper_input.date_from self.date_to = scraper_input.date_to self.date_from_precision = scraper_input.date_from_precision self.date_to_precision = scraper_input.date_to_precision self.foreclosure = scraper_input.foreclosure self.extra_property_data = False # TODO: temporarily disabled self.exclude_pending = scraper_input.exclude_pending self.limit = scraper_input.limit self.offset = scraper_input.offset self.return_type = scraper_input.return_type # New date/time filtering self.past_hours = scraper_input.past_hours # New last_update_date filtering self.updated_since = scraper_input.updated_since self.updated_in_past_hours = scraper_input.updated_in_past_hours # New property filtering self.beds_min = scraper_input.beds_min self.beds_max = scraper_input.beds_max self.baths_min = scraper_input.baths_min self.baths_max = scraper_input.baths_max self.sqft_min = scraper_input.sqft_min self.sqft_max = scraper_input.sqft_max self.price_min = scraper_input.price_min self.price_max = scraper_input.price_max self.lot_sqft_min = scraper_input.lot_sqft_min self.lot_sqft_max = scraper_input.lot_sqft_max self.year_built_min = scraper_input.year_built_min self.year_built_max = scraper_input.year_built_max # New sorting self.sort_by = scraper_input.sort_by self.sort_direction = scraper_input.sort_direction # Pagination control self.parallel = scraper_input.parallel def search(self) -> list[Union[Property | dict]]: ... @staticmethod def _parse_home(home) -> Property: ... def handle_location(self): ... @staticmethod def get_access_token(): device_id = str(uuid.uuid4()).upper() response = requests.post( "https://graph.realtor.com/auth/token", headers={ "Host": "graph.realtor.com", "Accept": "*/*", "Content-Type": "Application/json", "X-Client-ID": "rdc_mobile_native,iphone", "X-Visitor-ID": device_id, "X-Client-Version": "24.21.23.679885", "Accept-Language": "en-US,en;q=0.9", "User-Agent": "Realtor.com/24.21.23.679885 CFNetwork/1494.0.7 Darwin/23.4.0", }, data=json.dumps( { "grant_type": "device_mobile", "device_id": device_id, "client_app_id": "rdc_mobile_native,24.21.23.679885,iphone", } ), ) data = response.json() if not (access_token := data.get("access_token")): raise AuthenticationError( "Failed to get access token, use a proxy/vpn or wait a moment and try again.", response=response ) return access_token