2024-05-14 19:13:04 -07:00
|
|
|
from __future__ import annotations
|
2023-09-15 15:17:37 -07:00
|
|
|
from dataclasses import dataclass
|
|
|
|
import requests
|
2024-04-30 13:29:54 -07:00
|
|
|
from requests.adapters import HTTPAdapter
|
|
|
|
from urllib3.util.retry import Retry
|
2024-04-16 12:55:44 -07:00
|
|
|
import uuid
|
2024-05-02 09:04:49 -07:00
|
|
|
from ...exceptions import AuthenticationError
|
2024-11-03 15:23:07 -08:00
|
|
|
from .models import Property, ListingType, SiteName, SearchPropertyType
|
2024-05-20 12:13:30 -07:00
|
|
|
import json
|
2023-09-15 15:17:37 -07:00
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class ScraperInput:
    """Bundle of search parameters handed to a ``Scraper``.

    Only ``location`` and ``listing_type`` are required; every other field
    has a default. ``Scraper.__init__`` copies these values onto the scraper
    instance.
    """

    # --- required -----------------------------------------------------
    location: str
    listing_type: ListingType

    # --- optional search filters ---------------------------------------
    property_type: list[SearchPropertyType] | None = None
    radius: float | None = None  # NOTE(review): unit is not visible here — confirm
    mls_only: bool | None = False

    # Proxy URL; the scraper applies it to both http and https traffic.
    proxy: str | None = None

    # Date filtering. NOTE(review): the expected string format of
    # date_from / date_to is not visible in this file — confirm with callers.
    last_x_days: int | None = None
    date_from: str | None = None
    date_to: str | None = None

    # --- result shaping ---------------------------------------------------
    foreclosure: bool | None = False
    extra_property_data: bool | None = True
    exclude_pending: bool | None = False
    limit: int = 10000
|
2023-09-15 15:17:37 -07:00
|
|
|
|
|
|
|
|
|
|
|
class Scraper:
    """Base scraper: stores search parameters and owns a shared HTTP session.

    A single ``requests.Session`` is created by the first instance and stored
    on the class (``Scraper.session``), so all later instances reuse the same
    session with its retry policy and default headers.
    """

    # Shared by every instance; created lazily in ``__init__``.
    session = None

    def __init__(
        self,
        scraper_input: ScraperInput,
    ):
        """Copy the search parameters from *scraper_input* and prepare the session.

        Args:
            scraper_input: Search parameters. Its ``proxy`` value, when set,
                is applied to both ``http`` and ``https`` traffic of the
                shared session.
        """
        self.location = scraper_input.location
        self.listing_type = scraper_input.listing_type
        self.property_type = scraper_input.property_type

        if not self.session:
            Scraper.session = requests.Session()
            # Retry up to 3 times on 429/403 responses for GET and POST.
            retries = Retry(
                total=3, backoff_factor=4, status_forcelist=[429, 403], allowed_methods=frozenset(["GET", "POST"])
            )

            adapter = HTTPAdapter(max_retries=retries)
            Scraper.session.mount("http://", adapter)
            Scraper.session.mount("https://", adapter)
            # Browser-like default headers sent with every session request.
            Scraper.session.headers.update(
                {
                    "accept": "application/json, text/javascript",
                    "accept-language": "en-US,en;q=0.9",
                    "cache-control": "no-cache",
                    "content-type": "application/json",
                    "origin": "https://www.realtor.com",
                    "pragma": "no-cache",
                    "priority": "u=1, i",
                    "rdc-ab-tests": "commute_travel_time_variation:v1",
                    "sec-ch-ua": '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
                    "sec-ch-ua-mobile": "?0",
                    "sec-ch-ua-platform": '"Windows"',
                    "sec-fetch-dest": "empty",
                    "sec-fetch-mode": "cors",
                    "sec-fetch-site": "same-origin",
                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
                }
            )

        if scraper_input.proxy:
            proxy_url = scraper_input.proxy
            proxies = {"http": proxy_url, "https": proxy_url}
            self.session.proxies.update(proxies)
        else:
            # The session is shared across instances, so a proxy configured by
            # an earlier instance would otherwise keep being used even though
            # this input asked for none. Only the keys this class sets are
            # removed.
            self.session.proxies.pop("http", None)
            self.session.proxies.pop("https", None)

        self.radius = scraper_input.radius
        self.last_x_days = scraper_input.last_x_days
        self.mls_only = scraper_input.mls_only
        self.date_from = scraper_input.date_from
        self.date_to = scraper_input.date_to
        self.foreclosure = scraper_input.foreclosure
        self.extra_property_data = scraper_input.extra_property_data
        self.exclude_pending = scraper_input.exclude_pending
        self.limit = scraper_input.limit

    def search(self) -> list[Property]:
        """Placeholder with no implementation in this class (returns ``None``).

        NOTE(review): presumably overridden by site-specific subclasses — confirm.
        """
        ...

    @staticmethod
    def _parse_home(home) -> Property:
        """Placeholder with no implementation in this class (returns ``None``).

        NOTE(review): presumably overridden by site-specific subclasses — confirm.
        """
        ...

    def handle_location(self):
        """Placeholder with no implementation in this class (returns ``None``).

        NOTE(review): presumably overridden by site-specific subclasses — confirm.
        """
        ...

    @staticmethod
    def get_access_token():
        """Request an access token from ``graph.realtor.com``.

        A fresh upper-cased UUID is generated per call and sent as both the
        visitor id header and the ``device_id`` of a ``device_mobile`` grant,
        with headers identifying the caller as the Realtor.com iPhone client.

        Returns:
            The ``access_token`` value from the JSON response.

        Raises:
            AuthenticationError: If the response body is not a JSON object or
                carries no ``access_token``.
            requests.exceptions.RequestException: Connection failures and
                timeouts are not wrapped and propagate unchanged.
        """
        device_id = str(uuid.uuid4()).upper()

        # NOTE(review): this goes through ``requests.post`` directly, so the
        # shared session's proxy and retry settings do not apply to the token
        # request — confirm whether that is intended.
        response = requests.post(
            "https://graph.realtor.com/auth/token",
            headers={
                "Host": "graph.realtor.com",
                "Accept": "*/*",
                "Content-Type": "Application/json",
                "X-Client-ID": "rdc_mobile_native,iphone",
                "X-Visitor-ID": device_id,
                "X-Client-Version": "24.21.23.679885",
                "Accept-Language": "en-US,en;q=0.9",
                "User-Agent": "Realtor.com/24.21.23.679885 CFNetwork/1494.0.7 Darwin/23.4.0",
            },
            data=json.dumps(
                {
                    "grant_type": "device_mobile",
                    "device_id": device_id,
                    "client_app_id": "rdc_mobile_native,24.21.23.679885,iphone",
                }
            ),
            # Without a timeout requests may wait on the server indefinitely.
            timeout=30,
        )

        try:
            data = response.json()
        except ValueError:
            # A non-JSON body (e.g. an HTML block page) should surface as the
            # same authentication failure, not as a JSON decoding error.
            data = None

        access_token = data.get("access_token") if isinstance(data, dict) else None
        if not access_token:
            raise AuthenticationError(
                "Failed to get access token, use a proxy/vpn or wait a moment and try again.", response=response
            )

        return access_token
|