- optimizations & updated realtor headers

pull/82/head v0.3.28
Zachary Hampton 2024-05-20 12:13:30 -07:00
parent aacd168545
commit ec036bb989
3 changed files with 21 additions and 15 deletions

View File

@@ -49,12 +49,10 @@ def scrape_property(
site = RealtorScraper(scraper_input) site = RealtorScraper(scraper_input)
results = site.search() results = site.search()
properties_dfs = [process_result(result) for result in results] properties_dfs = [df for result in results if not (df := process_result(result)).empty]
if not properties_dfs: if not properties_dfs:
return pd.DataFrame() return pd.DataFrame()
properties_dfs = [df for df in properties_dfs if not df.empty]
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning) warnings.simplefilter("ignore", category=FutureWarning)

View File

@@ -6,6 +6,7 @@ from urllib3.util.retry import Retry
import uuid import uuid
from ...exceptions import AuthenticationError from ...exceptions import AuthenticationError
from .models import Property, ListingType, SiteName from .models import Property, ListingType, SiteName
import json
@dataclass @dataclass
@@ -71,18 +72,25 @@ class Scraper:
@staticmethod @staticmethod
def get_access_token(): def get_access_token():
url = "https://graph.realtor.com/auth/token" device_id = str(uuid.uuid4()).upper()
payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}' response = requests.post(
"https://graph.realtor.com/auth/token",
headers={ headers={
"Host": "graph.realtor.com", 'Host': 'graph.realtor.com',
"x-client-version": "24.20.4.149916", 'Accept': '*/*',
"accept": "*/*", 'Content-Type': 'Application/json',
"content-type": "Application/json", 'X-Client-ID': 'rdc_mobile_native,iphone',
"user-agent": "Realtor.com/24.20.4.149916 CFNetwork/1410.0.3 Darwin/22.6.0", 'X-Visitor-ID': device_id,
"accept-language": "en-US,en;q=0.9", 'X-Client-Version': '24.21.23.679885',
} 'Accept-Language': 'en-US,en;q=0.9',
response = requests.post(url, headers=headers, data=payload) 'User-Agent': 'Realtor.com/24.21.23.679885 CFNetwork/1494.0.7 Darwin/23.4.0',
},
data=json.dumps({
"grant_type": "device_mobile",
"device_id": device_id,
"client_app_id": "rdc_mobile_native,24.21.23.679885,iphone"
}))
data = response.json() data = response.json()

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "homeharvest" name = "homeharvest"
version = "0.3.27" version = "0.3.28"
description = "Real estate scraping library" description = "Real estate scraping library"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"] authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/HomeHarvest" homepage = "https://github.com/Bunsly/HomeHarvest"