Compare commits

...

3 Commits

Author SHA1 Message Date
Zachary Hampton
ec036bb989 - optimizations & updated realtor headers 2024-05-20 12:13:30 -07:00
Zachary Hampton
aacd168545 - alt photos bug fix 2024-05-18 17:47:55 -07:00
Zachary Hampton
0d70007000 - alt photos bug fix 2024-05-16 23:04:07 -07:00
5 changed files with 24 additions and 18 deletions

View File

@@ -49,12 +49,10 @@ def scrape_property(
site = RealtorScraper(scraper_input)
results = site.search()
- properties_dfs = [process_result(result) for result in results]
+ properties_dfs = [df for result in results if not (df := process_result(result)).empty]
if not properties_dfs:
return pd.DataFrame()
- properties_dfs = [df for df in properties_dfs if not df.empty]
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)

View File

@@ -6,6 +6,7 @@ from urllib3.util.retry import Retry
import uuid
from ...exceptions import AuthenticationError
from .models import Property, ListingType, SiteName
+ import json
@dataclass
@@ -71,18 +72,25 @@ class Scraper:
@staticmethod
def get_access_token():
- url = "https://graph.realtor.com/auth/token"
+ device_id = str(uuid.uuid4()).upper()
- payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}'
- headers = {
- "Host": "graph.realtor.com",
- "x-client-version": "24.20.4.149916",
- "accept": "*/*",
- "content-type": "Application/json",
- "user-agent": "Realtor.com/24.20.4.149916 CFNetwork/1410.0.3 Darwin/22.6.0",
- "accept-language": "en-US,en;q=0.9",
- }
- response = requests.post(url, headers=headers, data=payload)
+ response = requests.post(
+ "https://graph.realtor.com/auth/token",
+ headers={
+ 'Host': 'graph.realtor.com',
+ 'Accept': '*/*',
+ 'Content-Type': 'Application/json',
+ 'X-Client-ID': 'rdc_mobile_native,iphone',
+ 'X-Visitor-ID': device_id,
+ 'X-Client-Version': '24.21.23.679885',
+ 'Accept-Language': 'en-US,en;q=0.9',
+ 'User-Agent': 'Realtor.com/24.21.23.679885 CFNetwork/1494.0.7 Darwin/23.4.0',
+ },
+ data=json.dumps({
+ "grant_type": "device_mobile",
+ "device_id": device_id,
+ "client_app_id": "rdc_mobile_native,24.21.23.679885,iphone"
+ }))
data = response.json()

View File

@@ -166,7 +166,7 @@ class RealtorScraper(Scraper):
longitude=property_info["address"]["location"]["coordinate"].get("lon") if able_to_get_lat_long else None,
address=self._parse_address(property_info, search_type="handle_listing"),
description=Description(
- alt_photos=self.process_alt_photos(property_info.get("media", {}).get("photos", [])),
+ alt_photos=self.process_alt_photos(property_info["media"].get("photos", [])) if property_info.get("media") else None,
style=property_info["basic"].get("type", "").upper(),
beds=property_info["basic"].get("beds"),
baths_full=property_info["basic"].get("baths_full"),
@@ -805,7 +805,7 @@ class RealtorScraper(Scraper):
return Description(
primary_photo=primary_photo,
- alt_photos=RealtorScraper.process_alt_photos(result.get("photos")),
+ alt_photos=RealtorScraper.process_alt_photos(result.get("photos", [])),
style=PropertyType.__getitem__(style) if style and style in PropertyType.__members__ else None,
beds=description_data.get("beds"),
baths_full=description_data.get("baths_full"),

View File

@@ -84,7 +84,7 @@ def process_result(result: Property) -> pd.DataFrame:
description = result.description
prop_data["primary_photo"] = description.primary_photo
- prop_data["alt_photos"] = ", ".join(description.alt_photos)
+ prop_data["alt_photos"] = ", ".join(description.alt_photos) if description.alt_photos else None
prop_data["style"] = description.style if type(description.style) == str else description.style.value
prop_data["beds"] = description.beds
prop_data["full_baths"] = description.baths_full

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "homeharvest"
- version = "0.3.25"
+ version = "0.3.28"
description = "Real estate scraping library"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/HomeHarvest"