Mirror of https://github.com/Bunsly/HomeHarvest.git (synced 2026-03-05 03:54:29 -08:00)
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | ec036bb989 | | |
| | aacd168545 | | |
| | 0d70007000 | | |
@@ -49,12 +49,10 @@ def scrape_property(
|
|||||||
site = RealtorScraper(scraper_input)
|
site = RealtorScraper(scraper_input)
|
||||||
results = site.search()
|
results = site.search()
|
||||||
|
|
||||||
properties_dfs = [process_result(result) for result in results]
|
properties_dfs = [df for result in results if not (df := process_result(result)).empty]
|
||||||
if not properties_dfs:
|
if not properties_dfs:
|
||||||
return pd.DataFrame()
|
return pd.DataFrame()
|
||||||
|
|
||||||
properties_dfs = [df for df in properties_dfs if not df.empty]
|
|
||||||
|
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
warnings.simplefilter("ignore", category=FutureWarning)
|
warnings.simplefilter("ignore", category=FutureWarning)
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from urllib3.util.retry import Retry
|
|||||||
import uuid
|
import uuid
|
||||||
from ...exceptions import AuthenticationError
|
from ...exceptions import AuthenticationError
|
||||||
from .models import Property, ListingType, SiteName
|
from .models import Property, ListingType, SiteName
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -71,18 +72,25 @@ class Scraper:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_access_token():
|
def get_access_token():
|
||||||
url = "https://graph.realtor.com/auth/token"
|
device_id = str(uuid.uuid4()).upper()
|
||||||
|
|
||||||
payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}'
|
response = requests.post(
|
||||||
headers = {
|
"https://graph.realtor.com/auth/token",
|
||||||
"Host": "graph.realtor.com",
|
headers={
|
||||||
"x-client-version": "24.20.4.149916",
|
'Host': 'graph.realtor.com',
|
||||||
"accept": "*/*",
|
'Accept': '*/*',
|
||||||
"content-type": "Application/json",
|
'Content-Type': 'Application/json',
|
||||||
"user-agent": "Realtor.com/24.20.4.149916 CFNetwork/1410.0.3 Darwin/22.6.0",
|
'X-Client-ID': 'rdc_mobile_native,iphone',
|
||||||
"accept-language": "en-US,en;q=0.9",
|
'X-Visitor-ID': device_id,
|
||||||
}
|
'X-Client-Version': '24.21.23.679885',
|
||||||
response = requests.post(url, headers=headers, data=payload)
|
'Accept-Language': 'en-US,en;q=0.9',
|
||||||
|
'User-Agent': 'Realtor.com/24.21.23.679885 CFNetwork/1494.0.7 Darwin/23.4.0',
|
||||||
|
},
|
||||||
|
data=json.dumps({
|
||||||
|
"grant_type": "device_mobile",
|
||||||
|
"device_id": device_id,
|
||||||
|
"client_app_id": "rdc_mobile_native,24.21.23.679885,iphone"
|
||||||
|
}))
|
||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
||||||
|
|||||||
@@ -166,7 +166,7 @@ class RealtorScraper(Scraper):
|
|||||||
longitude=property_info["address"]["location"]["coordinate"].get("lon") if able_to_get_lat_long else None,
|
longitude=property_info["address"]["location"]["coordinate"].get("lon") if able_to_get_lat_long else None,
|
||||||
address=self._parse_address(property_info, search_type="handle_listing"),
|
address=self._parse_address(property_info, search_type="handle_listing"),
|
||||||
description=Description(
|
description=Description(
|
||||||
alt_photos=self.process_alt_photos(property_info.get("media", {}).get("photos", [])),
|
alt_photos=self.process_alt_photos(property_info["media"].get("photos", [])) if property_info.get("media") else None,
|
||||||
style=property_info["basic"].get("type", "").upper(),
|
style=property_info["basic"].get("type", "").upper(),
|
||||||
beds=property_info["basic"].get("beds"),
|
beds=property_info["basic"].get("beds"),
|
||||||
baths_full=property_info["basic"].get("baths_full"),
|
baths_full=property_info["basic"].get("baths_full"),
|
||||||
@@ -805,7 +805,7 @@ class RealtorScraper(Scraper):
|
|||||||
|
|
||||||
return Description(
|
return Description(
|
||||||
primary_photo=primary_photo,
|
primary_photo=primary_photo,
|
||||||
alt_photos=RealtorScraper.process_alt_photos(result.get("photos")),
|
alt_photos=RealtorScraper.process_alt_photos(result.get("photos", [])),
|
||||||
style=PropertyType.__getitem__(style) if style and style in PropertyType.__members__ else None,
|
style=PropertyType.__getitem__(style) if style and style in PropertyType.__members__ else None,
|
||||||
beds=description_data.get("beds"),
|
beds=description_data.get("beds"),
|
||||||
baths_full=description_data.get("baths_full"),
|
baths_full=description_data.get("baths_full"),
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ def process_result(result: Property) -> pd.DataFrame:
|
|||||||
|
|
||||||
description = result.description
|
description = result.description
|
||||||
prop_data["primary_photo"] = description.primary_photo
|
prop_data["primary_photo"] = description.primary_photo
|
||||||
prop_data["alt_photos"] = ", ".join(description.alt_photos)
|
prop_data["alt_photos"] = ", ".join(description.alt_photos) if description.alt_photos else None
|
||||||
prop_data["style"] = description.style if type(description.style) == str else description.style.value
|
prop_data["style"] = description.style if type(description.style) == str else description.style.value
|
||||||
prop_data["beds"] = description.beds
|
prop_data["beds"] = description.beds
|
||||||
prop_data["full_baths"] = description.baths_full
|
prop_data["full_baths"] = description.baths_full
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "homeharvest"
|
name = "homeharvest"
|
||||||
version = "0.3.25"
|
version = "0.3.28"
|
||||||
description = "Real estate scraping library"
|
description = "Real estate scraping library"
|
||||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||||
homepage = "https://github.com/Bunsly/HomeHarvest"
|
homepage = "https://github.com/Bunsly/HomeHarvest"
|
||||||
|
|||||||
Reference in New Issue
Block a user