mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-05 03:54:29 -08:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ec036bb989 | ||
|
|
aacd168545 | ||
|
|
0d70007000 | ||
|
|
018d3fbac4 | ||
|
|
803fd618e9 | ||
|
|
b23b55ca80 |
@@ -21,7 +21,7 @@
|
||||
```bash
|
||||
pip install -U homeharvest
|
||||
```
|
||||
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
|
||||
_Python version >= [3.9](https://www.python.org/downloads/release/python-390/) required_
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
@@ -49,11 +49,11 @@ def scrape_property(
|
||||
site = RealtorScraper(scraper_input)
|
||||
results = site.search()
|
||||
|
||||
properties_dfs = [process_result(result) for result in results]
|
||||
properties_dfs = [df for result in results if not (df := process_result(result)).empty]
|
||||
if not properties_dfs:
|
||||
return pd.DataFrame()
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", category=FutureWarning)
|
||||
|
||||
return pd.concat(properties_dfs, ignore_index=True, axis=0)[ordered_properties].replace({"None": "", None: ""})
|
||||
return pd.concat(properties_dfs, ignore_index=True, axis=0)[ordered_properties].replace({"None": pd.NA, None: pd.NA, "": pd.NA})
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
@@ -5,6 +6,7 @@ from urllib3.util.retry import Retry
|
||||
import uuid
|
||||
from ...exceptions import AuthenticationError
|
||||
from .models import Property, ListingType, SiteName
|
||||
import json
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -70,18 +72,25 @@ class Scraper:
|
||||
|
||||
@staticmethod
|
||||
def get_access_token():
|
||||
url = "https://graph.realtor.com/auth/token"
|
||||
device_id = str(uuid.uuid4()).upper()
|
||||
|
||||
payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}'
|
||||
headers = {
|
||||
"Host": "graph.realtor.com",
|
||||
"x-client-version": "24.20.4.149916",
|
||||
"accept": "*/*",
|
||||
"content-type": "Application/json",
|
||||
"user-agent": "Realtor.com/24.20.4.149916 CFNetwork/1410.0.3 Darwin/22.6.0",
|
||||
"accept-language": "en-US,en;q=0.9",
|
||||
}
|
||||
response = requests.post(url, headers=headers, data=payload)
|
||||
response = requests.post(
|
||||
"https://graph.realtor.com/auth/token",
|
||||
headers={
|
||||
'Host': 'graph.realtor.com',
|
||||
'Accept': '*/*',
|
||||
'Content-Type': 'Application/json',
|
||||
'X-Client-ID': 'rdc_mobile_native,iphone',
|
||||
'X-Visitor-ID': device_id,
|
||||
'X-Client-Version': '24.21.23.679885',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
'User-Agent': 'Realtor.com/24.21.23.679885 CFNetwork/1494.0.7 Darwin/23.4.0',
|
||||
},
|
||||
data=json.dumps({
|
||||
"grant_type": "device_mobile",
|
||||
"device_id": device_id,
|
||||
"client_app_id": "rdc_mobile_native,24.21.23.679885,iphone"
|
||||
}))
|
||||
|
||||
data = response.json()
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
@@ -36,6 +37,7 @@ class PropertyType(Enum):
|
||||
CONDO_TOWNHOME = "CONDO_TOWNHOME"
|
||||
CONDO_TOWNHOME_ROWHOME_COOP = "CONDO_TOWNHOME_ROWHOME_COOP"
|
||||
CONDO = "CONDO"
|
||||
CONDOP = "CONDOP"
|
||||
CONDOS = "CONDOS"
|
||||
COOP = "COOP"
|
||||
DUPLEX_TRIPLEX = "DUPLEX_TRIPLEX"
|
||||
@@ -52,6 +54,7 @@ class PropertyType(Enum):
|
||||
|
||||
@dataclass
|
||||
class Address:
|
||||
full_line: str | None = None
|
||||
street: str | None = None
|
||||
unit: str | None = None
|
||||
city: str | None = None
|
||||
|
||||
@@ -4,7 +4,7 @@ homeharvest.realtor.__init__
|
||||
|
||||
This module implements the scraper for realtor.com
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime
|
||||
from typing import Dict, Union, Optional
|
||||
@@ -52,6 +52,7 @@ class RealtorScraper(Scraper):
|
||||
listing_id
|
||||
}
|
||||
address {
|
||||
line
|
||||
street_direction
|
||||
street_number
|
||||
street_name
|
||||
@@ -165,7 +166,7 @@ class RealtorScraper(Scraper):
|
||||
longitude=property_info["address"]["location"]["coordinate"].get("lon") if able_to_get_lat_long else None,
|
||||
address=self._parse_address(property_info, search_type="handle_listing"),
|
||||
description=Description(
|
||||
alt_photos=self.process_alt_photos(property_info.get("media", {}).get("photos", [])),
|
||||
alt_photos=self.process_alt_photos(property_info["media"].get("photos", [])) if property_info.get("media") else None,
|
||||
style=property_info["basic"].get("type", "").upper(),
|
||||
beds=property_info["basic"].get("beds"),
|
||||
baths_full=property_info["basic"].get("baths_full"),
|
||||
@@ -236,6 +237,7 @@ class RealtorScraper(Scraper):
|
||||
stories
|
||||
}
|
||||
address {
|
||||
line
|
||||
street_direction
|
||||
street_number
|
||||
street_name
|
||||
@@ -352,6 +354,7 @@ class RealtorScraper(Scraper):
|
||||
street_number
|
||||
street_name
|
||||
street_suffix
|
||||
line
|
||||
unit
|
||||
city
|
||||
state_code
|
||||
@@ -657,6 +660,8 @@ class RealtorScraper(Scraper):
|
||||
if not self.extra_property_data:
|
||||
return {}
|
||||
|
||||
#: TODO: migrate "advertisers" and "estimates" to general query
|
||||
|
||||
query = """query GetHome($property_id: ID!) {
|
||||
home(property_id: $property_id) {
|
||||
__typename
|
||||
@@ -765,6 +770,7 @@ class RealtorScraper(Scraper):
|
||||
address = result["address"]
|
||||
|
||||
return Address(
|
||||
full_line=address.get("line"),
|
||||
street=" ".join(
|
||||
part for part in [
|
||||
address.get("street_number"),
|
||||
@@ -799,8 +805,8 @@ class RealtorScraper(Scraper):
|
||||
|
||||
return Description(
|
||||
primary_photo=primary_photo,
|
||||
alt_photos=RealtorScraper.process_alt_photos(result.get("photos")),
|
||||
style=PropertyType(style) if style else None,
|
||||
alt_photos=RealtorScraper.process_alt_photos(result.get("photos", [])),
|
||||
style=PropertyType.__getitem__(style) if style and style in PropertyType.__members__ else None,
|
||||
beds=description_data.get("beds"),
|
||||
baths_full=description_data.get("baths_full"),
|
||||
baths_half=description_data.get("baths_half"),
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from __future__ import annotations
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from .core.scrapers.models import Property, ListingType, Agent
|
||||
@@ -10,6 +11,7 @@ ordered_properties = [
|
||||
"status",
|
||||
"text",
|
||||
"style",
|
||||
"full_street_line",
|
||||
"street",
|
||||
"unit",
|
||||
"city",
|
||||
@@ -55,6 +57,7 @@ def process_result(result: Property) -> pd.DataFrame:
|
||||
|
||||
if "address" in prop_data:
|
||||
address_data = prop_data["address"]
|
||||
prop_data["full_street_line"] = address_data.full_line
|
||||
prop_data["street"] = address_data.street
|
||||
prop_data["unit"] = address_data.unit
|
||||
prop_data["city"] = address_data.city
|
||||
@@ -81,7 +84,7 @@ def process_result(result: Property) -> pd.DataFrame:
|
||||
|
||||
description = result.description
|
||||
prop_data["primary_photo"] = description.primary_photo
|
||||
prop_data["alt_photos"] = ", ".join(description.alt_photos)
|
||||
prop_data["alt_photos"] = ", ".join(description.alt_photos) if description.alt_photos else None
|
||||
prop_data["style"] = description.style if type(description.style) == str else description.style.value
|
||||
prop_data["beds"] = description.beds
|
||||
prop_data["full_baths"] = description.baths_full
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "homeharvest"
|
||||
version = "0.3.22"
|
||||
version = "0.3.28"
|
||||
description = "Real estate scraping library"
|
||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||
homepage = "https://github.com/Bunsly/HomeHarvest"
|
||||
@@ -10,7 +10,7 @@ readme = "README.md"
|
||||
homeharvest = "homeharvest.cli:main"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.10,<3.13"
|
||||
python = ">=3.9,<3.13"
|
||||
requests = "^2.31.0"
|
||||
pandas = "^2.1.1"
|
||||
|
||||
|
||||
@@ -155,4 +155,13 @@ def test_realtor_without_extra_details():
|
||||
),
|
||||
]
|
||||
|
||||
assert results[0] != results[1]
|
||||
assert not results[0].equals(results[1])
|
||||
|
||||
|
||||
def test_pr_zip_code():
|
||||
results = scrape_property(
|
||||
location="00741",
|
||||
listing_type="for_sale",
|
||||
)
|
||||
|
||||
assert results is not None and len(results) > 0
|
||||
|
||||
Reference in New Issue
Block a user