- extra_property_data parameter

- updated docs
- classified exception
pull/82/head v0.3.21
Zachary Hampton 2024-05-02 09:04:49 -07:00
parent 46985dcee4
commit c3e24a4ce0
6 changed files with 46 additions and 16 deletions

View File

@ -43,7 +43,6 @@ properties = scrape_property(
# date_from="2023-05-01", # alternative to past_days
# date_to="2023-05-28",
# foreclosure=True
# mls_only=True, # only fetch MLS listings
)
print(f"Number of properties: {len(properties)}")
@ -92,6 +91,8 @@ Optional
├── foreclosure (True/False): If set, fetches only foreclosures
├── proxy (string): In format 'http://user:pass@host:port'
└── extra_property_data (bool): If set, fetches additional property data (e.g. agent, broker, property evaluations). Increases the number of requests by O(n).
```
### Property Schema
@ -139,17 +140,13 @@ Property
├── Agent Info:
│ ├── agent
│ ├── broker
│ └── broker_phone
├── Agent Info:
│ ├── agent
│ ├── broker
│ └── broker_phone
│ ├── agent_email
│ └── agent_phone
```
### Exceptions
The following exceptions may be raised when using HomeHarvest:
- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD.
- `AuthenticationError` - Realtor.com token request failed.

View File

@ -43,6 +43,7 @@ def scrape_property(
date_from=date_from,
date_to=date_to,
foreclosure=foreclosure,
extra_property_data=extra_property_data,
)
site = RealtorScraper(scraper_input)

View File

@ -3,6 +3,7 @@ import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import uuid
from ...exceptions import AuthenticationError
from .models import Property, ListingType, SiteName
@ -11,12 +12,13 @@ class ScraperInput:
location: str
listing_type: ListingType
radius: float | None = None
mls_only: bool | None = None
mls_only: bool | None = False
proxy: str | None = None
last_x_days: int | None = None
date_from: str | None = None
date_to: str | None = None
foreclosure: bool | None = None
foreclosure: bool | None = False
extra_property_data: bool | None = True
class Scraper:
@ -57,6 +59,7 @@ class Scraper:
self.date_from = scraper_input.date_from
self.date_to = scraper_input.date_to
self.foreclosure = scraper_input.foreclosure
self.extra_property_data = scraper_input.extra_property_data
def search(self) -> list[Property]: ...
@ -65,7 +68,8 @@ class Scraper:
def handle_location(self): ...
def get_access_token(self):
@staticmethod
def get_access_token():
url = "https://graph.realtor.com/auth/token"
payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}'
@ -80,8 +84,11 @@ class Scraper:
response = requests.post(url, headers=headers, data=payload)
data = response.json()
try:
access_token = data["access_token"]
except Exception:
raise Exception("Could not get access token, use a proxy/vpn or wait")
if not (access_token := data.get("access_token")):
raise AuthenticationError(
"Failed to get access token, use a proxy/vpn or wait a moment and try again.",
response=response
)
return access_token

View File

@ -651,6 +651,9 @@ class RealtorScraper(Scraper):
return homes
def get_prop_details(self, property_id: str) -> dict:
if not self.extra_property_data:
return {}
query = """query GetHome($property_id: ID!) {
home(property_id: $property_id) {
__typename

View File

@ -4,3 +4,11 @@ class InvalidListingType(Exception):
class InvalidDate(Exception):
"""Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23"""
class AuthenticationError(Exception):
    """Raised when there is an issue with the authentication process.

    Args:
        *args: Positional arguments forwarded to ``Exception`` (typically
            the error message).
        response: Optional object describing the failed request, kept so
            callers can inspect it. Defaults to ``None``.

    Attributes:
        response: The value passed as ``response``, or ``None``.
    """

    def __init__(self, *args, response=None):
        super().__init__(*args)
        # A default is required here: BaseException reconstructs instances
        # via ``cls(*args)`` (copy/pickle), which would fail with a mandatory
        # keyword-only argument.
        self.response = response

View File

@ -142,3 +142,17 @@ def test_realtor_foreclosed():
def test_realtor_agent():
    """Detroit for-sale listings should contain more than one distinct agent."""
    listings = scrape_property(listing_type="for_sale", location="Detroit, MI")
    distinct_agents = listings["agent"].nunique()
    assert distinct_agents > 1
def test_realtor_without_extra_details():
    """Check that disabling ``extra_property_data`` changes the scraped result.

    The same address is scraped twice, once with ``extra_property_data=False``
    and once with the parameter left at its default, and the two results are
    required to differ.
    """
    location = "15509 N 172nd Dr, Surprise, AZ 85388"
    without_extra = scrape_property(location=location, extra_property_data=False)
    with_extra = scrape_property(location=location)

    # NOTE(review): scrape_property presumably returns a pandas DataFrame.
    # ``!=`` on DataFrames is element-wise, and asserting on the resulting
    # frame raises ValueError, so compare the frames as a whole instead.
    assert not without_extra.equals(with_extra)