- extra_property_details parameter

- updated docs
- classified exception
pull/82/head v0.3.21
Zachary Hampton 2024-05-02 09:04:49 -07:00
parent 46985dcee4
commit c3e24a4ce0
6 changed files with 46 additions and 16 deletions

View File

@ -43,7 +43,6 @@ properties = scrape_property(
# date_from="2023-05-01", # alternative to past_days # date_from="2023-05-01", # alternative to past_days
# date_to="2023-05-28", # date_to="2023-05-28",
# foreclosure=True # foreclosure=True
# mls_only=True, # only fetch MLS listings # mls_only=True, # only fetch MLS listings
) )
print(f"Number of properties: {len(properties)}") print(f"Number of properties: {len(properties)}")
@ -92,6 +91,8 @@ Optional
├── foreclosure (True/False): If set, fetches only foreclosures ├── foreclosure (True/False): If set, fetches only foreclosures
└── proxy (string): In format 'http://user:pass@host:port' └── proxy (string): In format 'http://user:pass@host:port'
└── extra_property_data (bool): If set, fetches additional property data (e.g. agent, broker, property evaluations). This issues one extra request per property, so the total number of requests grows by O(n).
``` ```
### Property Schema ### Property Schema
@ -139,17 +140,13 @@ Property
├── Agent Info: ├── Agent Info:
│ ├── agent │ ├── agent
│ ├── broker │ ├── agent_email
│ └── broker_phone │ └── agent_phone
├── Agent Info:
│ ├── agent
│ ├── broker
│ └── broker_phone
``` ```
### Exceptions ### Exceptions
The following exceptions may be raised when using HomeHarvest: The following exceptions may be raised when using HomeHarvest:
- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold` - `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD - `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD.
- `AuthenticationError` - Realtor.com token request failed.

View File

@ -43,6 +43,7 @@ def scrape_property(
date_from=date_from, date_from=date_from,
date_to=date_to, date_to=date_to,
foreclosure=foreclosure, foreclosure=foreclosure,
extra_property_data=extra_property_data,
) )
site = RealtorScraper(scraper_input) site = RealtorScraper(scraper_input)

View File

@ -3,6 +3,7 @@ import requests
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry from urllib3.util.retry import Retry
import uuid import uuid
from ...exceptions import AuthenticationError
from .models import Property, ListingType, SiteName from .models import Property, ListingType, SiteName
@ -11,12 +12,13 @@ class ScraperInput:
location: str location: str
listing_type: ListingType listing_type: ListingType
radius: float | None = None radius: float | None = None
mls_only: bool | None = None mls_only: bool | None = False
proxy: str | None = None proxy: str | None = None
last_x_days: int | None = None last_x_days: int | None = None
date_from: str | None = None date_from: str | None = None
date_to: str | None = None date_to: str | None = None
foreclosure: bool | None = None foreclosure: bool | None = False
extra_property_data: bool | None = True
class Scraper: class Scraper:
@ -57,6 +59,7 @@ class Scraper:
self.date_from = scraper_input.date_from self.date_from = scraper_input.date_from
self.date_to = scraper_input.date_to self.date_to = scraper_input.date_to
self.foreclosure = scraper_input.foreclosure self.foreclosure = scraper_input.foreclosure
self.extra_property_data = scraper_input.extra_property_data
def search(self) -> list[Property]: ... def search(self) -> list[Property]: ...
@ -65,7 +68,8 @@ class Scraper:
def handle_location(self): ... def handle_location(self): ...
def get_access_token(self): @staticmethod
def get_access_token():
url = "https://graph.realtor.com/auth/token" url = "https://graph.realtor.com/auth/token"
payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}' payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}'
@ -80,8 +84,11 @@ class Scraper:
response = requests.post(url, headers=headers, data=payload) response = requests.post(url, headers=headers, data=payload)
data = response.json() data = response.json()
try:
access_token = data["access_token"] if not (access_token := data.get("access_token")):
except Exception: raise AuthenticationError(
raise Exception("Could not get access token, use a proxy/vpn or wait") "Failed to get access token, use a proxy/vpn or wait a moment and try again.",
response=response
)
return access_token return access_token

View File

@ -651,6 +651,9 @@ class RealtorScraper(Scraper):
return homes return homes
def get_prop_details(self, property_id: str) -> dict: def get_prop_details(self, property_id: str) -> dict:
if not self.extra_property_data:
return {}
query = """query GetHome($property_id: ID!) { query = """query GetHome($property_id: ID!) {
home(property_id: $property_id) { home(property_id: $property_id) {
__typename __typename

View File

@ -4,3 +4,11 @@ class InvalidListingType(Exception):
class InvalidDate(Exception): class InvalidDate(Exception):
"""Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23""" """Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23"""
class AuthenticationError(Exception):
    """Raised when there is an issue with the authentication process.

    Attributes:
        response: The object supplied by the raiser through the ``response``
            keyword (presumably the HTTP response of the failed token
            request -- confirm against the raise site), or ``None`` when no
            response was provided.
    """

    def __init__(self, *args, response=None):
        """Create the error.

        Args:
            *args: Positional arguments forwarded to ``Exception`` (typically
                the error message).
            response: Optional object describing the failed request. It is
                keyword-only and defaults to ``None`` so the exception can be
                rebuilt from ``args`` alone, which is what ``copy`` and
                ``pickle`` do before restoring the instance attributes.
        """
        super().__init__(*args)
        self.response = response

View File

@ -142,3 +142,17 @@ def test_realtor_foreclosed():
def test_realtor_agent(): def test_realtor_agent():
scraped = scrape_property(location="Detroit, MI", listing_type="for_sale") scraped = scrape_property(location="Detroit, MI", listing_type="for_sale")
assert scraped["agent"].nunique() > 1 assert scraped["agent"].nunique() > 1
def test_realtor_without_extra_details():
    """Check that disabling ``extra_property_data`` changes the scraped result.

    The same address is scraped twice: once with ``extra_property_data=False``
    and once with the parameter left at its default. The two results are
    expected to differ.
    """
    location = "15509 N 172nd Dr, Surprise, AZ 85388"

    without_extra = scrape_property(
        location=location,
        extra_property_data=False,
    )
    with_default = scrape_property(
        location=location,
    )

    # NOTE(review): scrape_property appears to return a pandas DataFrame (other
    # tests call scraped["agent"].nunique()). `!=` on DataFrames is elementwise,
    # so asserting on it raises ValueError instead of testing inequality;
    # `.equals` yields a single bool.
    assert not without_extra.equals(with_default)