- extra_property_data parameter
- updated docs
- classified exception
pull/82/head v0.3.21
parent
46985dcee4
commit
c3e24a4ce0
15
README.md
15
README.md
|
@ -43,7 +43,6 @@ properties = scrape_property(
|
|||
# date_from="2023-05-01", # alternative to past_days
|
||||
# date_to="2023-05-28",
|
||||
# foreclosure=True
|
||||
|
||||
# mls_only=True, # only fetch MLS listings
|
||||
)
|
||||
print(f"Number of properties: {len(properties)}")
|
||||
|
@ -92,6 +91,8 @@ Optional
|
|||
├── foreclosure (True/False): If set, fetches only foreclosures
|
||||
│
|
||||
├── proxy (string): In format 'http://user:pass@host:port'
|
||||
│
|
||||
└── extra_property_data (bool): If set, fetches additional property data (e.g. agent, broker, property evaluations). Adds O(n) requests, where n is the number of properties.
|
||||
```
|
||||
|
||||
### Property Schema
|
||||
|
@ -139,17 +140,13 @@ Property
|
|||
|
||||
├── Agent Info:
|
||||
│ ├── agent
|
||||
│ ├── broker
|
||||
│ └── broker_phone
|
||||
|
||||
├── Agent Info:
|
||||
│ ├── agent
|
||||
│ ├── broker
|
||||
│ └── broker_phone
|
||||
│ ├── agent_email
|
||||
│ └── agent_phone
|
||||
```
|
||||
|
||||
### Exceptions
|
||||
The following exceptions may be raised when using HomeHarvest:
|
||||
|
||||
- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
|
||||
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD
|
||||
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD.
|
||||
- `AuthenticationError` - Realtor.com token request failed.
|
||||
|
|
|
@ -43,6 +43,7 @@ def scrape_property(
|
|||
date_from=date_from,
|
||||
date_to=date_to,
|
||||
foreclosure=foreclosure,
|
||||
extra_property_data=extra_property_data,
|
||||
)
|
||||
|
||||
site = RealtorScraper(scraper_input)
|
||||
|
|
|
@ -3,6 +3,7 @@ import requests
|
|||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
import uuid
|
||||
from ...exceptions import AuthenticationError
|
||||
from .models import Property, ListingType, SiteName
|
||||
|
||||
|
||||
|
@ -11,12 +12,13 @@ class ScraperInput:
|
|||
location: str
|
||||
listing_type: ListingType
|
||||
radius: float | None = None
|
||||
mls_only: bool | None = None
|
||||
mls_only: bool | None = False
|
||||
proxy: str | None = None
|
||||
last_x_days: int | None = None
|
||||
date_from: str | None = None
|
||||
date_to: str | None = None
|
||||
foreclosure: bool | None = None
|
||||
foreclosure: bool | None = False
|
||||
extra_property_data: bool | None = True
|
||||
|
||||
|
||||
class Scraper:
|
||||
|
@ -57,6 +59,7 @@ class Scraper:
|
|||
self.date_from = scraper_input.date_from
|
||||
self.date_to = scraper_input.date_to
|
||||
self.foreclosure = scraper_input.foreclosure
|
||||
self.extra_property_data = scraper_input.extra_property_data
|
||||
|
||||
def search(self) -> list[Property]: ...
|
||||
|
||||
|
@ -65,7 +68,8 @@ class Scraper:
|
|||
|
||||
def handle_location(self): ...
|
||||
|
||||
def get_access_token(self):
|
||||
@staticmethod
|
||||
def get_access_token():
|
||||
url = "https://graph.realtor.com/auth/token"
|
||||
|
||||
payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}'
|
||||
|
@ -80,8 +84,11 @@ class Scraper:
|
|||
response = requests.post(url, headers=headers, data=payload)
|
||||
|
||||
data = response.json()
|
||||
try:
|
||||
access_token = data["access_token"]
|
||||
except Exception:
|
||||
raise Exception("Could not get access token, use a proxy/vpn or wait")
|
||||
|
||||
if not (access_token := data.get("access_token")):
|
||||
raise AuthenticationError(
|
||||
"Failed to get access token, use a proxy/vpn or wait a moment and try again.",
|
||||
response=response
|
||||
)
|
||||
|
||||
return access_token
|
||||
|
|
|
@ -651,6 +651,9 @@ class RealtorScraper(Scraper):
|
|||
return homes
|
||||
|
||||
def get_prop_details(self, property_id: str) -> dict:
|
||||
if not self.extra_property_data:
|
||||
return {}
|
||||
|
||||
query = """query GetHome($property_id: ID!) {
|
||||
home(property_id: $property_id) {
|
||||
__typename
|
||||
|
|
|
@ -4,3 +4,11 @@ class InvalidListingType(Exception):
|
|||
|
||||
class InvalidDate(Exception):
    """Raised when only one of date_from or date_to is provided, or when either is not in YYYY-MM-DD format (e.g. 2023-10-23)."""
|
||||
|
||||
|
||||
class AuthenticationError(Exception):
    """Raised when there is an issue with the authentication process.

    Attributes:
        response: The object passed as ``response`` by the raiser (the
            response of the failed token request), or ``None`` when it was
            not supplied.
    """

    def __init__(self, *args, response=None):
        """Store the message arguments and the associated response.

        Args:
            *args: Positional arguments forwarded to ``Exception``.
            response: Keyword-only; the response tied to the failure.
        """
        # ``response`` defaults to None so the exception can be rebuilt from
        # its positional args alone. copy/pickle re-call the class with
        # ``self.args`` and then restore ``__dict__`` (which carries the
        # original ``response``); a required keyword would raise TypeError.
        super().__init__(*args)

        self.response = response
|
||||
|
|
|
@ -142,3 +142,17 @@ def test_realtor_foreclosed():
|
|||
def test_realtor_agent():
    """Scrape for-sale listings in Detroit, MI and expect more than one distinct agent."""
    listings = scrape_property(location="Detroit, MI", listing_type="for_sale")
    distinct_agents = listings["agent"].nunique()
    assert distinct_agents > 1
|
||||
|
||||
|
||||
def test_realtor_without_extra_details():
    """Check that disabling ``extra_property_data`` changes the scraped result.

    The same address is scraped twice, once with ``extra_property_data=False``
    and once with the default, and the two results are expected to differ.
    """
    location = "15509 N 172nd Dr, Surprise, AZ 85388"
    without_extras = scrape_property(
        location=location,
        extra_property_data=False,
    )
    with_extras = scrape_property(
        location=location,
    )

    # Results of scrape_property are used as pandas objects elsewhere in this
    # file (``scraped["agent"].nunique()``), so presumably these are
    # DataFrames. ``!=`` on DataFrames is element-wise and cannot be used as a
    # boolean in ``assert``; compare the whole frames with ``equals`` instead.
    assert not without_extras.equals(with_extras)
|
||||
|
|
Loading…
Reference in New Issue