- extra_property_data parameter
- updated docs
- classified exception
pull/82/head v0.3.21
parent
46985dcee4
commit
c3e24a4ce0
15
README.md
15
README.md
|
@ -43,7 +43,6 @@ properties = scrape_property(
|
||||||
# date_from="2023-05-01", # alternative to past_days
|
# date_from="2023-05-01", # alternative to past_days
|
||||||
# date_to="2023-05-28",
|
# date_to="2023-05-28",
|
||||||
# foreclosure=True
|
# foreclosure=True
|
||||||
|
|
||||||
# mls_only=True, # only fetch MLS listings
|
# mls_only=True, # only fetch MLS listings
|
||||||
)
|
)
|
||||||
print(f"Number of properties: {len(properties)}")
|
print(f"Number of properties: {len(properties)}")
|
||||||
|
@ -92,6 +91,8 @@ Optional
|
||||||
├── foreclosure (True/False): If set, fetches only foreclosures
|
├── foreclosure (True/False): If set, fetches only foreclosures
|
||||||
│
|
│
|
||||||
└── proxy (string): In format 'http://user:pass@host:port'
|
└── proxy (string): In format 'http://user:pass@host:port'
|
||||||
|
│
|
||||||
|
└── extra_property_data (bool): If set, fetches additional property data (e.g. agent, broker, property evaluations). Increases the number of requests by O(n), where n is the number of properties returned.
|
||||||
```
|
```
|
||||||
|
|
||||||
### Property Schema
|
### Property Schema
|
||||||
|
@ -139,17 +140,13 @@ Property
|
||||||
|
|
||||||
├── Agent Info:
|
├── Agent Info:
|
||||||
│ ├── agent
|
│ ├── agent
|
||||||
│ ├── broker
|
│ ├── agent_email
|
||||||
│ └── broker_phone
|
│ └── agent_phone
|
||||||
|
|
||||||
├── Agent Info:
|
|
||||||
│ ├── agent
|
|
||||||
│ ├── broker
|
|
||||||
│ └── broker_phone
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Exceptions
|
### Exceptions
|
||||||
The following exceptions may be raised when using HomeHarvest:
|
The following exceptions may be raised when using HomeHarvest:
|
||||||
|
|
||||||
- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
|
- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
|
||||||
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD
|
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD.
|
||||||
|
- `AuthenticationError` - Realtor.com token request failed.
|
||||||
|
|
|
@ -43,6 +43,7 @@ def scrape_property(
|
||||||
date_from=date_from,
|
date_from=date_from,
|
||||||
date_to=date_to,
|
date_to=date_to,
|
||||||
foreclosure=foreclosure,
|
foreclosure=foreclosure,
|
||||||
|
extra_property_data=extra_property_data,
|
||||||
)
|
)
|
||||||
|
|
||||||
site = RealtorScraper(scraper_input)
|
site = RealtorScraper(scraper_input)
|
||||||
|
|
|
@ -3,6 +3,7 @@ import requests
|
||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
from urllib3.util.retry import Retry
|
from urllib3.util.retry import Retry
|
||||||
import uuid
|
import uuid
|
||||||
|
from ...exceptions import AuthenticationError
|
||||||
from .models import Property, ListingType, SiteName
|
from .models import Property, ListingType, SiteName
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,12 +12,13 @@ class ScraperInput:
|
||||||
location: str
|
location: str
|
||||||
listing_type: ListingType
|
listing_type: ListingType
|
||||||
radius: float | None = None
|
radius: float | None = None
|
||||||
mls_only: bool | None = None
|
mls_only: bool | None = False
|
||||||
proxy: str | None = None
|
proxy: str | None = None
|
||||||
last_x_days: int | None = None
|
last_x_days: int | None = None
|
||||||
date_from: str | None = None
|
date_from: str | None = None
|
||||||
date_to: str | None = None
|
date_to: str | None = None
|
||||||
foreclosure: bool | None = None
|
foreclosure: bool | None = False
|
||||||
|
extra_property_data: bool | None = True
|
||||||
|
|
||||||
|
|
||||||
class Scraper:
|
class Scraper:
|
||||||
|
@ -57,6 +59,7 @@ class Scraper:
|
||||||
self.date_from = scraper_input.date_from
|
self.date_from = scraper_input.date_from
|
||||||
self.date_to = scraper_input.date_to
|
self.date_to = scraper_input.date_to
|
||||||
self.foreclosure = scraper_input.foreclosure
|
self.foreclosure = scraper_input.foreclosure
|
||||||
|
self.extra_property_data = scraper_input.extra_property_data
|
||||||
|
|
||||||
def search(self) -> list[Property]: ...
|
def search(self) -> list[Property]: ...
|
||||||
|
|
||||||
|
@ -65,7 +68,8 @@ class Scraper:
|
||||||
|
|
||||||
def handle_location(self): ...
|
def handle_location(self): ...
|
||||||
|
|
||||||
def get_access_token(self):
|
@staticmethod
|
||||||
|
def get_access_token():
|
||||||
url = "https://graph.realtor.com/auth/token"
|
url = "https://graph.realtor.com/auth/token"
|
||||||
|
|
||||||
payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}'
|
payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}'
|
||||||
|
@ -80,8 +84,11 @@ class Scraper:
|
||||||
response = requests.post(url, headers=headers, data=payload)
|
response = requests.post(url, headers=headers, data=payload)
|
||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
try:
|
|
||||||
access_token = data["access_token"]
|
if not (access_token := data.get("access_token")):
|
||||||
except Exception:
|
raise AuthenticationError(
|
||||||
raise Exception("Could not get access token, use a proxy/vpn or wait")
|
"Failed to get access token, use a proxy/vpn or wait a moment and try again.",
|
||||||
|
response=response
|
||||||
|
)
|
||||||
|
|
||||||
return access_token
|
return access_token
|
||||||
|
|
|
@ -651,6 +651,9 @@ class RealtorScraper(Scraper):
|
||||||
return homes
|
return homes
|
||||||
|
|
||||||
def get_prop_details(self, property_id: str) -> dict:
|
def get_prop_details(self, property_id: str) -> dict:
|
||||||
|
if not self.extra_property_data:
|
||||||
|
return {}
|
||||||
|
|
||||||
query = """query GetHome($property_id: ID!) {
|
query = """query GetHome($property_id: ID!) {
|
||||||
home(property_id: $property_id) {
|
home(property_id: $property_id) {
|
||||||
__typename
|
__typename
|
||||||
|
|
|
@ -4,3 +4,11 @@ class InvalidListingType(Exception):
|
||||||
|
|
||||||
class InvalidDate(Exception):
|
class InvalidDate(Exception):
|
||||||
"""Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23"""
|
"""Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23"""
|
||||||
|
|
||||||
|
|
||||||
|
class AuthenticationError(Exception):
|
||||||
|
"""Raised when there is an issue with the authentication process."""
|
||||||
|
def __init__(self, *args, response):
|
||||||
|
super().__init__(*args)
|
||||||
|
|
||||||
|
self.response = response
|
||||||
|
|
|
@ -142,3 +142,17 @@ def test_realtor_foreclosed():
|
||||||
def test_realtor_agent():
|
def test_realtor_agent():
|
||||||
scraped = scrape_property(location="Detroit, MI", listing_type="for_sale")
|
scraped = scrape_property(location="Detroit, MI", listing_type="for_sale")
|
||||||
assert scraped["agent"].nunique() > 1
|
assert scraped["agent"].nunique() > 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_realtor_without_extra_details():
|
||||||
|
results = [
|
||||||
|
scrape_property(
|
||||||
|
location="15509 N 172nd Dr, Surprise, AZ 85388",
|
||||||
|
extra_property_data=False,
|
||||||
|
),
|
||||||
|
scrape_property(
|
||||||
|
location="15509 N 172nd Dr, Surprise, AZ 85388",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
assert results[0] != results[1]
|
||||||
|
|
Loading…
Reference in New Issue