- finished realtor
parent
905cfcae2c
commit
ba9fe806a7
|
@ -69,9 +69,9 @@ def process_result(result: Union[Building, Property]) -> pd.DataFrame:
|
|||
prop_data = result.__dict__
|
||||
|
||||
address_data = prop_data["address"]
|
||||
prop_data["site_name"] = prop_data["site_name"].value
|
||||
prop_data["site_name"] = prop_data["site_name"]
|
||||
prop_data["listing_type"] = prop_data["listing_type"].value
|
||||
prop_data["property_type"] = prop_data["property_type"].value.lower()
|
||||
prop_data["property_type"] = prop_data["property_type"].value.lower() if prop_data["property_type"] else None
|
||||
prop_data["address_one"] = address_data.address_one
|
||||
prop_data["city"] = address_data.city
|
||||
prop_data["state"] = address_data.state
|
||||
|
@ -90,13 +90,13 @@ def scrape_property(
|
|||
location: str,
|
||||
site_name: str,
|
||||
listing_type: str = "for_sale", #: for_sale, for_rent, sold
|
||||
) -> Union[list[Building], list[Property]]:
|
||||
) -> pd.DataFrame:
|
||||
validate_input(site_name, listing_type)
|
||||
|
||||
scraper_input = ScraperInput(
|
||||
location=location,
|
||||
listing_type=ListingType[listing_type.upper()],
|
||||
site_name=SiteName[site_name.upper()],
|
||||
site_name=site_name.lower(),
|
||||
)
|
||||
|
||||
site = _scrapers[site_name.lower()](scraper_input)
|
||||
|
|
|
@ -7,13 +7,15 @@ from .models import Property, ListingType, SiteName
|
|||
class ScraperInput:
|
||||
location: str
|
||||
listing_type: ListingType
|
||||
site_name: SiteName
|
||||
site_name: str
|
||||
proxy_url: str | None = None
|
||||
|
||||
|
||||
class Scraper:
|
||||
def __init__(self, scraper_input: ScraperInput):
|
||||
self.location = scraper_input.location
|
||||
self.listing_type = scraper_input.listing_type
|
||||
|
||||
self.session = requests.Session()
|
||||
self.listing_type = scraper_input.listing_type
|
||||
self.site_name = scraper_input.site_name
|
||||
|
|
|
@ -53,7 +53,7 @@ class Address:
|
|||
|
||||
@dataclass()
|
||||
class Realty:
|
||||
site_name: SiteName
|
||||
site_name: str
|
||||
address: Address
|
||||
url: str
|
||||
listing_type: ListingType | None = None
|
||||
|
@ -68,7 +68,6 @@ class Property(Realty):
|
|||
year_built: int | None = None
|
||||
square_feet: int | None = None
|
||||
price_per_square_foot: int | None = None
|
||||
year_built: int | None = None
|
||||
mls_id: str | None = None
|
||||
|
||||
agent_name: str | None = None
|
||||
|
|
|
@ -1,12 +1,15 @@
|
|||
import json
|
||||
from ..models import Property, Address
|
||||
from .. import Scraper
|
||||
from typing import Any
|
||||
from typing import Any, Generator
|
||||
from ....exceptions import NoResultsFound
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
|
||||
class RealtorScraper(Scraper):
|
||||
def __init__(self, scraper_input):
    """Initialise the base scraper, then record the search endpoint.

    :param scraper_input: forwarded unchanged to the parent initialiser.
    """
    super().__init__(scraper_input)
    # Endpoint that the GraphQL payloads built by this class are POSTed to.
    # Adjacent literals are joined at compile time, so the value is one URL.
    self.search_url = (
        "https://www.realtor.com/api/v1/rdc_search_srp"
        "?client_id=rdc-search-new-communities&schema=vesta"
    )
|
||||
|
||||
def handle_location(self):
|
||||
headers = {
|
||||
|
@ -26,7 +29,7 @@ class RealtorScraper(Scraper):
|
|||
|
||||
params = {
|
||||
"input": self.location,
|
||||
"client_id": "for-sale",
|
||||
"client_id": self.listing_type.value.replace('_', '-'),
|
||||
"limit": "1",
|
||||
"area_types": "city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park",
|
||||
}
|
||||
|
@ -38,14 +41,228 @@ class RealtorScraper(Scraper):
|
|||
)
|
||||
response_json = response.json()
|
||||
|
||||
return response_json["autocomplete"][0]
|
||||
result = response_json["autocomplete"]
|
||||
|
||||
if result is None:
|
||||
raise NoResultsFound("No results found for location: " + self.location)
|
||||
|
||||
return result[0]
|
||||
|
||||
def handle_address(self, property_id: str) -> list[Property]:
    """Fetch one property by id and return it as a single-element list.

    Posts a GraphQL ``Property`` query to ``self.search_url`` through
    ``self.session`` and maps the response onto a ``Property``.

    :param property_id: sent as the ``$property_id`` query variable and
        also stored on the result as ``mls_id``.
    :return: a list holding exactly one ``Property``.
    :raises NoResultsFound: if the response contains no property for the id.
    """
    query = """query Property($property_id: ID!) {
                property(id: $property_id) {
                    property_id
                    details {
                        date_updated
                        garage
                        permalink
                        year_built
                        stories
                    }
                    address {
                        address_validation_code
                        city
                        country
                        county
                        line
                        postal_code
                        state_code
                        street_direction
                        street_name
                        street_number
                        street_suffix
                        street_post_direction
                        unit_value
                        unit
                        unit_descriptor
                        zip
                    }
                    basic {
                        baths
                        beds
                        price
                        sqft
                        lot_sqft
                        type
                        sold_price
                    }
                    public_record {
                        lot_size
                        sqft
                        stories
                        units
                        year_built
                    }
                }
            }"""

    variables = {
        'property_id': property_id
    }

    payload = {
        'query': query,
        'variables': variables,
    }

    response = self.session.post(self.search_url, json=payload)
    response_json = response.json()

    property_info = response_json['data']['property']
    if property_info is None:
        # NOTE(review): assumes a null `property` means "unknown id" - confirm
        # against the API. Raising here replaces an opaque TypeError from the
        # subscripting below.
        raise NoResultsFound("No results found for property id: " + str(property_id))

    address = property_info['address']
    details = property_info['details']
    basic = property_info['basic']
    public_record = property_info['public_record']

    price = basic['price']
    sqft = basic['sqft']
    # `sqft` may be None *or* 0; either way no ratio can be computed, and a
    # zero divisor would otherwise raise ZeroDivisionError.
    price_per_square_foot = price / sqft if price is not None and sqft else None

    return [Property(
        site_name=self.site_name,
        address=Address(
            address_one=address['line'],
            city=address['city'],
            state=address['state_code'],
            zip_code=address['postal_code'],
        ),
        url="https://www.realtor.com/realestateandhomes-detail/" + details['permalink'],
        beds=basic['beds'],
        baths=basic['baths'],
        stories=details['stories'],
        year_built=details['year_built'],
        square_feet=sqft,
        price_per_square_foot=price_per_square_foot,
        price=price,
        mls_id=property_id,
        listing_type=self.listing_type,
        # The public record section can be absent as a whole.
        lot_size=public_record['lot_size'] if public_record is not None else None,
    )]
|
||||
|
||||
def handle_area(self, variables: dict, return_total: bool = False) -> list[Property] | int:
    """Run one page of the area search, or return the total match count.

    Posts a GraphQL ``Home_search`` query to ``self.search_url`` through
    ``self.session``. The query's ``status`` filter is filled in from
    ``self.listing_type.value`` and the page size is fixed at 200.

    :param variables: GraphQL variables for the query (``city``, ``county``,
        ``state_code``, ``postal_code``, ``offset``).
    :param return_total: when true, return ``home_search.total`` from the
        response instead of the page of properties.
    :return: the total as returned by the API, or a list of ``Property``
        built from ``home_search.results``. A null ``home_search`` yields
        ``0`` / ``[]`` respectively.
    """
    # %-formatting is used on purpose: the query is full of literal braces,
    # which would all need escaping in an f-string or str.format template.
    query = """query Home_search(
                    $city: String,
                    $county: [String],
                    $state_code: String,
                    $postal_code: String
                    $offset: Int,
                ) {
                    home_search(
                        query: {
                            city: $city
                            county: $county
                            postal_code: $postal_code
                            state_code: $state_code
                            status: %s
                        }
                        limit: 200
                        offset: $offset
                    ) {
                        count
                        total
                        results {
                            property_id
                            description {
                                baths
                                beds
                                lot_sqft
                                sqft
                                text
                                sold_price
                                stories
                                year_built
                                garage
                                unit_number
                                floor_number
                            }
                            location {
                                address {
                                    city
                                    country
                                    line
                                    postal_code
                                    state_code
                                    state
                                    street_direction
                                    street_name
                                    street_number
                                    street_post_direction
                                    street_suffix
                                    unit
                                }
                            }
                            list_price
                            price_per_sqft
                            source {
                                id
                            }
                        }
                    }
                }""" % self.listing_type.value

    payload = {
        'query': query,
        'variables': variables,
    }

    response = self.session.post(self.search_url, json=payload)
    response_json = response.json()

    home_search = response_json['data']['home_search']
    if home_search is None:
        # NOTE(review): assumes a null `home_search` means "nothing matched"
        # - confirm against the API. Without this guard the subscripting
        # below fails with TypeError.
        return 0 if return_total else []

    if return_total:
        return home_search['total']

    def to_property(result: dict) -> Property:
        # Map one entry of `home_search.results` onto a Property.
        address = result['location']['address']
        description = result['description']
        return Property(
            address=Address(
                address_one=address['line'],
                city=address['city'],
                state=address['state_code'],
                zip_code=address['postal_code'],
                address_two=address['unit'],
            ),
            site_name=self.site_name,
            url="https://www.realtor.com/realestateandhomes-detail/" + result['property_id'],
            beds=description['beds'],
            baths=description['baths'],
            stories=description['stories'],
            year_built=description['year_built'],
            square_feet=description['sqft'],
            price_per_square_foot=result['price_per_sqft'],
            price=result['list_price'],
            mls_id=result['property_id'],
            listing_type=self.listing_type,
            lot_size=description['lot_sqft'],
        )

    # `results` is treated as empty when null, for the same reason as above.
    return [to_property(result) for result in home_search['results'] or []]
|
||||
|
||||
def search(self):
    """Resolve the configured location and return the matching properties.

    ``handle_location`` decides the path taken:

    * ``area_type == 'address'``: the single property identified by
      ``mpr_id`` is fetched through ``handle_address``.
    * anything else: the total match count is requested once through
      ``handle_area``, then every 200-result page is fetched on a thread
      pool and the pages are concatenated in offset order.

    :return: whatever ``handle_address`` returns for an address, otherwise
        the concatenated ``handle_area`` pages.
    """
    location_info = self.handle_location()

    # Property types listed by the original author for reference:
    # apartment + building + commercial + condo_townhome +
    # condo_townhome_rowhome_coop + condos + coop + duplex_triplex + farm +
    # investment + land + mobile + multi_family + rental + single_family +
    # townhomes

    if location_info["area_type"] == 'address':
        return self.handle_address(location_info['mpr_id'])

    search_variables = {
        'city': location_info.get('city'),
        'county': location_info.get('county'),
        'state_code': location_info.get('state_code'),
        'postal_code': location_info.get('postal_code'),
        'offset': 0,
    }

    # A null/zero total simply produces no page requests.
    total = self.handle_area(search_variables, return_total=True) or 0

    homes = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(
                self.handle_area,
                variables=search_variables | {'offset': page_offset},
                return_total=False,
            )
            for page_offset in range(0, total, 200)  # 200 == page size used by handle_area
        ]

        # Drain in submission order, not completion order, so the result
        # order is deterministic; the requests still run concurrently.
        for future in futures:
            homes.extend(future.result())

    return homes
|
||||
|
|
|
@ -3,6 +3,9 @@ from homeharvest import scrape_property
|
|||
|
||||
def test_realtor():
|
||||
results = [
|
||||
scrape_property(location="2530 Al Lipscomb Way", site_name="realtor.com"),
|
||||
scrape_property(location="Phoenix, AZ", site_name="realtor.com"), #: does not support "city, state, USA" format
|
||||
scrape_property(location="Dallas, TX", site_name="realtor.com"), #: does not support "city, state, USA" format
|
||||
scrape_property(location="85281", site_name="realtor.com"),
|
||||
]
|
||||
|
||||
|
|
Loading…
Reference in New Issue