- realtor radius
parent
1f1ca8068f
commit
40bbf76db1
|
@ -106,7 +106,7 @@ def _process_result(result: Property) -> pd.DataFrame:
|
||||||
return properties_df
|
return properties_df
|
||||||
|
|
||||||
|
|
||||||
def _scrape_single_site(location: str, site_name: str, listing_type: str, proxy: str = None) -> pd.DataFrame:
|
def _scrape_single_site(location: str, site_name: str, listing_type: str, radius: float, proxy: str = None) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
Helper function to scrape a single site.
|
Helper function to scrape a single site.
|
||||||
"""
|
"""
|
||||||
|
@ -117,6 +117,7 @@ def _scrape_single_site(location: str, site_name: str, listing_type: str, proxy:
|
||||||
listing_type=ListingType[listing_type.upper()],
|
listing_type=ListingType[listing_type.upper()],
|
||||||
site_name=SiteName.get_by_value(site_name.lower()),
|
site_name=SiteName.get_by_value(site_name.lower()),
|
||||||
proxy=proxy,
|
proxy=proxy,
|
||||||
|
radius=radius,
|
||||||
)
|
)
|
||||||
|
|
||||||
site = _scrapers[site_name.lower()](scraper_input)
|
site = _scrapers[site_name.lower()](scraper_input)
|
||||||
|
@ -134,12 +135,14 @@ def scrape_property(
|
||||||
location: str,
|
location: str,
|
||||||
site_name: Union[str, list[str]] = "realtor.com",
|
site_name: Union[str, list[str]] = "realtor.com",
|
||||||
listing_type: str = "for_sale",
|
listing_type: str = "for_sale",
|
||||||
|
radius: float = None,
|
||||||
proxy: str = None,
|
proxy: str = None,
|
||||||
keep_duplicates: bool = False
|
keep_duplicates: bool = False
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
Scrape property from various sites from a given location and listing type.
|
Scrape property from various sites from a given location and listing type.
|
||||||
|
|
||||||
|
:param radius: Radius in miles to find comparable properties on individual addresses
|
||||||
:param keep_duplicates:
|
:param keep_duplicates:
|
||||||
:param proxy:
|
:param proxy:
|
||||||
:param location: US Location (e.g. 'San Francisco, CA', 'Cook County, IL', '85281', '2530 Al Lipscomb Way')
|
:param location: US Location (e.g. 'San Francisco, CA', 'Cook County, IL', '85281', '2530 Al Lipscomb Way')
|
||||||
|
@ -157,12 +160,12 @@ def scrape_property(
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
if len(site_name) == 1:
|
if len(site_name) == 1:
|
||||||
final_df = _scrape_single_site(location, site_name[0], listing_type, proxy)
|
final_df = _scrape_single_site(location, site_name[0], listing_type, radius, proxy)
|
||||||
results.append(final_df)
|
results.append(final_df)
|
||||||
else:
|
else:
|
||||||
with ThreadPoolExecutor() as executor:
|
with ThreadPoolExecutor() as executor:
|
||||||
futures = {
|
futures = {
|
||||||
executor.submit(_scrape_single_site, location, s_name, listing_type, proxy): s_name
|
executor.submit(_scrape_single_site, location, s_name, listing_type, radius, proxy): s_name
|
||||||
for s_name in site_name
|
for s_name in site_name
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ class ScraperInput:
|
||||||
location: str
|
location: str
|
||||||
listing_type: ListingType
|
listing_type: ListingType
|
||||||
site_name: SiteName
|
site_name: SiteName
|
||||||
|
radius: float | None = None
|
||||||
proxy: str | None = None
|
proxy: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@ -29,6 +30,7 @@ class Scraper:
|
||||||
|
|
||||||
self.listing_type = scraper_input.listing_type
|
self.listing_type = scraper_input.listing_type
|
||||||
self.site_name = scraper_input.site_name
|
self.site_name = scraper_input.site_name
|
||||||
|
self.radius = scraper_input.radius
|
||||||
|
|
||||||
def search(self) -> list[Property]:
|
def search(self) -> list[Property]:
|
||||||
...
|
...
|
||||||
|
|
|
@ -153,76 +153,90 @@ class RealtorScraper(Scraper):
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
def handle_area(self, variables: dict, return_total: bool = False) -> list[Property] | int:
|
def handle_area(self, variables: dict, is_for_comps: bool = False, return_total: bool = False) -> list[Property] | int:
|
||||||
"""
|
"""
|
||||||
Handles a location area & returns a list of properties
|
Handles a location area & returns a list of properties
|
||||||
"""
|
"""
|
||||||
query = (
|
|
||||||
"""query Home_search(
|
results_query = """{
|
||||||
$city: String,
|
count
|
||||||
$county: [String],
|
total
|
||||||
$state_code: String,
|
results {
|
||||||
$postal_code: String
|
property_id
|
||||||
$offset: Int,
|
description {
|
||||||
) {
|
baths
|
||||||
home_search(
|
beds
|
||||||
query: {
|
lot_sqft
|
||||||
city: $city
|
sqft
|
||||||
county: $county
|
text
|
||||||
postal_code: $postal_code
|
sold_price
|
||||||
state_code: $state_code
|
stories
|
||||||
status: %s
|
year_built
|
||||||
}
|
garage
|
||||||
limit: 200
|
unit_number
|
||||||
offset: $offset
|
floor_number
|
||||||
|
}
|
||||||
|
location {
|
||||||
|
address {
|
||||||
|
city
|
||||||
|
country
|
||||||
|
line
|
||||||
|
postal_code
|
||||||
|
state_code
|
||||||
|
state
|
||||||
|
street_direction
|
||||||
|
street_name
|
||||||
|
street_number
|
||||||
|
street_post_direction
|
||||||
|
street_suffix
|
||||||
|
unit
|
||||||
|
coordinate {
|
||||||
|
lon
|
||||||
|
lat
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
list_price
|
||||||
|
price_per_sqft
|
||||||
|
source {
|
||||||
|
id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}}"""
|
||||||
|
|
||||||
|
if not is_for_comps:
|
||||||
|
query = (
|
||||||
|
"""query Home_search(
|
||||||
|
$city: String,
|
||||||
|
$county: [String],
|
||||||
|
$state_code: String,
|
||||||
|
$postal_code: String
|
||||||
|
$offset: Int,
|
||||||
) {
|
) {
|
||||||
count
|
home_search(
|
||||||
total
|
query: {
|
||||||
results {
|
city: $city
|
||||||
property_id
|
county: $county
|
||||||
description {
|
postal_code: $postal_code
|
||||||
baths
|
state_code: $state_code
|
||||||
beds
|
status: %s
|
||||||
lot_sqft
|
|
||||||
sqft
|
|
||||||
text
|
|
||||||
sold_price
|
|
||||||
stories
|
|
||||||
year_built
|
|
||||||
garage
|
|
||||||
unit_number
|
|
||||||
floor_number
|
|
||||||
}
|
}
|
||||||
location {
|
limit: 200
|
||||||
address {
|
offset: $offset
|
||||||
city
|
) %s"""
|
||||||
country
|
% (self.listing_type.value.lower(), results_query))
|
||||||
line
|
else:
|
||||||
postal_code
|
query = (
|
||||||
state_code
|
"""query Property_search(
|
||||||
state
|
$coordinates: [Float]!
|
||||||
street_direction
|
$radius: String!
|
||||||
street_name
|
$offset: Int!,
|
||||||
street_number
|
) {
|
||||||
street_post_direction
|
property_search(
|
||||||
street_suffix
|
query: { nearby: { coordinates: $coordinates, radius: $radius } }
|
||||||
unit
|
limit: 200
|
||||||
coordinate {
|
offset: $offset
|
||||||
lon
|
) %s""" % results_query)
|
||||||
lat
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
list_price
|
|
||||||
price_per_sqft
|
|
||||||
source {
|
|
||||||
id
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}"""
|
|
||||||
% self.listing_type.value.lower()
|
|
||||||
)
|
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"query": query,
|
"query": query,
|
||||||
|
@ -232,9 +246,10 @@ class RealtorScraper(Scraper):
|
||||||
response = self.session.post(self.search_url, json=payload)
|
response = self.session.post(self.search_url, json=payload)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
response_json = response.json()
|
response_json = response.json()
|
||||||
|
search_key = "home_search" if not is_for_comps else "property_search"
|
||||||
|
|
||||||
if return_total:
|
if return_total:
|
||||||
return response_json["data"]["home_search"]["total"]
|
return response_json["data"][search_key]["total"]
|
||||||
|
|
||||||
properties: list[Property] = []
|
properties: list[Property] = []
|
||||||
|
|
||||||
|
@ -242,13 +257,13 @@ class RealtorScraper(Scraper):
|
||||||
response_json is None
|
response_json is None
|
||||||
or "data" not in response_json
|
or "data" not in response_json
|
||||||
or response_json["data"] is None
|
or response_json["data"] is None
|
||||||
or "home_search" not in response_json["data"]
|
or search_key not in response_json["data"]
|
||||||
or response_json["data"]["home_search"] is None
|
or response_json["data"][search_key] is None
|
||||||
or "results" not in response_json["data"]["home_search"]
|
or "results" not in response_json["data"][search_key]
|
||||||
):
|
):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
for result in response_json["data"]["home_search"]["results"]:
|
for result in response_json["data"][search_key]["results"]:
|
||||||
self.counter += 1
|
self.counter += 1
|
||||||
address_one, _ = parse_address_one(result["location"]["address"]["line"])
|
address_one, _ = parse_address_one(result["location"]["address"]["line"])
|
||||||
realty_property = Property(
|
realty_property = Property(
|
||||||
|
@ -297,21 +312,31 @@ class RealtorScraper(Scraper):
|
||||||
def search(self):
|
def search(self):
|
||||||
location_info = self.handle_location()
|
location_info = self.handle_location()
|
||||||
location_type = location_info["area_type"]
|
location_type = location_info["area_type"]
|
||||||
|
is_for_comps = self.radius is not None and location_type == "address"
|
||||||
|
|
||||||
if location_type == "address":
|
if location_type == "address" and not is_for_comps:
|
||||||
property_id = location_info["mpr_id"]
|
property_id = location_info["mpr_id"]
|
||||||
return self.handle_address(property_id)
|
return self.handle_address(property_id)
|
||||||
|
|
||||||
offset = 0
|
offset = 0
|
||||||
search_variables = {
|
|
||||||
"city": location_info.get("city"),
|
|
||||||
"county": location_info.get("county"),
|
|
||||||
"state_code": location_info.get("state_code"),
|
|
||||||
"postal_code": location_info.get("postal_code"),
|
|
||||||
"offset": offset,
|
|
||||||
}
|
|
||||||
|
|
||||||
total = self.handle_area(search_variables, return_total=True)
|
if not is_for_comps:
|
||||||
|
search_variables = {
|
||||||
|
"city": location_info.get("city"),
|
||||||
|
"county": location_info.get("county"),
|
||||||
|
"state_code": location_info.get("state_code"),
|
||||||
|
"postal_code": location_info.get("postal_code"),
|
||||||
|
"offset": offset,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
coordinates = list(location_info["centroid"].values())
|
||||||
|
search_variables = {
|
||||||
|
"coordinates": coordinates,
|
||||||
|
"radius": "{}mi".format(self.radius),
|
||||||
|
"offset": offset,
|
||||||
|
}
|
||||||
|
|
||||||
|
total = self.handle_area(search_variables, return_total=True, is_for_comps=is_for_comps)
|
||||||
|
|
||||||
homes = []
|
homes = []
|
||||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||||
|
@ -320,6 +345,7 @@ class RealtorScraper(Scraper):
|
||||||
self.handle_area,
|
self.handle_area,
|
||||||
variables=search_variables | {"offset": i},
|
variables=search_variables | {"offset": i},
|
||||||
return_total=False,
|
return_total=False,
|
||||||
|
is_for_comps=is_for_comps,
|
||||||
)
|
)
|
||||||
for i in range(0, total, 200)
|
for i in range(0, total, 200)
|
||||||
]
|
]
|
||||||
|
|
|
@ -7,6 +7,16 @@ from homeharvest.exceptions import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_realtor_comps():
|
||||||
|
result = scrape_property(
|
||||||
|
location="2530 Al Lipscomb Way",
|
||||||
|
site_name="realtor.com",
|
||||||
|
radius=0.5,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
|
||||||
def test_realtor():
|
def test_realtor():
|
||||||
results = [
|
results = [
|
||||||
scrape_property(
|
scrape_property(
|
||||||
|
|
Loading…
Reference in New Issue