- fix limit parameter
- fix specific for_rent apartment listing prices
pull/102/head v0.3.34
parent
3f44744d61
commit
6d14b8df5a
|
@ -128,6 +128,8 @@ Property
|
||||||
├── Property Listing Details:
|
├── Property Listing Details:
|
||||||
│ ├── days_on_mls
|
│ ├── days_on_mls
|
||||||
│ ├── list_price
|
│ ├── list_price
|
||||||
|
│ ├── list_price_min
|
||||||
|
│ ├── list_price_max
|
||||||
│ ├── list_date
|
│ ├── list_date
|
||||||
│ ├── pending_date
|
│ ├── pending_date
|
||||||
│ ├── sold_price
|
│ ├── sold_price
|
||||||
|
|
|
@ -113,6 +113,9 @@ class Property:
|
||||||
address: Address | None = None
|
address: Address | None = None
|
||||||
|
|
||||||
list_price: int | None = None
|
list_price: int | None = None
|
||||||
|
list_price_min: int | None = None
|
||||||
|
list_price_max: int | None = None
|
||||||
|
|
||||||
list_date: str | None = None
|
list_date: str | None = None
|
||||||
pending_date: str | None = None
|
pending_date: str | None = None
|
||||||
last_sold_date: str | None = None
|
last_sold_date: str | None = None
|
||||||
|
|
|
@ -20,6 +20,7 @@ class RealtorScraper(Scraper):
|
||||||
PROPERTY_GQL = "https://graph.realtor.com/graphql"
|
PROPERTY_GQL = "https://graph.realtor.com/graphql"
|
||||||
ADDRESS_AUTOCOMPLETE_URL = "https://parser-external.geo.moveaws.com/suggest"
|
ADDRESS_AUTOCOMPLETE_URL = "https://parser-external.geo.moveaws.com/suggest"
|
||||||
NUM_PROPERTY_WORKERS = 20
|
NUM_PROPERTY_WORKERS = 20
|
||||||
|
DEFAULT_PAGE_SIZE = 200
|
||||||
|
|
||||||
def __init__(self, scraper_input):
|
def __init__(self, scraper_input):
|
||||||
super().__init__(scraper_input)
|
super().__init__(scraper_input)
|
||||||
|
@ -76,7 +77,6 @@ class RealtorScraper(Scraper):
|
||||||
baths_half
|
baths_half
|
||||||
lot_sqft
|
lot_sqft
|
||||||
sold_price
|
sold_price
|
||||||
sold_price
|
|
||||||
type
|
type
|
||||||
price
|
price
|
||||||
status
|
status
|
||||||
|
@ -326,6 +326,8 @@ class RealtorScraper(Scraper):
|
||||||
last_sold_price
|
last_sold_price
|
||||||
last_sold_date
|
last_sold_date
|
||||||
list_price
|
list_price
|
||||||
|
list_price_max
|
||||||
|
list_price_min
|
||||||
price_per_sqft
|
price_per_sqft
|
||||||
flags {
|
flags {
|
||||||
is_contingent
|
is_contingent
|
||||||
|
@ -551,6 +553,8 @@ class RealtorScraper(Scraper):
|
||||||
),
|
),
|
||||||
status="PENDING" if is_pending else result["status"].upper(),
|
status="PENDING" if is_pending else result["status"].upper(),
|
||||||
list_price=result["list_price"],
|
list_price=result["list_price"],
|
||||||
|
list_price_min=result["list_price_min"],
|
||||||
|
list_price_max=result["list_price_max"],
|
||||||
list_date=result["list_date"].split("T")[0] if result.get("list_date") else None,
|
list_date=result["list_date"].split("T")[0] if result.get("list_date") else None,
|
||||||
prc_sqft=result.get("price_per_sqft"),
|
prc_sqft=result.get("price_per_sqft"),
|
||||||
last_sold_date=result.get("last_sold_date"),
|
last_sold_date=result.get("last_sold_date"),
|
||||||
|
@ -571,9 +575,17 @@ class RealtorScraper(Scraper):
|
||||||
)
|
)
|
||||||
return realty_property
|
return realty_property
|
||||||
|
|
||||||
|
properties_list = response_json["data"][search_key]["results"]
|
||||||
|
total_properties = response_json["data"][search_key]["total"]
|
||||||
|
offset = variables.get("offset", 0)
|
||||||
|
|
||||||
|
#: limit the number of properties to be processed
|
||||||
|
#: example, if your offset is 200, and your limit is 250, return 50
|
||||||
|
properties_list = properties_list[:self.limit - offset]
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=self.NUM_PROPERTY_WORKERS) as executor:
|
with ThreadPoolExecutor(max_workers=self.NUM_PROPERTY_WORKERS) as executor:
|
||||||
futures = [
|
futures = [
|
||||||
executor.submit(process_property, result) for result in response_json["data"][search_key]["results"]
|
executor.submit(process_property, result) for result in properties_list
|
||||||
]
|
]
|
||||||
|
|
||||||
for future in as_completed(futures):
|
for future in as_completed(futures):
|
||||||
|
@ -582,7 +594,7 @@ class RealtorScraper(Scraper):
|
||||||
properties.append(result)
|
properties.append(result)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"total": response_json["data"][search_key]["total"],
|
"total": total_properties,
|
||||||
"properties": properties,
|
"properties": properties,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -654,7 +666,7 @@ class RealtorScraper(Scraper):
|
||||||
variables=search_variables | {"offset": i},
|
variables=search_variables | {"offset": i},
|
||||||
search_type=search_type,
|
search_type=search_type,
|
||||||
)
|
)
|
||||||
for i in range(200, min(total, self.limit), 200)
|
for i in range(self.DEFAULT_PAGE_SIZE, min(total, self.limit), self.DEFAULT_PAGE_SIZE)
|
||||||
]
|
]
|
||||||
|
|
||||||
for future in as_completed(futures):
|
for future in as_completed(futures):
|
||||||
|
|
|
@ -24,6 +24,8 @@ ordered_properties = [
|
||||||
"year_built",
|
"year_built",
|
||||||
"days_on_mls",
|
"days_on_mls",
|
||||||
"list_price",
|
"list_price",
|
||||||
|
"list_price_min",
|
||||||
|
"list_price_max",
|
||||||
"list_date",
|
"list_date",
|
||||||
"sold_price",
|
"sold_price",
|
||||||
"last_sold_date",
|
"last_sold_date",
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "homeharvest"
|
name = "homeharvest"
|
||||||
version = "0.3.33"
|
version = "0.3.34"
|
||||||
description = "Real estate scraping library"
|
description = "Real estate scraping library"
|
||||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||||
homepage = "https://github.com/Bunsly/HomeHarvest"
|
homepage = "https://github.com/Bunsly/HomeHarvest"
|
||||||
|
|
|
@ -105,8 +105,8 @@ def test_realtor():
|
||||||
location="2530 Al Lipscomb Way",
|
location="2530 Al Lipscomb Way",
|
||||||
listing_type="for_sale",
|
listing_type="for_sale",
|
||||||
),
|
),
|
||||||
scrape_property(location="Phoenix, AZ", listing_type="for_rent"), #: does not support "city, state, USA" format
|
scrape_property(location="Phoenix, AZ", listing_type="for_rent", limit=1000), #: does not support "city, state, USA" format
|
||||||
scrape_property(location="Dallas, TX", listing_type="sold"), #: does not support "city, state, USA" format
|
scrape_property(location="Dallas, TX", listing_type="sold", limit=1000), #: does not support "city, state, USA" format
|
||||||
scrape_property(location="85281"),
|
scrape_property(location="85281"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -117,6 +117,7 @@ def test_realtor_city():
|
||||||
results = scrape_property(
|
results = scrape_property(
|
||||||
location="Atlanta, GA",
|
location="Atlanta, GA",
|
||||||
listing_type="for_sale",
|
listing_type="for_sale",
|
||||||
|
limit=1000
|
||||||
)
|
)
|
||||||
|
|
||||||
assert results is not None and len(results) > 0
|
assert results is not None and len(results) > 0
|
||||||
|
@ -140,7 +141,7 @@ def test_realtor_foreclosed():
|
||||||
|
|
||||||
|
|
||||||
def test_realtor_agent():
|
def test_realtor_agent():
|
||||||
scraped = scrape_property(location="Detroit, MI", listing_type="for_sale")
|
scraped = scrape_property(location="Detroit, MI", listing_type="for_sale", limit=1000)
|
||||||
assert scraped["agent"].nunique() > 1
|
assert scraped["agent"].nunique() > 1
|
||||||
|
|
||||||
|
|
||||||
|
@ -182,6 +183,58 @@ def test_style_value_error():
|
||||||
location="Alaska, AK",
|
location="Alaska, AK",
|
||||||
listing_type="sold",
|
listing_type="sold",
|
||||||
extra_property_data=False,
|
extra_property_data=False,
|
||||||
|
limit=1000,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert results is not None and len(results) > 0
|
assert results is not None and len(results) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_primary_image_error():
|
||||||
|
results = scrape_property(
|
||||||
|
location="Spokane, PA",
|
||||||
|
listing_type="for_rent", # or (for_sale, for_rent, pending)
|
||||||
|
past_days=360,
|
||||||
|
radius=3,
|
||||||
|
extra_property_data=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert results is not None and len(results) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_limit():
|
||||||
|
over_limit = 876
|
||||||
|
extra_params = {"limit": over_limit}
|
||||||
|
|
||||||
|
over_results = scrape_property(
|
||||||
|
location="Waddell, AZ",
|
||||||
|
listing_type="for_sale",
|
||||||
|
**extra_params,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert over_results is not None and len(over_results) <= over_limit
|
||||||
|
|
||||||
|
under_limit = 1
|
||||||
|
under_results = scrape_property(
|
||||||
|
location="Waddell, AZ",
|
||||||
|
listing_type="for_sale",
|
||||||
|
limit=under_limit,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert under_results is not None and len(under_results) == under_limit
|
||||||
|
|
||||||
|
|
||||||
|
def test_apartment_list_price():
|
||||||
|
results = scrape_property(
|
||||||
|
location="Spokane, WA",
|
||||||
|
listing_type="for_rent", # or (for_sale, for_rent, pending)
|
||||||
|
extra_property_data=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert results is not None
|
||||||
|
|
||||||
|
results = results[results["style"] == "APARTMENT"]
|
||||||
|
|
||||||
|
#: get percentage of results with atleast 1 of any column not none, list_price, list_price_min, list_price_max
|
||||||
|
assert len(results[results[["list_price", "list_price_min", "list_price_max"]].notnull().any(axis=1)]) / len(
|
||||||
|
results
|
||||||
|
) > 0.5
|
||||||
|
|
Loading…
Reference in New Issue