- fix limit parameter
- fix specific for_rent apartment listing prices
pull/102/head v0.3.34
parent
3f44744d61
commit
6d14b8df5a
|
@ -128,6 +128,8 @@ Property
|
|||
├── Property Listing Details:
|
||||
│ ├── days_on_mls
|
||||
│ ├── list_price
|
||||
│ ├── list_price_min
|
||||
│ ├── list_price_max
|
||||
│ ├── list_date
|
||||
│ ├── pending_date
|
||||
│ ├── sold_price
|
||||
|
|
|
@ -113,6 +113,9 @@ class Property:
|
|||
address: Address | None = None
|
||||
|
||||
list_price: int | None = None
|
||||
list_price_min: int | None = None
|
||||
list_price_max: int | None = None
|
||||
|
||||
list_date: str | None = None
|
||||
pending_date: str | None = None
|
||||
last_sold_date: str | None = None
|
||||
|
|
|
@ -20,6 +20,7 @@ class RealtorScraper(Scraper):
|
|||
PROPERTY_GQL = "https://graph.realtor.com/graphql"
|
||||
ADDRESS_AUTOCOMPLETE_URL = "https://parser-external.geo.moveaws.com/suggest"
|
||||
NUM_PROPERTY_WORKERS = 20
|
||||
DEFAULT_PAGE_SIZE = 200
|
||||
|
||||
def __init__(self, scraper_input):
|
||||
super().__init__(scraper_input)
|
||||
|
@ -76,7 +77,6 @@ class RealtorScraper(Scraper):
|
|||
baths_half
|
||||
lot_sqft
|
||||
sold_price
|
||||
sold_price
|
||||
type
|
||||
price
|
||||
status
|
||||
|
@ -326,6 +326,8 @@ class RealtorScraper(Scraper):
|
|||
last_sold_price
|
||||
last_sold_date
|
||||
list_price
|
||||
list_price_max
|
||||
list_price_min
|
||||
price_per_sqft
|
||||
flags {
|
||||
is_contingent
|
||||
|
@ -551,6 +553,8 @@ class RealtorScraper(Scraper):
|
|||
),
|
||||
status="PENDING" if is_pending else result["status"].upper(),
|
||||
list_price=result["list_price"],
|
||||
list_price_min=result["list_price_min"],
|
||||
list_price_max=result["list_price_max"],
|
||||
list_date=result["list_date"].split("T")[0] if result.get("list_date") else None,
|
||||
prc_sqft=result.get("price_per_sqft"),
|
||||
last_sold_date=result.get("last_sold_date"),
|
||||
|
@ -571,9 +575,17 @@ class RealtorScraper(Scraper):
|
|||
)
|
||||
return realty_property
|
||||
|
||||
properties_list = response_json["data"][search_key]["results"]
|
||||
total_properties = response_json["data"][search_key]["total"]
|
||||
offset = variables.get("offset", 0)
|
||||
|
||||
#: limit the number of properties to be processed
|
||||
#: for example, if the offset is 200 and the limit is 250, keep only the first 50 results of this page
|
||||
properties_list = properties_list[:self.limit - offset]
|
||||
|
||||
with ThreadPoolExecutor(max_workers=self.NUM_PROPERTY_WORKERS) as executor:
|
||||
futures = [
|
||||
executor.submit(process_property, result) for result in response_json["data"][search_key]["results"]
|
||||
executor.submit(process_property, result) for result in properties_list
|
||||
]
|
||||
|
||||
for future in as_completed(futures):
|
||||
|
@ -582,7 +594,7 @@ class RealtorScraper(Scraper):
|
|||
properties.append(result)
|
||||
|
||||
return {
|
||||
"total": response_json["data"][search_key]["total"],
|
||||
"total": total_properties,
|
||||
"properties": properties,
|
||||
}
|
||||
|
||||
|
@ -654,7 +666,7 @@ class RealtorScraper(Scraper):
|
|||
variables=search_variables | {"offset": i},
|
||||
search_type=search_type,
|
||||
)
|
||||
for i in range(200, min(total, self.limit), 200)
|
||||
for i in range(self.DEFAULT_PAGE_SIZE, min(total, self.limit), self.DEFAULT_PAGE_SIZE)
|
||||
]
|
||||
|
||||
for future in as_completed(futures):
|
||||
|
|
|
@ -24,6 +24,8 @@ ordered_properties = [
|
|||
"year_built",
|
||||
"days_on_mls",
|
||||
"list_price",
|
||||
"list_price_min",
|
||||
"list_price_max",
|
||||
"list_date",
|
||||
"sold_price",
|
||||
"last_sold_date",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "homeharvest"
|
||||
version = "0.3.33"
|
||||
version = "0.3.34"
|
||||
description = "Real estate scraping library"
|
||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||
homepage = "https://github.com/Bunsly/HomeHarvest"
|
||||
|
|
|
@ -105,8 +105,8 @@ def test_realtor():
|
|||
location="2530 Al Lipscomb Way",
|
||||
listing_type="for_sale",
|
||||
),
|
||||
scrape_property(location="Phoenix, AZ", listing_type="for_rent"), #: does not support "city, state, USA" format
|
||||
scrape_property(location="Dallas, TX", listing_type="sold"), #: does not support "city, state, USA" format
|
||||
scrape_property(location="Phoenix, AZ", listing_type="for_rent", limit=1000), #: does not support "city, state, USA" format
|
||||
scrape_property(location="Dallas, TX", listing_type="sold", limit=1000), #: does not support "city, state, USA" format
|
||||
scrape_property(location="85281"),
|
||||
]
|
||||
|
||||
|
@ -117,6 +117,7 @@ def test_realtor_city():
|
|||
results = scrape_property(
|
||||
location="Atlanta, GA",
|
||||
listing_type="for_sale",
|
||||
limit=1000
|
||||
)
|
||||
|
||||
assert results is not None and len(results) > 0
|
||||
|
@ -140,7 +141,7 @@ def test_realtor_foreclosed():
|
|||
|
||||
|
||||
def test_realtor_agent():
|
||||
scraped = scrape_property(location="Detroit, MI", listing_type="for_sale")
|
||||
scraped = scrape_property(location="Detroit, MI", listing_type="for_sale", limit=1000)
|
||||
assert scraped["agent"].nunique() > 1
|
||||
|
||||
|
||||
|
@ -182,6 +183,58 @@ def test_style_value_error():
|
|||
location="Alaska, AK",
|
||||
listing_type="sold",
|
||||
extra_property_data=False,
|
||||
limit=1000,
|
||||
)
|
||||
|
||||
assert results is not None and len(results) > 0
|
||||
|
||||
|
||||
def test_primary_image_error():
|
||||
results = scrape_property(
|
||||
location="Spokane, PA",
|
||||
listing_type="for_rent", # or (for_sale, for_rent, pending)
|
||||
past_days=360,
|
||||
radius=3,
|
||||
extra_property_data=False,
|
||||
)
|
||||
|
||||
assert results is not None and len(results) > 0
|
||||
|
||||
|
||||
def test_limit():
|
||||
over_limit = 876
|
||||
extra_params = {"limit": over_limit}
|
||||
|
||||
over_results = scrape_property(
|
||||
location="Waddell, AZ",
|
||||
listing_type="for_sale",
|
||||
**extra_params,
|
||||
)
|
||||
|
||||
assert over_results is not None and len(over_results) <= over_limit
|
||||
|
||||
under_limit = 1
|
||||
under_results = scrape_property(
|
||||
location="Waddell, AZ",
|
||||
listing_type="for_sale",
|
||||
limit=under_limit,
|
||||
)
|
||||
|
||||
assert under_results is not None and len(under_results) == under_limit
|
||||
|
||||
|
||||
def test_apartment_list_price():
|
||||
results = scrape_property(
|
||||
location="Spokane, WA",
|
||||
listing_type="for_rent", # or (for_sale, for_rent, pending)
|
||||
extra_property_data=False,
|
||||
)
|
||||
|
||||
assert results is not None
|
||||
|
||||
results = results[results["style"] == "APARTMENT"]
|
||||
|
||||
#: get the fraction of results with at least one non-null value among list_price, list_price_min, list_price_max
|
||||
assert len(results[results[["list_price", "list_price_min", "list_price_max"]].notnull().any(axis=1)]) / len(
|
||||
results
|
||||
) > 0.5
|
||||
|
|
Loading…
Reference in New Issue