[fix] keyerror on style

pull/32/head
Cullen Watson 2023-10-04 22:33:21 -05:00
parent 608cceba34
commit 6f5bbf79a4
5 changed files with 68 additions and 21 deletions

View File

@ -50,6 +50,7 @@ properties = scrape_property(
property_younger_than=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent) property_younger_than=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
# pending_or_contingent=True # use on for_sale listings to find pending / contingent listings # pending_or_contingent=True # use on for_sale listings to find pending / contingent listings
# mls_only=True, # only fetch MLS listings # mls_only=True, # only fetch MLS listings
# proxy="http://user:pass@host:port" # use a proxy to change your IP address
) )
print(f"Number of properties: {len(properties)}") print(f"Number of properties: {len(properties)}")
@ -61,7 +62,7 @@ print(properties.head())
### CLI ### CLI
``` ```
usage: homeharvest [-l {for_sale,for_rent,sold}] [-o {excel,csv}] [-f FILENAME] [-p PROXY] [-d DAYS] [-r RADIUS] [-m] location usage: homeharvest [-l {for_sale,for_rent,sold}] [-o {excel,csv}] [-f FILENAME] [-p PROXY] [-d DAYS] [-r RADIUS] [-m] [-c] location
Home Harvest Property Scraper Home Harvest Property Scraper
@ -79,8 +80,11 @@ options:
Proxy to use for scraping Proxy to use for scraping
-d DAYS, --days DAYS Sold/listed in last _ days filter. -d DAYS, --days DAYS Sold/listed in last _ days filter.
-r RADIUS, --radius RADIUS -r RADIUS, --radius RADIUS
Get comparable properties within _ (eg. 0.0) miles. Only applicable for individual addresses. Get comparable properties within _ (e.g., 0.0) miles. Only applicable for individual addresses.
-m, --mls_only If set, fetches only MLS listings. -m, --mls_only If set, fetches only MLS listings.
-c, --pending_or_contingent
If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches.
``` ```
```bash ```bash
> homeharvest "San Francisco, CA" -l for_rent -o excel -f HomeHarvest > homeharvest "San Francisco, CA" -l for_rent -o excel -f HomeHarvest

View File

@ -7,9 +7,11 @@ filename = f"HomeHarvest_{current_timestamp}.csv"
properties = scrape_property( properties = scrape_property(
location="San Diego, CA", location="San Diego, CA",
listing_type="sold", # for_sale, for_rent listing_type="sold", # or (for_sale, for_rent)
property_younger_than=30, # sold/listed in last 30 days property_younger_than=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
mls_only=True, # only fetch MLS listings # pending_or_contingent=True # use on for_sale listings to find pending / contingent listings
# mls_only=True, # only fetch MLS listings
# proxy="http://user:pass@host:port" # use a proxy to change your IP address
) )
print(f"Number of properties: {len(properties)}") print(f"Number of properties: {len(properties)}")

View File

@ -60,6 +60,13 @@ def main():
help="If set, fetches only MLS listings.", help="If set, fetches only MLS listings.",
) )
parser.add_argument(
"-c",
"--pending_or_contingent",
action="store_true",
help="If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches.",
)
args = parser.parse_args() args = parser.parse_args()
result = scrape_property( result = scrape_property(
@ -69,6 +76,7 @@ def main():
proxy=args.proxy, proxy=args.proxy,
mls_only=args.mls_only, mls_only=args.mls_only,
property_younger_than=args.days, property_younger_than=args.days,
pending_or_contingent=args.pending_or_contingent,
) )
if not args.filename: if not args.filename:

View File

@ -105,10 +105,10 @@ class RealtorScraper(Scraper):
) )
able_to_get_lat_long = ( able_to_get_lat_long = (
property_info property_info
and property_info.get("address") and property_info.get("address")
and property_info["address"].get("location") and property_info["address"].get("location")
and property_info["address"]["location"].get("coordinate") and property_info["address"]["location"].get("coordinate")
) )
listing = Property( listing = Property(
@ -122,8 +122,10 @@ class RealtorScraper(Scraper):
list_date=property_info["basic"]["list_date"].split("T")[0] list_date=property_info["basic"]["list_date"].split("T")[0]
if property_info["basic"].get("list_date") if property_info["basic"].get("list_date")
else None, else None,
prc_sqft=property_info["basic"].get("price") / property_info["basic"].get("sqft") prc_sqft=property_info["basic"].get("price")
if property_info["basic"].get("price") and property_info["basic"].get("sqft") / property_info["basic"].get("sqft")
if property_info["basic"].get("price")
and property_info["basic"].get("sqft")
else None, else None,
last_sold_date=property_info["basic"]["sold_date"].split("T")[0] last_sold_date=property_info["basic"]["sold_date"].split("T")[0]
if property_info["basic"].get("sold_date") if property_info["basic"].get("sold_date")
@ -146,7 +148,7 @@ class RealtorScraper(Scraper):
year_built=property_info["details"].get("year_built"), year_built=property_info["details"].get("year_built"),
garage=property_info["details"].get("garage"), garage=property_info["details"].get("garage"),
stories=property_info["details"].get("stories"), stories=property_info["details"].get("stories"),
) ),
) )
return [listing] return [listing]
@ -175,7 +177,10 @@ class RealtorScraper(Scraper):
if property_info["listings"] is None: if property_info["listings"] is None:
return None return None
primary_listing = next((listing for listing in property_info["listings"] if listing["primary"]), None) primary_listing = next(
(listing for listing in property_info["listings"] if listing["primary"]),
None,
)
if primary_listing: if primary_listing:
return primary_listing["listing_id"] return primary_listing["listing_id"]
else: else:
@ -328,7 +333,11 @@ class RealtorScraper(Scraper):
else "sort: [{ field: list_date, direction: desc }]" else "sort: [{ field: list_date, direction: desc }]"
) )
pending_or_contingent_param = "or_filters: { contingent: true, pending: true }" if self.pending_or_contingent else "" pending_or_contingent_param = (
"or_filters: { contingent: true, pending: true }"
if self.pending_or_contingent
else ""
)
if search_type == "comps": #: comps search, came from an address if search_type == "comps": #: comps search, came from an address
query = """query Property_search( query = """query Property_search(
@ -384,7 +393,7 @@ class RealtorScraper(Scraper):
) )
else: #: general search, came from an address else: #: general search, came from an address
query = ( query = (
"""query Property_search( """query Property_search(
$property_id: [ID]! $property_id: [ID]!
$offset: Int!, $offset: Int!,
) { ) {
@ -394,7 +403,9 @@ class RealtorScraper(Scraper):
} }
limit: 1 limit: 1
offset: $offset offset: $offset
) %s""" % results_query) ) %s"""
% results_query
)
payload = { payload = {
"query": query, "query": query,
@ -477,13 +488,21 @@ class RealtorScraper(Scraper):
"offset": 0, "offset": 0,
} }
search_type = "comps" if self.radius and location_type == "address" else "address" if location_type == "address" and not self.radius else "area" search_type = (
"comps"
if self.radius and location_type == "address"
else "address"
if location_type == "address" and not self.radius
else "area"
)
if location_type == "address": if location_type == "address":
if not self.radius: #: single address search, non comps if not self.radius: #: single address search, non comps
property_id = location_info["mpr_id"] property_id = location_info["mpr_id"]
search_variables |= {"property_id": property_id} search_variables |= {"property_id": property_id}
gql_results = self.general_search(search_variables, search_type=search_type) gql_results = self.general_search(
search_variables, search_type=search_type
)
if gql_results["total"] == 0: if gql_results["total"] == 0:
listing_id = self.get_latest_listing_id(property_id) listing_id = self.get_latest_listing_id(property_id)
if listing_id is None: if listing_id is None:
@ -561,8 +580,17 @@ class RealtorScraper(Scraper):
@staticmethod @staticmethod
def _parse_description(result: dict) -> Description: def _parse_description(result: dict) -> Description:
description_data = result.get("description", {}) description_data = result.get("description", {})
if description_data is None or not isinstance(description_data, dict):
print("Warning: description_data is invalid!")
description_data = {}
style = description_data.get("type", "")
if style is not None:
style = style.upper()
return Description( return Description(
style=description_data.get("type", "").upper(), style=style,
beds=description_data.get("beds"), beds=description_data.get("beds"),
baths_full=description_data.get("baths_full"), baths_full=description_data.get("baths_full"),
baths_half=description_data.get("baths_half"), baths_half=description_data.get("baths_half"),

View File

@ -16,7 +16,12 @@ def test_realtor_pending_or_contingent():
pending_or_contingent=False, pending_or_contingent=False,
) )
assert all([result is not None for result in [pending_or_contingent_result, regular_result]]) assert all(
[
result is not None
for result in [pending_or_contingent_result, regular_result]
]
)
assert len(pending_or_contingent_result) != len(regular_result) assert len(pending_or_contingent_result) != len(regular_result)