commit ba7ad069c9
@@ -50,6 +50,7 @@ properties = scrape_property(
 property_younger_than=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
 # pending_or_contingent=True # use on for_sale listings to find pending / contingent listings
 # mls_only=True, # only fetch MLS listings
+# proxy="http://user:pass@host:port" # use a proxy to change your IP address
 )
 print(f"Number of properties: {len(properties)}")
 
@@ -61,7 +62,7 @@ print(properties.head())
 ### CLI
 
 ```
-usage: homeharvest [-l {for_sale,for_rent,sold}] [-o {excel,csv}] [-f FILENAME] [-p PROXY] [-d DAYS] [-r RADIUS] [-m] location
+usage: homeharvest [-l {for_sale,for_rent,sold}] [-o {excel,csv}] [-f FILENAME] [-p PROXY] [-d DAYS] [-r RADIUS] [-m] [-c] location
 
 Home Harvest Property Scraper
 
@@ -79,8 +80,11 @@ options:
 Proxy to use for scraping
 -d DAYS, --days DAYS Sold/listed in last _ days filter.
 -r RADIUS, --radius RADIUS
-Get comparable properties within _ (eg. 0.0) miles. Only applicable for individual addresses.
+Get comparable properties within _ (e.g., 0.0) miles. Only applicable for individual addresses.
 -m, --mls_only If set, fetches only MLS listings.
+-c, --pending_or_contingent
+If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches.
 
 ```
 ```bash
 > homeharvest "San Francisco, CA" -l for_rent -o excel -f HomeHarvest
 
@@ -7,9 +7,11 @@ filename = f"HomeHarvest_{current_timestamp}.csv"
 
 properties = scrape_property(
 location="San Diego, CA",
-listing_type="sold", # for_sale, for_rent
-property_younger_than=30, # sold/listed in last 30 days
-mls_only=True, # only fetch MLS listings
+listing_type="sold", # or (for_sale, for_rent)
+property_younger_than=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
+# pending_or_contingent=True # use on for_sale listings to find pending / contingent listings
+# mls_only=True, # only fetch MLS listings
+# proxy="http://user:pass@host:port" # use a proxy to change your IP address
 )
 print(f"Number of properties: {len(properties)}")
 
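For reference, a minimal runnable sketch of the documented call with the pending/contingent filter switched on. The for_sale listing type and the output filename are illustrative choices, not part of this change; the parameters themselves are the ones shown in the hunks above.

```python
from homeharvest import scrape_property

# Minimal sketch: pending_or_contingent applies to for_sale listings
# from general area searches.
properties = scrape_property(
    location="San Diego, CA",
    listing_type="for_sale",
    pending_or_contingent=True,
    # mls_only=True,                       # optionally restrict to MLS listings
    # proxy="http://user:pass@host:port",  # optional proxy
)

print(f"Number of properties: {len(properties)}")
properties.to_csv("HomeHarvest_pending.csv", index=False)  # illustrative filename
```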
@@ -60,6 +60,13 @@ def main():
 help="If set, fetches only MLS listings.",
 )
 
+parser.add_argument(
+"-c",
+"--pending_or_contingent",
+action="store_true",
+help="If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches.",
+)
+
 args = parser.parse_args()
 
 result = scrape_property(
@@ -69,6 +76,7 @@ def main():
 proxy=args.proxy,
 mls_only=args.mls_only,
 property_younger_than=args.days,
+pending_or_contingent=args.pending_or_contingent,
 )
 
 if not args.filename:
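Taken together, the two CLI hunks wire a store_true flag straight into scrape_property. A self-contained sketch of just that path; the parser here is a stripped-down stand-in, not the real CLI, and the listing type is an assumption:

```python
import argparse
from homeharvest import scrape_property

# Stand-in parser: only the positional location and the new flag are shown.
parser = argparse.ArgumentParser(description="Home Harvest Property Scraper")
parser.add_argument("location")
parser.add_argument(
    "-c",
    "--pending_or_contingent",
    action="store_true",
    help="If set, fetches only pending or contingent listings.",
)

# Simulates: homeharvest "San Diego, CA" -c
args = parser.parse_args(["San Diego, CA", "-c"])

result = scrape_property(
    location=args.location,
    listing_type="for_sale",
    pending_or_contingent=args.pending_or_contingent,
)
print(f"Number of properties: {len(result)}")
```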
@@ -122,8 +122,10 @@ class RealtorScraper(Scraper):
 list_date=property_info["basic"]["list_date"].split("T")[0]
 if property_info["basic"].get("list_date")
 else None,
-prc_sqft=property_info["basic"].get("price") / property_info["basic"].get("sqft")
-if property_info["basic"].get("price") and property_info["basic"].get("sqft")
+prc_sqft=property_info["basic"].get("price")
+/ property_info["basic"].get("sqft")
+if property_info["basic"].get("price")
+and property_info["basic"].get("sqft")
 else None,
 last_sold_date=property_info["basic"]["sold_date"].split("T")[0]
 if property_info["basic"].get("sold_date")
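The prc_sqft change above is a line-wrap only; the guard itself is unchanged. As a standalone sketch of that guard, with a hypothetical helper name:

```python
from typing import Optional

def price_per_sqft(basic: dict) -> Optional[float]:
    """Hypothetical helper mirroring the guarded expression above:
    divide only when both price and sqft are present and non-zero."""
    price = basic.get("price")
    sqft = basic.get("sqft")
    return price / sqft if price and sqft else None

assert price_per_sqft({"price": 500_000, "sqft": 2_000}) == 250.0
assert price_per_sqft({"price": 500_000}) is None               # missing sqft
assert price_per_sqft({"price": None, "sqft": 2_000}) is None   # missing price
```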
@@ -146,7 +148,7 @@ class RealtorScraper(Scraper):
 year_built=property_info["details"].get("year_built"),
 garage=property_info["details"].get("garage"),
 stories=property_info["details"].get("stories"),
-)
+),
 )
 
 return [listing]
@@ -175,7 +177,10 @@ class RealtorScraper(Scraper):
 if property_info["listings"] is None:
 return None
 
-primary_listing = next((listing for listing in property_info["listings"] if listing["primary"]), None)
+primary_listing = next(
+(listing for listing in property_info["listings"] if listing["primary"]),
+None,
+)
 if primary_listing:
 return primary_listing["listing_id"]
 else:
@@ -328,7 +333,11 @@ class RealtorScraper(Scraper):
 else "sort: [{ field: list_date, direction: desc }]"
 )
 
-pending_or_contingent_param = "or_filters: { contingent: true, pending: true }" if self.pending_or_contingent else ""
+pending_or_contingent_param = (
+"or_filters: { contingent: true, pending: true }"
+if self.pending_or_contingent
+else ""
+)
 
 if search_type == "comps": #: comps search, came from an address
 query = """query Property_search(
@@ -394,7 +403,9 @@ class RealtorScraper(Scraper):
 }
 limit: 1
 offset: $offset
-) %s""" % results_query)
+) %s"""
+% results_query
+)
 
 payload = {
 "query": query,
@@ -477,13 +488,21 @@ class RealtorScraper(Scraper):
 "offset": 0,
 }
 
-search_type = "comps" if self.radius and location_type == "address" else "address" if location_type == "address" and not self.radius else "area"
+search_type = (
+"comps"
+if self.radius and location_type == "address"
+else "address"
+if location_type == "address" and not self.radius
+else "area"
+)
 if location_type == "address":
 if not self.radius: #: single address search, non comps
 property_id = location_info["mpr_id"]
 search_variables |= {"property_id": property_id}
 
-gql_results = self.general_search(search_variables, search_type=search_type)
+gql_results = self.general_search(
+search_variables, search_type=search_type
+)
 if gql_results["total"] == 0:
 listing_id = self.get_latest_listing_id(property_id)
 if listing_id is None:
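The search_type expression above is only re-wrapped; spelled out as explicit branches it reads as in this sketch, where the function name is illustrative:

```python
from typing import Optional

def resolve_search_type(location_type: str, radius: Optional[float]) -> str:
    """Illustrative restatement of the chained conditional above."""
    if location_type == "address" and radius:
        return "comps"    # comparables around a single address
    if location_type == "address":
        return "address"  # single-address lookup, no radius given
    return "area"         # general area search (city, zip code, etc.)

assert resolve_search_type("address", 0.5) == "comps"
assert resolve_search_type("address", None) == "address"
assert resolve_search_type("area", None) == "area"
```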
@@ -561,8 +580,17 @@ class RealtorScraper(Scraper):
 @staticmethod
 def _parse_description(result: dict) -> Description:
 description_data = result.get("description", {})
+
+if description_data is None or not isinstance(description_data, dict):
+print("Warning: description_data is invalid!")
+description_data = {}
+
+style = description_data.get("type", "")
+if style is not None:
+style = style.upper()
+
 return Description(
-style=description_data.get("type", "").upper(),
+style=style,
 beds=description_data.get("beds"),
 baths_full=description_data.get("baths_full"),
 baths_half=description_data.get("baths_half"),
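The None-handling added to _parse_description can be exercised on its own. A minimal sketch, with a free function standing in for the real method (which goes on to build a full Description):

```python
from typing import Optional

def parse_style(result: dict) -> Optional[str]:
    """Mirrors the guard above: tolerate a missing or None description
    block and a None "type" value."""
    description_data = result.get("description", {})
    if description_data is None or not isinstance(description_data, dict):
        description_data = {}  # fall back to an empty dict, as above
    style = description_data.get("type", "")
    if style is not None:
        style = style.upper()
    return style

assert parse_style({"description": {"type": "condo"}}) == "CONDO"
assert parse_style({"description": None}) == ""
assert parse_style({"description": {"type": None}}) is None
```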
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "homeharvest"
-version = "0.3.0"
+version = "0.3.1"
 description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
 authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
 homepage = "https://github.com/ZacharyHampton/HomeHarvest"
@@ -16,7 +16,12 @@ def test_realtor_pending_or_contingent():
 pending_or_contingent=False,
 )
 
-assert all([result is not None for result in [pending_or_contingent_result, regular_result]])
+assert all(
+[
+result is not None
+for result in [pending_or_contingent_result, regular_result]
+]
+)
 assert len(pending_or_contingent_result) != len(regular_result)
 
 
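The test hunk shows only the tail of test_realtor_pending_or_contingent. A sketch of how the full test plausibly reads; the location and listing type are assumptions, and only the asserts come from the diff:

```python
from homeharvest import scrape_property

def test_realtor_pending_or_contingent():
    # Assumed setup: one search with the filter on, one with it off.
    pending_or_contingent_result = scrape_property(
        location="Phoenix, AZ",   # assumed location
        listing_type="for_sale",
        pending_or_contingent=True,
    )
    regular_result = scrape_property(
        location="Phoenix, AZ",
        listing_type="for_sale",
        pending_or_contingent=False,
    )

    assert all(result is not None for result in [pending_or_contingent_result, regular_result])
    assert len(pending_or_contingent_result) != len(regular_result)
```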