pull/40/head v0.3.5
Cullen Watson 2023-10-09 11:30:17 -05:00
commit 5036e74b60
6 changed files with 54 additions and 21 deletions

View File

@ -147,6 +147,7 @@ Property
│ └── lot_sqft │ └── lot_sqft
├── Property Listing Details: ├── Property Listing Details:
│ ├── days_on_mls
│ ├── list_price │ ├── list_price
│ ├── list_date │ ├── list_date
│ ├── sold_price │ ├── sold_price

View File

@ -59,6 +59,7 @@ class Property:
last_sold_date: str | None = None last_sold_date: str | None = None
prc_sqft: int | None = None prc_sqft: int | None = None
hoa_fee: int | None = None hoa_fee: int | None = None
days_on_mls: int | None = None
description: Description | None = None description: Description | None = None
latitude: float | None = None latitude: float | None = None

View File

@ -4,6 +4,7 @@ homeharvest.realtor.__init__
This module implements the scraper for realtor.com This module implements the scraper for realtor.com
""" """
from datetime import datetime
from typing import Dict, Union, Optional from typing import Dict, Union, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
@ -109,6 +110,24 @@ class RealtorScraper(Scraper):
and property_info["address"].get("location") and property_info["address"].get("location")
and property_info["address"]["location"].get("coordinate") and property_info["address"]["location"].get("coordinate")
) )
list_date_str = property_info["basic"]["list_date"].split("T")[0] if property_info["basic"].get(
"list_date") else None
last_sold_date_str = property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get(
"sold_date") else None
list_date = datetime.strptime(list_date_str, "%Y-%m-%d") if list_date_str else None
last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None
today = datetime.now()
days_on_mls = None
status = property_info["basic"]["status"].lower()
if list_date:
if status == "sold" and last_sold_date:
days_on_mls = (last_sold_date - list_date).days
elif status in ('for_sale', 'for_rent'):
days_on_mls = (today - list_date).days
if days_on_mls and days_on_mls < 0:
days_on_mls = None
listing = Property( listing = Property(
mls=mls, mls=mls,
@ -118,17 +137,13 @@ class RealtorScraper(Scraper):
property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}", property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}",
status=property_info["basic"]["status"].upper(), status=property_info["basic"]["status"].upper(),
list_price=property_info["basic"]["price"], list_price=property_info["basic"]["price"],
list_date=property_info["basic"]["list_date"].split("T")[0] list_date=list_date,
if property_info["basic"].get("list_date")
else None,
prc_sqft=property_info["basic"].get("price") prc_sqft=property_info["basic"].get("price")
/ property_info["basic"].get("sqft") / property_info["basic"].get("sqft")
if property_info["basic"].get("price") if property_info["basic"].get("price")
and property_info["basic"].get("sqft") and property_info["basic"].get("sqft")
else None, else None,
last_sold_date=property_info["basic"]["sold_date"].split("T")[0] last_sold_date=last_sold_date,
if property_info["basic"].get("sold_date")
else None,
latitude=property_info["address"]["location"]["coordinate"].get("lat") latitude=property_info["address"]["location"]["coordinate"].get("lat")
if able_to_get_lat_long if able_to_get_lat_long
else None, else None,
@ -148,6 +163,7 @@ class RealtorScraper(Scraper):
garage=property_info["details"].get("garage"), garage=property_info["details"].get("garage"),
stories=property_info["details"].get("stories"), stories=property_info["details"].get("stories"),
), ),
days_on_mls=days_on_mls
) )
return [listing] return [listing]
@ -478,8 +494,8 @@ class RealtorScraper(Scraper):
if able_to_get_lat_long if able_to_get_lat_long
else None, else None,
address=self._parse_address(result, search_type="general_search"), address=self._parse_address(result, search_type="general_search"),
#: neighborhoods=self._parse_neighborhoods(result),
description=self._parse_description(result), description=self._parse_description(result),
days_on_mls=self.calculate_days_on_mls(result)
) )
properties.append(realty_property) properties.append(realty_property)
@ -590,7 +606,6 @@ class RealtorScraper(Scraper):
description_data = result.get("description", {}) description_data = result.get("description", {})
if description_data is None or not isinstance(description_data, dict): if description_data is None or not isinstance(description_data, dict):
print("Warning: description_data is invalid!")
description_data = {} description_data = {}
style = description_data.get("type", "") style = description_data.get("type", "")
@ -609,3 +624,22 @@ class RealtorScraper(Scraper):
garage=description_data.get("garage"), garage=description_data.get("garage"),
stories=description_data.get("stories"), stories=description_data.get("stories"),
) )
@staticmethod
def calculate_days_on_mls(result: dict) -> Optional[int]:
    """Return how many days a listing has spent (or spent) on the MLS.

    Reads the ``list_date``, ``last_sold_date`` and ``status`` keys of
    *result*.

    * ``sold``: days between the list date and the last sold date.
    * ``for_sale`` / ``for_rent``: days between the list date and now.
    * anything else, a missing/unparseable date, or a negative span: ``None``.

    :param result: a single search-result mapping.
    :return: a non-negative day count, or ``None`` when it cannot be derived.
    """

    def _parse_day(raw):
        # Dates may arrive as "YYYY-MM-DD" or as a full ISO timestamp
        # ("YYYY-MM-DDTHH:MM:SSZ"); only the calendar day matters here.
        if not raw:
            return None
        try:
            return datetime.strptime(raw.split("T")[0], "%Y-%m-%d")
        except ValueError:
            # A malformed date should not abort the whole scrape.
            return None

    list_date = _parse_day(result.get("list_date"))
    if list_date is None:
        return None

    # Tolerate a missing/None status and normalise case, matching how the
    # property-detail path lowercases status before comparing.
    status = (result.get("status") or "").lower()

    if status == "sold":
        end_date = _parse_day(result.get("last_sold_date"))
    elif status in ("for_sale", "for_rent"):
        end_date = datetime.now()
    else:
        end_date = None

    if end_date is None:
        return None

    days = (end_date - list_date).days
    # A negative span means the dates are inconsistent; report nothing
    # rather than a misleading number.
    return days if days >= 0 else None

View File

@ -18,6 +18,7 @@ ordered_properties = [
"half_baths", "half_baths",
"sqft", "sqft",
"year_built", "year_built",
"days_on_mls",
"list_price", "list_price",
"list_date", "list_date",
"sold_price", "sold_price",

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "homeharvest" name = "homeharvest"
version = "0.3.4" version = "0.3.5"
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin." description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"] authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
homepage = "https://github.com/ZacharyHampton/HomeHarvest" homepage = "https://github.com/ZacharyHampton/HomeHarvest"

View File

@ -7,14 +7,10 @@ from homeharvest.exceptions import (
def test_realtor_pending_or_contingent(): def test_realtor_pending_or_contingent():
pending_or_contingent_result = scrape_property( pending_or_contingent_result = scrape_property(
location="Surprise, AZ", location="Surprise, AZ", listing_type="pending"
listing_type="pending"
) )
regular_result = scrape_property( regular_result = scrape_property(location="Surprise, AZ", listing_type="for_sale")
location="Surprise, AZ",
listing_type="for_sale"
)
assert all( assert all(
[ [