mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-04 19:44:29 -08:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
05713c76b0 | ||
|
|
9120cc9bfe | ||
|
|
eee4b19515 | ||
|
|
c25961eded | ||
|
|
0884c3d163 | ||
|
|
8f37bfdeb8 | ||
|
|
48c2338276 |
@@ -1,13 +1,16 @@
|
||||
<img src="https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/d1a2bf8b-09f5-4c57-b33a-0ada8a34f12d" width="400">
|
||||
|
||||
**Not technical?** Try out the web scraping tool on our site at [tryhomeharvest.com](https://tryhomeharvest.com).
|
||||
|
||||
|
||||
**HomeHarvest** is a simple, yet comprehensive, real estate scraping library.
|
||||
|
||||
[Try with Replit](https://replit.com/@ZacharyHampton/HomeHarvestDemo)
|
||||
|
||||
\
|
||||
**Not technical?** Try out the web scraping tool on our site at [tryhomeharvest.com](https://tryhomeharvest.com).
|
||||
|
||||
*Looking to build a data-focused software product?* **[Book a call](https://calendly.com/zachary-products/15min)** *to work with us.*
|
||||
|
||||
Check out another project we wrote: ***[JobSpy](https://github.com/cullenwatson/JobSpy)** – a Python package for job scraping*
|
||||
|
||||
## Features
|
||||
|
||||
- Scrapes properties from **Zillow**, **Realtor.com** & **Redfin** simultaneously
|
||||
|
||||
@@ -93,7 +93,7 @@ class RedfinScraper(Scraper):
|
||||
year_built=get_value("yearBuilt") if not single_search else home["yearBuilt"],
|
||||
lot_area_value=lot_size,
|
||||
property_type=PropertyType.from_int_code(home.get("propertyType")),
|
||||
price_per_sqft=get_value("pricePerSqFt"),
|
||||
price_per_sqft=get_value("pricePerSqFt") if type(home.get("pricePerSqFt")) != int else home.get("pricePerSqFt"),
|
||||
mls_id=get_value("mlsId"),
|
||||
latitude=home["latLong"]["latitude"] if "latLong" in home and "latitude" in home["latLong"] else None,
|
||||
longitude=home["latLong"]["longitude"] if "latLong" in home and "longitude" in home["latLong"] else None,
|
||||
@@ -183,7 +183,7 @@ class RedfinScraper(Scraper):
|
||||
),
|
||||
property_url="https://www.redfin.com{}".format(building["url"]),
|
||||
listing_type=self.listing_type,
|
||||
unit_count=building["numUnitsForSale"],
|
||||
unit_count=building.get("numUnitsForSale"),
|
||||
)
|
||||
|
||||
def handle_address(self, home_id: str):
|
||||
@@ -220,7 +220,14 @@ class RedfinScraper(Scraper):
|
||||
url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}&sold_within_days=30&num_homes=100000"
|
||||
response = self.session.get(url)
|
||||
response_json = json.loads(response.text.replace("{}&&", ""))
|
||||
homes = [self._parse_home(home) for home in response_json["payload"]["homes"]] + [
|
||||
self._parse_building(building) for building in response_json["payload"]["buildings"].values()
|
||||
]
|
||||
return homes
|
||||
|
||||
if "payload" in response_json:
|
||||
homes_list = response_json["payload"].get("homes", [])
|
||||
buildings_list = response_json["payload"].get("buildings", {}).values()
|
||||
|
||||
homes = [self._parse_home(home) for home in homes_list] + [
|
||||
self._parse_building(building) for building in buildings_list
|
||||
]
|
||||
return homes
|
||||
else:
|
||||
return []
|
||||
|
||||
@@ -147,18 +147,18 @@ class ZillowScraper(Scraper):
|
||||
if "hdpData" in result:
|
||||
home_info = result["hdpData"]["homeInfo"]
|
||||
address_data = {
|
||||
"address_one": parse_address_one(home_info["streetAddress"])[0],
|
||||
"address_one": parse_address_one(home_info.get("streetAddress"))[0],
|
||||
"address_two": parse_address_two(home_info["unit"]) if "unit" in home_info else "#",
|
||||
"city": home_info["city"],
|
||||
"state": home_info["state"],
|
||||
"zip_code": home_info["zipcode"],
|
||||
"city": home_info.get("city"),
|
||||
"state": home_info.get("state"),
|
||||
"zip_code": home_info.get("zipcode"),
|
||||
}
|
||||
property_obj = Property(
|
||||
site_name=self.site_name,
|
||||
address=Address(**address_data),
|
||||
property_url=f"https://www.zillow.com{result['detailUrl']}",
|
||||
tax_assessed_value=int(home_info["taxAssessedValue"]) if "taxAssessedValue" in home_info else None,
|
||||
property_type=PropertyType(home_info["homeType"]),
|
||||
property_type=PropertyType(home_info.get("homeType")),
|
||||
listing_type=ListingType(
|
||||
home_info["statusType"] if "statusType" in home_info else self.listing_type
|
||||
),
|
||||
@@ -198,18 +198,17 @@ class ZillowScraper(Scraper):
|
||||
site_name=self.site_name,
|
||||
property_type=PropertyType("BUILDING"),
|
||||
listing_type=ListingType(result["statusType"]),
|
||||
img_src=result["imgSrc"],
|
||||
img_src=result.get("imgSrc"),
|
||||
address=self._extract_address(result["address"]),
|
||||
baths_min=result["minBaths"],
|
||||
baths_min=result.get("minBaths"),
|
||||
area_min=result.get("minArea"),
|
||||
bldg_name=result.get("communityName"),
|
||||
status_text=result["statusText"],
|
||||
beds_min=result["minBeds"],
|
||||
price_min=price_value if "+/mo" in result["price"] else None,
|
||||
price_max=price_value if "+/mo" in result["price"] else None,
|
||||
latitude=result["latLong"]["latitude"],
|
||||
longitude=result["latLong"]["longitude"],
|
||||
unit_count=result["unitCount"],
|
||||
status_text=result.get("statusText"),
|
||||
price_min=price_value if "+/mo" in result.get("price") else None,
|
||||
price_max=price_value if "+/mo" in result.get("price") else None,
|
||||
latitude=result.get("latLong", {}).get("latitude"),
|
||||
longitude=result.get("latLong", {}).get("longitude"),
|
||||
unit_count=result.get("unitCount"),
|
||||
)
|
||||
|
||||
properties_list.append(building_obj)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "homeharvest"
|
||||
version = "0.2.9"
|
||||
version = "0.2.12"
|
||||
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
|
||||
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
||||
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
|
||||
|
||||
Reference in New Issue
Block a user