mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-05 03:54:29 -08:00
Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
254f3a68a1 | ||
|
|
05713c76b0 | ||
|
|
9120cc9bfe | ||
|
|
eee4b19515 | ||
|
|
c25961eded | ||
|
|
0884c3d163 | ||
|
|
8f37bfdeb8 | ||
|
|
48c2338276 | ||
|
|
f58a1f4a74 |
@@ -4,7 +4,13 @@
|
|||||||
|
|
||||||
[Try with Replit](https://replit.com/@ZacharyHampton/HomeHarvestDemo)
|
[Try with Replit](https://replit.com/@ZacharyHampton/HomeHarvestDemo)
|
||||||
|
|
||||||
|
\
|
||||||
|
**Not technical?** Try out the web scraping tool on our site at [tryhomeharvest.com](https://tryhomeharvest.com).
|
||||||
|
|
||||||
*Looking to build a data-focused software product?* **[Book a call](https://calendly.com/zachary-products/15min)** *to work with us.*
|
*Looking to build a data-focused software product?* **[Book a call](https://calendly.com/zachary-products/15min)** *to work with us.*
|
||||||
|
|
||||||
|
Check out another project we wrote: ***[JobSpy](https://github.com/cullenwatson/JobSpy)** – a Python package for job scraping*
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Scrapes properties from **Zillow**, **Realtor.com** & **Redfin** simultaneously
|
- Scrapes properties from **Zillow**, **Realtor.com** & **Redfin** simultaneously
|
||||||
|
|||||||
@@ -90,10 +90,10 @@ class RedfinScraper(Scraper):
|
|||||||
stories=home["stories"] if "stories" in home else None,
|
stories=home["stories"] if "stories" in home else None,
|
||||||
agent_name=get_value("listingAgent"),
|
agent_name=get_value("listingAgent"),
|
||||||
description=home["listingRemarks"] if "listingRemarks" in home else None,
|
description=home["listingRemarks"] if "listingRemarks" in home else None,
|
||||||
year_built=get_value("yearBuilt") if not single_search else home["yearBuilt"],
|
year_built=get_value("yearBuilt") if not single_search else home.get("yearBuilt"),
|
||||||
lot_area_value=lot_size,
|
lot_area_value=lot_size,
|
||||||
property_type=PropertyType.from_int_code(home.get("propertyType")),
|
property_type=PropertyType.from_int_code(home.get("propertyType")),
|
||||||
price_per_sqft=get_value("pricePerSqFt"),
|
price_per_sqft=get_value("pricePerSqFt") if type(home.get("pricePerSqFt")) != int else home.get("pricePerSqFt"),
|
||||||
mls_id=get_value("mlsId"),
|
mls_id=get_value("mlsId"),
|
||||||
latitude=home["latLong"]["latitude"] if "latLong" in home and "latitude" in home["latLong"] else None,
|
latitude=home["latLong"]["latitude"] if "latLong" in home and "latitude" in home["latLong"] else None,
|
||||||
longitude=home["latLong"]["longitude"] if "latLong" in home and "longitude" in home["latLong"] else None,
|
longitude=home["latLong"]["longitude"] if "latLong" in home and "longitude" in home["latLong"] else None,
|
||||||
@@ -183,7 +183,7 @@ class RedfinScraper(Scraper):
|
|||||||
),
|
),
|
||||||
property_url="https://www.redfin.com{}".format(building["url"]),
|
property_url="https://www.redfin.com{}".format(building["url"]),
|
||||||
listing_type=self.listing_type,
|
listing_type=self.listing_type,
|
||||||
unit_count=building["numUnitsForSale"],
|
unit_count=building.get("numUnitsForSale"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle_address(self, home_id: str):
|
def handle_address(self, home_id: str):
|
||||||
@@ -220,7 +220,14 @@ class RedfinScraper(Scraper):
|
|||||||
url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}&sold_within_days=30&num_homes=100000"
|
url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}&sold_within_days=30&num_homes=100000"
|
||||||
response = self.session.get(url)
|
response = self.session.get(url)
|
||||||
response_json = json.loads(response.text.replace("{}&&", ""))
|
response_json = json.loads(response.text.replace("{}&&", ""))
|
||||||
homes = [self._parse_home(home) for home in response_json["payload"]["homes"]] + [
|
|
||||||
self._parse_building(building) for building in response_json["payload"]["buildings"].values()
|
if "payload" in response_json:
|
||||||
]
|
homes_list = response_json["payload"].get("homes", [])
|
||||||
return homes
|
buildings_list = response_json["payload"].get("buildings", {}).values()
|
||||||
|
|
||||||
|
homes = [self._parse_home(home) for home in homes_list] + [
|
||||||
|
self._parse_building(building) for building in buildings_list
|
||||||
|
]
|
||||||
|
return homes
|
||||||
|
else:
|
||||||
|
return []
|
||||||
|
|||||||
@@ -147,18 +147,18 @@ class ZillowScraper(Scraper):
|
|||||||
if "hdpData" in result:
|
if "hdpData" in result:
|
||||||
home_info = result["hdpData"]["homeInfo"]
|
home_info = result["hdpData"]["homeInfo"]
|
||||||
address_data = {
|
address_data = {
|
||||||
"address_one": parse_address_one(home_info["streetAddress"])[0],
|
"address_one": parse_address_one(home_info.get("streetAddress"))[0],
|
||||||
"address_two": parse_address_two(home_info["unit"]) if "unit" in home_info else "#",
|
"address_two": parse_address_two(home_info["unit"]) if "unit" in home_info else "#",
|
||||||
"city": home_info["city"],
|
"city": home_info.get("city"),
|
||||||
"state": home_info["state"],
|
"state": home_info.get("state"),
|
||||||
"zip_code": home_info["zipcode"],
|
"zip_code": home_info.get("zipcode"),
|
||||||
}
|
}
|
||||||
property_obj = Property(
|
property_obj = Property(
|
||||||
site_name=self.site_name,
|
site_name=self.site_name,
|
||||||
address=Address(**address_data),
|
address=Address(**address_data),
|
||||||
property_url=f"https://www.zillow.com{result['detailUrl']}",
|
property_url=f"https://www.zillow.com{result['detailUrl']}",
|
||||||
tax_assessed_value=int(home_info["taxAssessedValue"]) if "taxAssessedValue" in home_info else None,
|
tax_assessed_value=int(home_info["taxAssessedValue"]) if "taxAssessedValue" in home_info else None,
|
||||||
property_type=PropertyType(home_info["homeType"]),
|
property_type=PropertyType(home_info.get("homeType")),
|
||||||
listing_type=ListingType(
|
listing_type=ListingType(
|
||||||
home_info["statusType"] if "statusType" in home_info else self.listing_type
|
home_info["statusType"] if "statusType" in home_info else self.listing_type
|
||||||
),
|
),
|
||||||
@@ -198,18 +198,17 @@ class ZillowScraper(Scraper):
|
|||||||
site_name=self.site_name,
|
site_name=self.site_name,
|
||||||
property_type=PropertyType("BUILDING"),
|
property_type=PropertyType("BUILDING"),
|
||||||
listing_type=ListingType(result["statusType"]),
|
listing_type=ListingType(result["statusType"]),
|
||||||
img_src=result["imgSrc"],
|
img_src=result.get("imgSrc"),
|
||||||
address=self._extract_address(result["address"]),
|
address=self._extract_address(result["address"]),
|
||||||
baths_min=result["minBaths"],
|
baths_min=result.get("minBaths"),
|
||||||
area_min=result.get("minArea"),
|
area_min=result.get("minArea"),
|
||||||
bldg_name=result.get("communityName"),
|
bldg_name=result.get("communityName"),
|
||||||
status_text=result["statusText"],
|
status_text=result.get("statusText"),
|
||||||
beds_min=result["minBeds"],
|
price_min=price_value if "+/mo" in result.get("price") else None,
|
||||||
price_min=price_value if "+/mo" in result["price"] else None,
|
price_max=price_value if "+/mo" in result.get("price") else None,
|
||||||
price_max=price_value if "+/mo" in result["price"] else None,
|
latitude=result.get("latLong", {}).get("latitude"),
|
||||||
latitude=result["latLong"]["latitude"],
|
longitude=result.get("latLong", {}).get("longitude"),
|
||||||
longitude=result["latLong"]["longitude"],
|
unit_count=result.get("unitCount"),
|
||||||
unit_count=result["unitCount"],
|
|
||||||
)
|
)
|
||||||
|
|
||||||
properties_list.append(building_obj)
|
properties_list.append(building_obj)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "homeharvest"
|
name = "homeharvest"
|
||||||
version = "0.2.8"
|
version = "0.2.13"
|
||||||
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
|
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
|
||||||
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
||||||
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
|
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
|
||||||
|
|||||||
Reference in New Issue
Block a user