mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-05 03:54:29 -08:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 851ba53d81 | | |
| | 0fdc309262 | | |
| | 62b6726d42 | | |
| | ccf5786ce2 | | |
| | b4f05b254a | | |
| | 941d1081f7 | | |
| | c788b3318d | | |
| | 68a3438c6e | | |
| | a3c5e9060e | | |
| | d06595fe56 | | |
| | e378feeefe | | |
@@ -8,8 +8,6 @@
|
||||
- **Data Format**: Structures data to resemble MLS listings.
|
||||
- **Export Flexibility**: Options to save as either CSV or Excel.
|
||||
|
||||
[Video Guide for HomeHarvest](https://youtu.be/J1qgNPgmSLI) - _updated for release v0.3.4_
|
||||
|
||||

|
||||
|
||||
## Installation
|
||||
|
||||
@@ -4,13 +4,13 @@ from .core.scrapers import ScraperInput
|
||||
from .utils import process_result, ordered_properties, validate_input, validate_dates, validate_limit
|
||||
from .core.scrapers.realtor import RealtorScraper
|
||||
from .core.scrapers.models import ListingType, SearchPropertyType, ReturnType, Property
|
||||
|
||||
from typing import Union, Optional, List
|
||||
|
||||
def scrape_property(
|
||||
location: str,
|
||||
listing_type: str = "for_sale",
|
||||
return_type: str = "pandas",
|
||||
property_type: list[str] | None = None,
|
||||
property_type: Optional[List[str]] = None,
|
||||
radius: float = None,
|
||||
mls_only: bool = False,
|
||||
past_days: int = None,
|
||||
@@ -21,7 +21,7 @@ def scrape_property(
|
||||
extra_property_data: bool = True,
|
||||
exclude_pending: bool = False,
|
||||
limit: int = 10000
|
||||
) -> pd.DataFrame | list[dict] | list[Property]:
|
||||
) -> Union[pd.DataFrame, list[dict], list[Property]]:
|
||||
"""
|
||||
Scrape properties from Realtor.com based on a given location and listing type.
|
||||
:param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way")
|
||||
|
||||
@@ -25,6 +25,7 @@ class SiteName(Enum):
|
||||
|
||||
class SearchPropertyType(Enum):
|
||||
SINGLE_FAMILY = "single_family"
|
||||
APARTMENT = "apartment"
|
||||
CONDOS = "condos"
|
||||
CONDO_TOWNHOME_ROWHOME_COOP = "condo_townhome_rowhome_coop"
|
||||
CONDO_TOWNHOME = "condo_townhome"
|
||||
|
||||
@@ -121,7 +121,10 @@ class RealtorScraper(Scraper):
|
||||
|
||||
property_info = response_json["data"]["home"]
|
||||
|
||||
return [self.process_property(property_info)]
|
||||
if self.return_type != ReturnType.raw:
|
||||
return [self.process_property(property_info)]
|
||||
else:
|
||||
return [property_info]
|
||||
|
||||
@staticmethod
|
||||
def process_advertisers(advertisers: list[dict] | None) -> Advertisers | None:
|
||||
@@ -257,7 +260,7 @@ class RealtorScraper(Scraper):
|
||||
sort_param = (
|
||||
"sort: [{ field: sold_date, direction: desc }]"
|
||||
if self.listing_type == ListingType.SOLD
|
||||
else "sort: [{ field: list_date, direction: desc }]"
|
||||
else "" #: "sort: [{ field: list_date, direction: desc }]" #: prioritize normal fractal sort from realtor
|
||||
)
|
||||
|
||||
pending_or_contingent_param = (
|
||||
@@ -448,6 +451,7 @@ class RealtorScraper(Scraper):
|
||||
"county": location_info.get("county"),
|
||||
"state_code": location_info.get("state_code"),
|
||||
"postal_code": location_info.get("postal_code"),
|
||||
|
||||
}
|
||||
|
||||
if self.foreclosure:
|
||||
|
||||
@@ -36,6 +36,13 @@ _SEARCH_HOMES_DATA_BASE = """{
|
||||
sqft
|
||||
__typename
|
||||
}
|
||||
photos(https: true) {
|
||||
title
|
||||
href
|
||||
tags {
|
||||
label
|
||||
}
|
||||
}
|
||||
list_price
|
||||
__typename
|
||||
}
|
||||
@@ -90,12 +97,17 @@ _SEARCH_HOMES_DATA_BASE = """{
|
||||
}
|
||||
}
|
||||
tax_record {
|
||||
cl_id
|
||||
public_record_id
|
||||
last_update_date
|
||||
apn
|
||||
tax_parcel_id
|
||||
}
|
||||
primary_photo(https: true) {
|
||||
href
|
||||
}
|
||||
photos(https: true) {
|
||||
title
|
||||
href
|
||||
tags {
|
||||
label
|
||||
@@ -220,19 +232,19 @@ HOMES_DATA = """%s
|
||||
}""" % _SEARCH_HOMES_DATA_BASE
|
||||
|
||||
SEARCH_HOMES_DATA = """%s
|
||||
current_estimates {
|
||||
__typename
|
||||
source {
|
||||
__typename
|
||||
type
|
||||
name
|
||||
}
|
||||
estimate
|
||||
estimateHigh: estimate_high
|
||||
estimateLow: estimate_low
|
||||
date
|
||||
isBestHomeValue: isbest_homevalue
|
||||
}
|
||||
current_estimates {
|
||||
__typename
|
||||
source {
|
||||
__typename
|
||||
type
|
||||
name
|
||||
}
|
||||
estimate
|
||||
estimateHigh: estimate_high
|
||||
estimateLow: estimate_low
|
||||
date
|
||||
isBestHomeValue: isbest_homevalue
|
||||
}
|
||||
}""" % _SEARCH_HOMES_DATA_BASE
|
||||
|
||||
GENERAL_RESULTS_QUERY = """{
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "homeharvest"
|
||||
version = "0.4.6"
|
||||
version = "0.4.11"
|
||||
description = "Real estate scraping library"
|
||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||
homepage = "https://github.com/Bunsly/HomeHarvest"
|
||||
|
||||
@@ -292,11 +292,14 @@ def test_phone_number_matching():
|
||||
|
||||
def test_return_type():
|
||||
results = {
|
||||
"pandas": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100),
|
||||
"pydantic": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic"),
|
||||
"raw": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"),
|
||||
"pandas": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100)],
|
||||
"pydantic": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic")],
|
||||
"raw": [
|
||||
scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"),
|
||||
scrape_property(location="66642", listing_type="for_rent", limit=100, return_type="raw"),
|
||||
],
|
||||
}
|
||||
|
||||
assert isinstance(results["pandas"], pd.DataFrame)
|
||||
assert isinstance(results["pydantic"][0], Property)
|
||||
assert isinstance(results["raw"][0], dict)
|
||||
assert all(isinstance(result, pd.DataFrame) for result in results["pandas"])
|
||||
assert all(isinstance(result[0], Property) for result in results["pydantic"])
|
||||
assert all(isinstance(result[0], dict) for result in results["raw"])
|
||||
|
||||
Reference in New Issue
Block a user