mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-05 03:54:29 -08:00
Compare commits
8 Commits
a3c5e9060e
...
v0.4.11
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
851ba53d81 | ||
|
|
0fdc309262 | ||
|
|
62b6726d42 | ||
|
|
ccf5786ce2 | ||
|
|
b4f05b254a | ||
|
|
941d1081f7 | ||
|
|
c788b3318d | ||
|
|
68a3438c6e |
@@ -8,8 +8,6 @@
|
|||||||
- **Data Format**: Structures data to resemble MLS listings.
|
- **Data Format**: Structures data to resemble MLS listings.
|
||||||
- **Export Flexibility**: Options to save as either CSV or Excel.
|
- **Export Flexibility**: Options to save as either CSV or Excel.
|
||||||
|
|
||||||
[Video Guide for HomeHarvest](https://youtu.be/J1qgNPgmSLI) - _updated for release v0.3.4_
|
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|||||||
@@ -4,13 +4,13 @@ from .core.scrapers import ScraperInput
|
|||||||
from .utils import process_result, ordered_properties, validate_input, validate_dates, validate_limit
|
from .utils import process_result, ordered_properties, validate_input, validate_dates, validate_limit
|
||||||
from .core.scrapers.realtor import RealtorScraper
|
from .core.scrapers.realtor import RealtorScraper
|
||||||
from .core.scrapers.models import ListingType, SearchPropertyType, ReturnType, Property
|
from .core.scrapers.models import ListingType, SearchPropertyType, ReturnType, Property
|
||||||
|
from typing import Union, Optional, List
|
||||||
|
|
||||||
def scrape_property(
|
def scrape_property(
|
||||||
location: str,
|
location: str,
|
||||||
listing_type: str = "for_sale",
|
listing_type: str = "for_sale",
|
||||||
return_type: str = "pandas",
|
return_type: str = "pandas",
|
||||||
property_type: list[str] | None = None,
|
property_type: Optional[List[str]] = None,
|
||||||
radius: float = None,
|
radius: float = None,
|
||||||
mls_only: bool = False,
|
mls_only: bool = False,
|
||||||
past_days: int = None,
|
past_days: int = None,
|
||||||
@@ -21,7 +21,7 @@ def scrape_property(
|
|||||||
extra_property_data: bool = True,
|
extra_property_data: bool = True,
|
||||||
exclude_pending: bool = False,
|
exclude_pending: bool = False,
|
||||||
limit: int = 10000
|
limit: int = 10000
|
||||||
) -> pd.DataFrame | list[dict] | list[Property]:
|
) -> Union[pd.DataFrame, list[dict], list[Property]]:
|
||||||
"""
|
"""
|
||||||
Scrape properties from Realtor.com based on a given location and listing type.
|
Scrape properties from Realtor.com based on a given location and listing type.
|
||||||
:param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way")
|
:param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way")
|
||||||
|
|||||||
@@ -121,7 +121,10 @@ class RealtorScraper(Scraper):
|
|||||||
|
|
||||||
property_info = response_json["data"]["home"]
|
property_info = response_json["data"]["home"]
|
||||||
|
|
||||||
return [self.process_property(property_info)]
|
if self.return_type != ReturnType.raw:
|
||||||
|
return [self.process_property(property_info)]
|
||||||
|
else:
|
||||||
|
return [property_info]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def process_advertisers(advertisers: list[dict] | None) -> Advertisers | None:
|
def process_advertisers(advertisers: list[dict] | None) -> Advertisers | None:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "homeharvest"
|
name = "homeharvest"
|
||||||
version = "0.4.8"
|
version = "0.4.11"
|
||||||
description = "Real estate scraping library"
|
description = "Real estate scraping library"
|
||||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||||
homepage = "https://github.com/Bunsly/HomeHarvest"
|
homepage = "https://github.com/Bunsly/HomeHarvest"
|
||||||
|
|||||||
@@ -292,11 +292,14 @@ def test_phone_number_matching():
|
|||||||
|
|
||||||
def test_return_type():
|
def test_return_type():
|
||||||
results = {
|
results = {
|
||||||
"pandas": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100),
|
"pandas": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100)],
|
||||||
"pydantic": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic"),
|
"pydantic": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic")],
|
||||||
"raw": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"),
|
"raw": [
|
||||||
|
scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"),
|
||||||
|
scrape_property(location="66642", listing_type="for_rent", limit=100, return_type="raw"),
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
assert isinstance(results["pandas"], pd.DataFrame)
|
assert all(isinstance(result, pd.DataFrame) for result in results["pandas"])
|
||||||
assert isinstance(results["pydantic"][0], Property)
|
assert all(isinstance(result[0], Property) for result in results["pydantic"])
|
||||||
assert isinstance(results["raw"][0], dict)
|
assert all(isinstance(result[0], dict) for result in results["raw"])
|
||||||
|
|||||||
Reference in New Issue
Block a user