Compare commits

...

12 Commits

Author SHA1 Message Date
Zachary Hampton
0d85100091 - update dependencies 2025-07-14 17:08:27 -07:00
Zachary Hampton
851ba53d81 Merge pull request #128 from Alexandre-Shofstall/fix/python39-compat
Fix syntax of __init__ line 24
2025-07-03 10:28:49 -07:00
Zachary Hampton
0fdc309262 Update pyproject.toml 2025-07-03 10:28:14 -07:00
Alexandre Shofstall
62b6726d42 Fix syntax of __init__ line 24 2025-07-03 19:20:49 +02:00
Zachary Hampton
ccf5786ce2 Merge pull request #127 from Alexandre-Shofstall/fix/python39-compat
Fix typing syntax for Python 3.9 compatibility in __init__.py
2025-07-03 09:43:26 -07:00
Zachary Hampton
b4f05b254a Update pyproject.toml 2025-07-03 09:43:10 -07:00
Alexandre Shofstall
941d1081f7 Fix typing syntax for Python 3.9 compatibility in __init__.py 2025-07-03 18:11:18 +02:00
Zachary Hampton
c788b3318d Update README.md 2025-06-19 16:52:14 -07:00
zachary
68a3438c6e - single home return type bug fix 2025-05-05 12:29:36 -07:00
zachary
a3c5e9060e - updated queries 2025-05-03 13:55:56 -07:00
zachary
d06595fe56 - updated queries 2025-05-03 13:28:12 -07:00
zachary
e378feeefe - bug fixes 2025-04-12 18:34:35 -07:00
8 changed files with 673 additions and 466 deletions

View File

@@ -8,8 +8,6 @@
- **Data Format**: Structures data to resemble MLS listings. - **Data Format**: Structures data to resemble MLS listings.
- **Export Flexibility**: Options to save as either CSV or Excel. - **Export Flexibility**: Options to save as either CSV or Excel.
[Video Guide for HomeHarvest](https://youtu.be/J1qgNPgmSLI) - _updated for release v0.3.4_
![homeharvest](https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/b3d5d727-e67b-4a9f-85d8-1e65fd18620a) ![homeharvest](https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/b3d5d727-e67b-4a9f-85d8-1e65fd18620a)
## Installation ## Installation

View File

@@ -4,13 +4,13 @@ from .core.scrapers import ScraperInput
from .utils import process_result, ordered_properties, validate_input, validate_dates, validate_limit from .utils import process_result, ordered_properties, validate_input, validate_dates, validate_limit
from .core.scrapers.realtor import RealtorScraper from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.models import ListingType, SearchPropertyType, ReturnType, Property from .core.scrapers.models import ListingType, SearchPropertyType, ReturnType, Property
from typing import Union, Optional, List
def scrape_property( def scrape_property(
location: str, location: str,
listing_type: str = "for_sale", listing_type: str = "for_sale",
return_type: str = "pandas", return_type: str = "pandas",
property_type: list[str] | None = None, property_type: Optional[List[str]] = None,
radius: float = None, radius: float = None,
mls_only: bool = False, mls_only: bool = False,
past_days: int = None, past_days: int = None,
@@ -21,7 +21,7 @@ def scrape_property(
extra_property_data: bool = True, extra_property_data: bool = True,
exclude_pending: bool = False, exclude_pending: bool = False,
limit: int = 10000 limit: int = 10000
) -> pd.DataFrame | list[dict] | list[Property]: ) -> Union[pd.DataFrame, list[dict], list[Property]]:
""" """
Scrape properties from Realtor.com based on a given location and listing type. Scrape properties from Realtor.com based on a given location and listing type.
:param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way") :param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way")

View File

@@ -25,6 +25,7 @@ class SiteName(Enum):
class SearchPropertyType(Enum): class SearchPropertyType(Enum):
SINGLE_FAMILY = "single_family" SINGLE_FAMILY = "single_family"
APARTMENT = "apartment"
CONDOS = "condos" CONDOS = "condos"
CONDO_TOWNHOME_ROWHOME_COOP = "condo_townhome_rowhome_coop" CONDO_TOWNHOME_ROWHOME_COOP = "condo_townhome_rowhome_coop"
CONDO_TOWNHOME = "condo_townhome" CONDO_TOWNHOME = "condo_townhome"

View File

@@ -121,7 +121,10 @@ class RealtorScraper(Scraper):
property_info = response_json["data"]["home"] property_info = response_json["data"]["home"]
return [self.process_property(property_info)] if self.return_type != ReturnType.raw:
return [self.process_property(property_info)]
else:
return [property_info]
@staticmethod @staticmethod
def process_advertisers(advertisers: list[dict] | None) -> Advertisers | None: def process_advertisers(advertisers: list[dict] | None) -> Advertisers | None:
@@ -257,7 +260,7 @@ class RealtorScraper(Scraper):
sort_param = ( sort_param = (
"sort: [{ field: sold_date, direction: desc }]" "sort: [{ field: sold_date, direction: desc }]"
if self.listing_type == ListingType.SOLD if self.listing_type == ListingType.SOLD
else "sort: [{ field: list_date, direction: desc }]" else "" #: "sort: [{ field: list_date, direction: desc }]" #: prioritize normal fractal sort from realtor
) )
pending_or_contingent_param = ( pending_or_contingent_param = (
@@ -448,6 +451,7 @@ class RealtorScraper(Scraper):
"county": location_info.get("county"), "county": location_info.get("county"),
"state_code": location_info.get("state_code"), "state_code": location_info.get("state_code"),
"postal_code": location_info.get("postal_code"), "postal_code": location_info.get("postal_code"),
} }
if self.foreclosure: if self.foreclosure:

View File

@@ -36,6 +36,13 @@ _SEARCH_HOMES_DATA_BASE = """{
sqft sqft
__typename __typename
} }
photos(https: true) {
title
href
tags {
label
}
}
list_price list_price
__typename __typename
} }
@@ -90,12 +97,17 @@ _SEARCH_HOMES_DATA_BASE = """{
} }
} }
tax_record { tax_record {
cl_id
public_record_id public_record_id
last_update_date
apn
tax_parcel_id
} }
primary_photo(https: true) { primary_photo(https: true) {
href href
} }
photos(https: true) { photos(https: true) {
title
href href
tags { tags {
label label
@@ -220,19 +232,19 @@ HOMES_DATA = """%s
}""" % _SEARCH_HOMES_DATA_BASE }""" % _SEARCH_HOMES_DATA_BASE
SEARCH_HOMES_DATA = """%s SEARCH_HOMES_DATA = """%s
current_estimates { current_estimates {
__typename __typename
source { source {
__typename __typename
type type
name name
} }
estimate estimate
estimateHigh: estimate_high estimateHigh: estimate_high
estimateLow: estimate_low estimateLow: estimate_low
date date
isBestHomeValue: isbest_homevalue isBestHomeValue: isbest_homevalue
} }
}""" % _SEARCH_HOMES_DATA_BASE }""" % _SEARCH_HOMES_DATA_BASE
GENERAL_RESULTS_QUERY = """{ GENERAL_RESULTS_QUERY = """{

1059
poetry.lock generated

File diff suppressed because it is too large. (Load Diff)

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "homeharvest" name = "homeharvest"
version = "0.4.6" version = "0.4.12"
description = "Real estate scraping library" description = "Real estate scraping library"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"] authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/HomeHarvest" homepage = "https://github.com/Bunsly/HomeHarvest"
@@ -11,10 +11,10 @@ homeharvest = "homeharvest.cli:main"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">=3.9,<3.13" python = ">=3.9,<3.13"
requests = "^2.31.0" requests = "^2.32.4"
pandas = "^2.1.1" pandas = "^2.3.1"
pydantic = "^2.7.4" pydantic = "^2.11.7"
tenacity = "^9.0.0" tenacity = "^9.1.2"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]

View File

@@ -292,11 +292,14 @@ def test_phone_number_matching():
def test_return_type(): def test_return_type():
results = { results = {
"pandas": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100), "pandas": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100)],
"pydantic": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic"), "pydantic": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic")],
"raw": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"), "raw": [
scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"),
scrape_property(location="66642", listing_type="for_rent", limit=100, return_type="raw"),
],
} }
assert isinstance(results["pandas"], pd.DataFrame) assert all(isinstance(result, pd.DataFrame) for result in results["pandas"])
assert isinstance(results["pydantic"][0], Property) assert all(isinstance(result[0], Property) for result in results["pydantic"])
assert isinstance(results["raw"][0], dict) assert all(isinstance(result[0], dict) for result in results["raw"])