Compare commits

..

7 Commits

Author SHA1 Message Date
Zachary Hampton
ccf5786ce2 Merge pull request #127 from Alexandre-Shofstall/fix/python39-compat
Fix typing syntax for Python 3.9 compatibility in __init__.py
2025-07-03 09:43:26 -07:00
Zachary Hampton
b4f05b254a Update pyproject.toml 2025-07-03 09:43:10 -07:00
Alexandre Shofstall
941d1081f7 Fix typing syntax for Python 3.9 compatibility in __init__.py 2025-07-03 18:11:18 +02:00
Zachary Hampton
c788b3318d Update README.md 2025-06-19 16:52:14 -07:00
zachary
68a3438c6e - single home return type bug fix 2025-05-05 12:29:36 -07:00
zachary
a3c5e9060e - updated queries 2025-05-03 13:55:56 -07:00
zachary
d06595fe56 - updated queries 2025-05-03 13:28:12 -07:00
6 changed files with 41 additions and 16 deletions

View File

@@ -8,8 +8,6 @@
 - **Data Format**: Structures data to resemble MLS listings.
 - **Export Flexibility**: Options to save as either CSV or Excel.
-[Video Guide for HomeHarvest](https://youtu.be/J1qgNPgmSLI) - _updated for release v0.3.4_
 ![homeharvest](https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/b3d5d727-e67b-4a9f-85d8-1e65fd18620a)
 ## Installation

View File

@@ -4,13 +4,13 @@ from .core.scrapers import ScraperInput
 from .utils import process_result, ordered_properties, validate_input, validate_dates, validate_limit
 from .core.scrapers.realtor import RealtorScraper
 from .core.scrapers.models import ListingType, SearchPropertyType, ReturnType, Property
+from typing import Optional, List
 def scrape_property(
 location: str,
 listing_type: str = "for_sale",
 return_type: str = "pandas",
-property_type: list[str] | None = None,
+property_type: Optional[List[str]] = None,
 radius: float = None,
 mls_only: bool = False,
 past_days: int = None,

View File

@@ -121,7 +121,10 @@ class RealtorScraper(Scraper):
 property_info = response_json["data"]["home"]
-return [self.process_property(property_info)]
+if self.return_type != ReturnType.raw:
+return [self.process_property(property_info)]
+else:
+return [property_info]
 @staticmethod
 def process_advertisers(advertisers: list[dict] | None) -> Advertisers | None:
@@ -305,15 +308,20 @@ class RealtorScraper(Scraper):
 )
 elif search_type == "area": #: general search, came from a general location
 query = """query Home_search(
-$location: String!,
+$city: String,
+$county: [String],
+$state_code: String,
+$postal_code: String
 $offset: Int,
 ) {
 home_search(
 query: {
 %s
-search_location: {location: $location}
+city: $city
+county: $county
+postal_code: $postal_code
+state_code: $state_code
 status: %s
-unique: true
 %s
 %s
 %s
@@ -439,7 +447,11 @@ class RealtorScraper(Scraper):
 else: #: general search, location
 search_variables |= {
-"location": self.location,
+"city": location_info.get("city"),
+"county": location_info.get("county"),
+"state_code": location_info.get("state_code"),
+"postal_code": location_info.get("postal_code"),
 }
if self.foreclosure: if self.foreclosure:

View File

@@ -36,6 +36,13 @@ _SEARCH_HOMES_DATA_BASE = """{
 sqft
 __typename
 }
+photos(https: true) {
+title
+href
+tags {
+label
+}
+}
 list_price
 __typename
 }
@@ -90,12 +97,17 @@ _SEARCH_HOMES_DATA_BASE = """{
 }
 }
 tax_record {
+cl_id
 public_record_id
+last_update_date
+apn
+tax_parcel_id
 }
 primary_photo(https: true) {
 href
 }
 photos(https: true) {
+title
 href
 tags {
 label

View File

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "homeharvest"
-version = "0.4.7"
+version = "0.4.10"
 description = "Real estate scraping library"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/HomeHarvest"

View File

@@ -292,11 +292,14 @@ def test_phone_number_matching():
 def test_return_type():
 results = {
-"pandas": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100),
-"pydantic": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic"),
-"raw": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"),
+"pandas": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100)],
+"pydantic": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic")],
+"raw": [
+scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"),
+scrape_property(location="66642", listing_type="for_rent", limit=100, return_type="raw"),
+],
 }
-assert isinstance(results["pandas"], pd.DataFrame)
-assert isinstance(results["pydantic"][0], Property)
-assert isinstance(results["raw"][0], dict)
+assert all(isinstance(result, pd.DataFrame) for result in results["pandas"])
+assert all(isinstance(result[0], Property) for result in results["pydantic"])
+assert all(isinstance(result[0], dict) for result in results["raw"])