Compare commits

..

4 Commits

Author SHA1 Message Date
zachary
68a3438c6e - single home return type bug fix 2025-05-05 12:29:36 -07:00
zachary
a3c5e9060e - updated queries 2025-05-03 13:55:56 -07:00
zachary
d06595fe56 - updated queries 2025-05-03 13:28:12 -07:00
zachary
e378feeefe - bug fixes 2025-04-12 18:34:35 -07:00
5 changed files with 42 additions and 22 deletions

View File

@@ -25,6 +25,7 @@ class SiteName(Enum):
class SearchPropertyType(Enum): class SearchPropertyType(Enum):
SINGLE_FAMILY = "single_family" SINGLE_FAMILY = "single_family"
APARTMENT = "apartment"
CONDOS = "condos" CONDOS = "condos"
CONDO_TOWNHOME_ROWHOME_COOP = "condo_townhome_rowhome_coop" CONDO_TOWNHOME_ROWHOME_COOP = "condo_townhome_rowhome_coop"
CONDO_TOWNHOME = "condo_townhome" CONDO_TOWNHOME = "condo_townhome"

View File

@@ -121,7 +121,10 @@ class RealtorScraper(Scraper):
property_info = response_json["data"]["home"] property_info = response_json["data"]["home"]
return [self.process_property(property_info)] if self.return_type != ReturnType.raw:
return [self.process_property(property_info)]
else:
return [property_info]
@staticmethod @staticmethod
def process_advertisers(advertisers: list[dict] | None) -> Advertisers | None: def process_advertisers(advertisers: list[dict] | None) -> Advertisers | None:
@@ -257,7 +260,7 @@ class RealtorScraper(Scraper):
sort_param = ( sort_param = (
"sort: [{ field: sold_date, direction: desc }]" "sort: [{ field: sold_date, direction: desc }]"
if self.listing_type == ListingType.SOLD if self.listing_type == ListingType.SOLD
else "sort: [{ field: list_date, direction: desc }]" else "" #: "sort: [{ field: list_date, direction: desc }]" #: prioritize normal fractal sort from realtor
) )
pending_or_contingent_param = ( pending_or_contingent_param = (
@@ -448,6 +451,7 @@ class RealtorScraper(Scraper):
"county": location_info.get("county"), "county": location_info.get("county"),
"state_code": location_info.get("state_code"), "state_code": location_info.get("state_code"),
"postal_code": location_info.get("postal_code"), "postal_code": location_info.get("postal_code"),
} }
if self.foreclosure: if self.foreclosure:

View File

@@ -36,6 +36,13 @@ _SEARCH_HOMES_DATA_BASE = """{
sqft sqft
__typename __typename
} }
photos(https: true) {
title
href
tags {
label
}
}
list_price list_price
__typename __typename
} }
@@ -90,12 +97,17 @@ _SEARCH_HOMES_DATA_BASE = """{
} }
} }
tax_record { tax_record {
cl_id
public_record_id public_record_id
last_update_date
apn
tax_parcel_id
} }
primary_photo(https: true) { primary_photo(https: true) {
href href
} }
photos(https: true) { photos(https: true) {
title
href href
tags { tags {
label label
@@ -220,19 +232,19 @@ HOMES_DATA = """%s
}""" % _SEARCH_HOMES_DATA_BASE }""" % _SEARCH_HOMES_DATA_BASE
SEARCH_HOMES_DATA = """%s SEARCH_HOMES_DATA = """%s
current_estimates { current_estimates {
__typename __typename
source { source {
__typename __typename
type type
name name
} }
estimate estimate
estimateHigh: estimate_high estimateHigh: estimate_high
estimateLow: estimate_low estimateLow: estimate_low
date date
isBestHomeValue: isbest_homevalue isBestHomeValue: isbest_homevalue
} }
}""" % _SEARCH_HOMES_DATA_BASE }""" % _SEARCH_HOMES_DATA_BASE
GENERAL_RESULTS_QUERY = """{ GENERAL_RESULTS_QUERY = """{

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "homeharvest" name = "homeharvest"
version = "0.4.6" version = "0.4.9"
description = "Real estate scraping library" description = "Real estate scraping library"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"] authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/HomeHarvest" homepage = "https://github.com/Bunsly/HomeHarvest"

View File

@@ -292,11 +292,14 @@ def test_phone_number_matching():
def test_return_type(): def test_return_type():
results = { results = {
"pandas": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100), "pandas": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100)],
"pydantic": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic"), "pydantic": [scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="pydantic")],
"raw": scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"), "raw": [
scrape_property(location="Surprise, AZ", listing_type="for_rent", limit=100, return_type="raw"),
scrape_property(location="66642", listing_type="for_rent", limit=100, return_type="raw"),
],
} }
assert isinstance(results["pandas"], pd.DataFrame) assert all(isinstance(result, pd.DataFrame) for result in results["pandas"])
assert isinstance(results["pydantic"][0], Property) assert all(isinstance(result[0], Property) for result in results["pydantic"])
assert isinstance(results["raw"][0], dict) assert all(isinstance(result[0], dict) for result in results["raw"])