diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index d26f981..b0599be 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -8,7 +8,7 @@ from typing import Union, Optional, List def scrape_property( location: str, - listing_type: str = "for_sale", + listing_type: str | list[str] | None = None, return_type: str = "pandas", property_type: Optional[List[str]] = None, radius: float = None, @@ -47,7 +47,9 @@ def scrape_property( Scrape properties from Realtor.com based on a given location and listing type. :param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way") - :param listing_type: Listing Type (for_sale, for_rent, sold, pending) + :param listing_type: Listing Type - can be a string, list of strings, or None. + Options: for_sale, for_rent, sold, pending, off_market, new_community, other, ready_to_build + Examples: "for_sale", ["for_sale", "pending"], None (returns all types) :param return_type: Return type (pandas, pydantic, raw) :param property_type: Property Type (single_family, multi_family, condos, condo_townhome_rowhome_coop, condo_townhome, townhomes, duplex_triplex, farm, land, mobile) :param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses. @@ -73,7 +75,7 @@ def scrape_property( :param price_min, price_max: Filter by listing price :param lot_sqft_min, lot_sqft_max: Filter by lot size :param year_built_min, year_built_max: Filter by year built - :param sort_by: Sort results by field (list_date, sold_date, list_price, sqft, beds, baths) + :param sort_by: Sort results by field (list_date, sold_date, list_price, sqft, beds, baths, last_update_date) :param sort_direction: Sort direction (asc, desc) """ validate_input(listing_type) @@ -88,9 +90,17 @@ def scrape_property( ) validate_sort(sort_by, sort_direction) + # Convert listing_type to appropriate format + if listing_type is None: + converted_listing_type = None + elif isinstance(listing_type, list): + converted_listing_type = [ListingType(lt.upper()) for lt in listing_type] + else: + converted_listing_type = ListingType(listing_type.upper()) + scraper_input = ScraperInput( location=location, - listing_type=ListingType(listing_type.upper()), + listing_type=converted_listing_type, return_type=ReturnType(return_type.lower()), property_type=[SearchPropertyType[prop.upper()] for prop in property_type] if property_type else None, proxy=proxy, diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index 9c37f40..caeda2c 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -13,7 +13,7 @@ from pydantic import BaseModel class ScraperInput(BaseModel): location: str - listing_type: ListingType + listing_type: ListingType | list[ListingType] | None property_type: list[SearchPropertyType] | None = None radius: float | None = None mls_only: bool | None = False diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py index 833e99d..55aacca 100644 --- a/homeharvest/core/scrapers/models.py +++ b/homeharvest/core/scrapers/models.py @@ -43,6 +43,10 @@ class ListingType(Enum): FOR_RENT = "FOR_RENT" PENDING = "PENDING" SOLD = "SOLD" + OFF_MARKET = "OFF_MARKET" + NEW_COMMUNITY = "NEW_COMMUNITY" + OTHER = "OTHER" + READY_TO_BUILD = "READY_TO_BUILD" class PropertyType(Enum): @@ -193,6 +197,7 @@ class Property(BaseModel): pending_date: datetime | None = Field(None, description="The date listing went into pending state") last_sold_date: datetime | None = Field(None, description="Last time the Home was sold") last_status_change_date: datetime | None = Field(None, description="Last time the status of the listing changed") + last_update_date: datetime | None = Field(None, description="Last time the home was updated") prc_sqft: int | None = None new_construction: bool | None = Field(None, description="Search for new construction homes") hoa_fee: int | None = Field(None, description="Search for homes where HOA fee is known and falls within specified range") diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index f70aff1..b3dbe8f 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -46,9 +46,17 @@ class RealtorScraper(Scraper): super().__init__(scraper_input) def handle_location(self): + # Get client_id from listing_type + if self.listing_type is None: + client_id = "for-sale" + elif isinstance(self.listing_type, list): + client_id = self.listing_type[0].value.lower().replace("_", "-") if self.listing_type else "for-sale" + else: + client_id = self.listing_type.value.lower().replace("_", "-") + params = { "input": self.location, - "client_id": self.listing_type.value.lower().replace("_", "-"), + "client_id": client_id, "limit": "1", "area_types": "city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park", } @@ -134,14 +142,25 @@ class RealtorScraper(Scraper): date_param = "" # Determine date field based on listing type - if self.listing_type == ListingType.SOLD: - date_field = "sold_date" - elif self.listing_type in [ListingType.FOR_SALE, ListingType.FOR_RENT]: - date_field = "list_date" - else: # PENDING - # Skip server-side date filtering for PENDING as both pending_date and contract_date - # filters are broken in the API. Client-side filtering will be applied later. - date_field = None + # Convert listing_type to list for uniform handling + if self.listing_type is None: + listing_types = [] + date_field = None # When no listing_type is specified, skip date filtering + elif isinstance(self.listing_type, list): + listing_types = self.listing_type + # For multiple types, we'll use a general date field or skip + date_field = None # Skip date filtering for mixed types + else: + listing_types = [self.listing_type] + # Determine date field for single type + if self.listing_type == ListingType.SOLD: + date_field = "sold_date" + elif self.listing_type in [ListingType.FOR_SALE, ListingType.FOR_RENT]: + date_field = "list_date" + else: # PENDING or other types + # Skip server-side date filtering for PENDING as both pending_date and contract_date + # filters are broken in the API. Client-side filtering will be applied later. + date_field = None # Build date parameter (expand to full days if hour-based filtering is used) if date_field: @@ -250,13 +269,15 @@ class RealtorScraper(Scraper): # Build sort parameter if self.sort_by: sort_param = f"sort: [{{ field: {self.sort_by}, direction: {self.sort_direction} }}]" - elif self.listing_type == ListingType.SOLD: + elif isinstance(self.listing_type, ListingType) and self.listing_type == ListingType.SOLD: sort_param = "sort: [{ field: sold_date, direction: desc }]" else: sort_param = "" #: prioritize normal fractal sort from realtor + # Handle PENDING with or_filters (applies if PENDING is in the list or is the single type) + has_pending = ListingType.PENDING in listing_types pending_or_contingent_param = ( - "or_filters: { contingent: true, pending: true }" if self.listing_type == ListingType.PENDING else "" + "or_filters: { contingent: true, pending: true }" if has_pending else "" ) # Build bucket parameter (only use fractal sort if no custom sort is specified) @@ -264,7 +285,27 @@ class RealtorScraper(Scraper): if not self.sort_by: bucket_param = 'bucket: { sort: "fractal_v1.1.3_fr" }' - listing_type = ListingType.FOR_SALE if self.listing_type == ListingType.PENDING else self.listing_type + # Build status parameter + # For PENDING, we need to query as FOR_SALE with or_filters for pending/contingent + status_types = [] + for lt in listing_types: + if lt == ListingType.PENDING: + if ListingType.FOR_SALE not in status_types: + status_types.append(ListingType.FOR_SALE) + else: + if lt not in status_types: + status_types.append(lt) + + # Build status parameter string + if status_types: + status_values = [st.value.lower() for st in status_types] + if len(status_values) == 1: + status_param = f"status: {status_values[0]}" + else: + status_param = f"status: [{', '.join(status_values)}]" + else: + status_param = "" # No status parameter means return all types + is_foreclosure = "" if variables.get("foreclosure") is True: @@ -285,7 +326,7 @@ class RealtorScraper(Scraper): coordinates: $coordinates radius: $radius } - status: %s + %s %s %s %s @@ -297,7 +338,7 @@ class RealtorScraper(Scraper): ) %s }""" % ( is_foreclosure, - listing_type.value.lower(), + status_param, date_param, property_type_param, property_filters_param, @@ -320,7 +361,7 @@ class RealtorScraper(Scraper): county: $county postal_code: $postal_code state_code: $state_code - status: %s + %s %s %s %s @@ -333,7 +374,7 @@ class RealtorScraper(Scraper): ) %s }""" % ( is_foreclosure, - listing_type.value.lower(), + status_param, date_param, property_type_param, property_filters_param, @@ -781,7 +822,7 @@ class RealtorScraper(Scraper): return (1, 0) if self.sort_direction == "desc" else (1, float('inf')) # For datetime fields, convert string to datetime for proper sorting - if self.sort_by in ['list_date', 'sold_date', 'pending_date']: + if self.sort_by in ['list_date', 'sold_date', 'pending_date', 'last_update_date']: if isinstance(value, str): try: from datetime import datetime diff --git a/homeharvest/core/scrapers/realtor/processors.py b/homeharvest/core/scrapers/realtor/processors.py index de219bd..35e0d4f 100644 --- a/homeharvest/core/scrapers/realtor/processors.py +++ b/homeharvest/core/scrapers/realtor/processors.py @@ -126,6 +126,7 @@ def process_property(result: dict, mls_only: bool = False, extra_property_data: last_sold_date=(datetime.fromisoformat(result["last_sold_date"].replace('Z', '+00:00') if result["last_sold_date"].endswith('Z') else result["last_sold_date"]) if result.get("last_sold_date") else None), pending_date=(datetime.fromisoformat(result["pending_date"].replace('Z', '+00:00') if result["pending_date"].endswith('Z') else result["pending_date"]) if result.get("pending_date") else None), last_status_change_date=(datetime.fromisoformat(result["last_status_change_date"].replace('Z', '+00:00') if result["last_status_change_date"].endswith('Z') else result["last_status_change_date"]) if result.get("last_status_change_date") else None), + last_update_date=(datetime.fromisoformat(result["last_update_date"].replace('Z', '+00:00') if result["last_update_date"].endswith('Z') else result["last_update_date"]) if result.get("last_update_date") else None), new_construction=result["flags"].get("is_new_construction") is True, hoa_fee=(result["hoa"]["fee"] if result.get("hoa") and isinstance(result["hoa"], dict) else None), latitude=(result["location"]["address"]["coordinate"].get("lat") if able_to_get_lat_long else None), diff --git a/homeharvest/core/scrapers/realtor/queries.py b/homeharvest/core/scrapers/realtor/queries.py index 0cf6866..2a421d9 100644 --- a/homeharvest/core/scrapers/realtor/queries.py +++ b/homeharvest/core/scrapers/realtor/queries.py @@ -10,6 +10,7 @@ _SEARCH_HOMES_DATA_BASE = """{ last_sold_price last_sold_date last_status_change_date + last_update_date list_price list_price_max list_price_min diff --git a/homeharvest/utils.py b/homeharvest/utils.py index e1e228c..2b0973c 100644 --- a/homeharvest/utils.py +++ b/homeharvest/utils.py @@ -38,6 +38,7 @@ ordered_properties = [ "last_sold_date", "last_sold_price", "last_status_change_date", + "last_update_date", "assessed_value", "estimated_value", "tax", @@ -156,9 +157,17 @@ def process_result(result: Property) -> pd.DataFrame: return properties_df[ordered_properties] -def validate_input(listing_type: str) -> None: - if listing_type.upper() not in ListingType.__members__: - raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.") +def validate_input(listing_type: str | list[str] | None) -> None: + if listing_type is None: + return # None is valid - returns all types + + if isinstance(listing_type, list): + for lt in listing_type: + if lt.upper() not in ListingType.__members__: + raise InvalidListingType(f"Provided listing type, '{lt}', does not exist.") + else: + if listing_type.upper() not in ListingType.__members__: + raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.") def validate_dates(date_from: str | None, date_to: str | None) -> None: @@ -259,7 +268,7 @@ def validate_filters( def validate_sort(sort_by: str | None, sort_direction: str | None = "desc") -> None: """Validate sort parameters.""" - valid_sort_fields = ["list_date", "sold_date", "list_price", "sqft", "beds", "baths"] + valid_sort_fields = ["list_date", "sold_date", "list_price", "sqft", "beds", "baths", "last_update_date"] valid_directions = ["asc", "desc"] if sort_by and sort_by not in valid_sort_fields: