# HomeHarvest/homeharvest/__init__.py

import warnings
import pandas as pd
from .core.scrapers import ScraperInput
from .utils import process_result, ordered_properties, validate_input
2023-09-15 20:58:54 -07:00
from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.models import ListingType
from .exceptions import InvalidListingType, NoResultsFound
2023-09-15 15:17:37 -07:00
def scrape_property(
    location: str,
    listing_type: str = "for_sale",
    radius: float = None,
    mls_only: bool = False,
    property_younger_than: int = None,
    pending_or_contingent: bool = False,
    proxy: str = None,
) -> pd.DataFrame:
    """
    Search Realtor.com for a location and return the listings as one DataFrame.

    The listing type is checked with ``validate_input`` and then resolved to a
    ``ListingType`` member by its upper-cased name. Every result returned by the
    scraper is passed through ``process_result`` and the resulting frames are
    concatenated into a single table.

    :param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way")
    :param listing_type: Listing Type (for_sale, for_rent, sold)
    :param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses.
    :param mls_only: If set, fetches only listings with MLS IDs.
    :param property_younger_than: Get properties sold/listed in last _ days.
    :param pending_or_contingent: If set, fetches only pending or contingent listings.
        Only applicable for for_sale listings from general area searches.
    :param proxy: Proxy to use for scraping
    :return: Concatenated results with a fresh integer index, restricted to the
        columns named in ``ordered_properties``.
    :raises NoResultsFound: If the search produced no results.
    """
    validate_input(listing_type)

    # Resolve the enum member up front so the request construction below is
    # a plain field-by-field mapping of the caller's arguments.
    resolved_type = ListingType[listing_type.upper()]
    request = ScraperInput(
        location=location,
        listing_type=resolved_type,
        radius=radius,
        mls_only=mls_only,
        last_x_days=property_younger_than,
        pending_or_contingent=pending_or_contingent,
        proxy=proxy,
    )

    frames = []
    for item in RealtorScraper(request).search():
        frames.append(process_result(item))

    if not frames:
        raise NoResultsFound("no results found for the query")

    # pandas may emit FutureWarning while concatenating; silence it only for
    # the duration of this call rather than process-wide.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=FutureWarning)
        combined = pd.concat(frames, ignore_index=True, axis=0)
        return combined[ordered_properties]