import warnings
from typing import Optional

import pandas as pd

from .core.scrapers import ScraperInput
from .core.scrapers.models import ListingType
from .core.scrapers.realtor import RealtorScraper
from .exceptions import InvalidListingType, NoResultsFound
from .utils import process_result, ordered_properties, validate_input


def scrape_property(
    location: str,
    listing_type: str = "for_sale",
    radius: Optional[float] = None,
    mls_only: bool = False,
    last_x_days: Optional[int] = None,
    proxy: Optional[str] = None,
) -> pd.DataFrame:
    """
    Scrape properties from Realtor.com based on a given location and listing type.

    :param location: Location to search (e.g. an address, city, or ZIP code string).
    :param listing_type: Listing category; must be a name of ``ListingType``
        (case-insensitive, e.g. ``"for_sale"``). Checked by ``validate_input``.
    :param radius: Optional search radius around ``location``; ``None`` means no
        radius filter. (Presumably in miles — confirm against ``ScraperInput``.)
    :param mls_only: If True, restrict results to MLS listings.
    :param last_x_days: Optional recency filter; ``None`` means no date filter.
    :param proxy: Optional proxy string forwarded to the scraper.
    :return: One row per property, columns ordered per ``ordered_properties``.
    :raises NoResultsFound: If the search yields no results.
    :raises InvalidListingType: Presumably raised by ``validate_input`` for an
        unknown ``listing_type`` (imported alongside — confirm in ``validate_input``).
    """
    # Reject unsupported listing types before doing any network work.
    validate_input(listing_type)

    scraper_input = ScraperInput(
        location=location,
        listing_type=ListingType[listing_type.upper()],
        proxy=proxy,
        radius=radius,
        mls_only=mls_only,
        last_x_days=last_x_days,
    )

    site = RealtorScraper(scraper_input)
    results = site.search()

    # Each raw result becomes a single-row DataFrame; concatenated below.
    properties_dfs = [process_result(result) for result in results]
    if not properties_dfs:
        raise NoResultsFound("no results found for the query")

    # pd.concat emits a FutureWarning on some pandas versions when concatenating
    # frames with all-NA columns; suppress it for a clean caller experience.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=FutureWarning)
        return pd.concat(properties_dfs, ignore_index=True)[ordered_properties]
|