[enh] date_to and date_from

This commit is contained in:
Cullen Watson
2023-11-03 18:35:41 -05:00
parent c597a78191
commit 4edad901c5
8 changed files with 108 additions and 60 deletions

View File

@@ -1,10 +1,9 @@
import warnings
import pandas as pd
from .core.scrapers import ScraperInput
from .utils import process_result, ordered_properties, validate_input
from .utils import process_result, ordered_properties, validate_input, validate_dates
from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.models import ListingType
from .exceptions import InvalidListingType, NoResultsFound
def scrape_property(
@@ -14,6 +13,8 @@ def scrape_property(
mls_only: bool = False,
past_days: int = None,
proxy: str = None,
date_from: str = None,
date_to: str = None,
) -> pd.DataFrame:
"""
Scrape properties from Realtor.com based on a given location and listing type.
@@ -22,9 +23,11 @@ def scrape_property(
:param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses.
:param mls_only: If set, fetches only listings with MLS IDs.
:param past_days: Get properties sold or listed (dependent on your listing_type) in the last _ days.
:param date_from, date_to: Get properties sold or listed (dependent on your listing_type) between these dates. format: 2021-01-28
:param proxy: Proxy to use for scraping
"""
validate_input(listing_type)
validate_dates(date_from, date_to)
scraper_input = ScraperInput(
location=location,
@@ -33,6 +36,8 @@ def scrape_property(
radius=radius,
mls_only=mls_only,
last_x_days=past_days,
date_from=date_from,
date_to=date_to,
)
site = RealtorScraper(scraper_input)
@@ -40,7 +45,7 @@ def scrape_property(
properties_dfs = [process_result(result) for result in results]
if not properties_dfs:
raise NoResultsFound("no results found for the query")
return pd.DataFrame()
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)

View File

@@ -11,6 +11,8 @@ class ScraperInput:
mls_only: bool | None = None
proxy: str | None = None
last_x_days: int | None = None
date_from: str | None = None
date_to: str | None = None
class Scraper:
@@ -36,6 +38,8 @@ class Scraper:
self.radius = scraper_input.radius
self.last_x_days = scraper_input.last_x_days
self.mls_only = scraper_input.mls_only
self.date_from = scraper_input.date_from
self.date_to = scraper_input.date_to
def search(self) -> list[Property]:
...

View File

@@ -9,7 +9,6 @@ from typing import Dict, Union, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
from .. import Scraper
from ....exceptions import NoResultsFound
from ..models import Property, Address, ListingType, Description
@@ -38,7 +37,7 @@ class RealtorScraper(Scraper):
result = response_json["autocomplete"]
if not result:
raise NoResultsFound("No results found for location: " + self.location)
return None
return result[0]
@@ -336,15 +335,17 @@ class RealtorScraper(Scraper):
}
}"""
date_param = (
'sold_date: { min: "$today-%sD" }' % self.last_x_days
if self.listing_type == ListingType.SOLD and self.last_x_days
else (
'list_date: { min: "$today-%sD" }' % self.last_x_days
if self.last_x_days
else ""
)
)
date_param = ""
if self.listing_type == ListingType.SOLD:
if self.date_from and self.date_to:
date_param = f'sold_date: {{ min: "{self.date_from}", max: "{self.date_to}" }}'
elif self.last_x_days:
date_param = f'sold_date: {{ min: "$today-{self.last_x_days}D" }}'
else:
if self.date_from and self.date_to:
date_param = f'list_date: {{ min: "{self.date_from}", max: "{self.date_to}" }}'
elif self.last_x_days:
date_param = f'list_date: {{ min: "$today-{self.last_x_days}D" }}'
sort_param = (
"sort: [{ field: sold_date, direction: desc }]"
@@ -509,6 +510,9 @@ class RealtorScraper(Scraper):
def search(self):
location_info = self.handle_location()
if not location_info:
return []
location_type = location_info["area_type"]
search_variables = {

View File

@@ -1,6 +1,5 @@
class InvalidListingType(Exception):
"""Raised when a provided listing type is does not exist."""
class NoResultsFound(Exception):
"""Raised when no results are found for the given location"""
class InvalidDate(Exception):
"""Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23 """

View File

@@ -1,6 +1,7 @@
from .core.scrapers.models import Property, ListingType
import pandas as pd
from .exceptions import InvalidListingType
from datetime import datetime
from .core.scrapers.models import Property, ListingType
from .exceptions import InvalidListingType, InvalidDate
ordered_properties = [
"property_url",
@@ -70,3 +71,18 @@ def validate_input(listing_type: str) -> None:
raise InvalidListingType(
f"Provided listing type, '{listing_type}', does not exist."
)
def validate_dates(date_from: str | None, date_to: str | None) -> None:
if (date_from is not None and date_to is None) or (date_from is None and date_to is not None):
raise InvalidDate("Both date_from and date_to must be provided.")
if date_from and date_to:
try:
date_from_obj = datetime.strptime(date_from, "%Y-%m-%d")
date_to_obj = datetime.strptime(date_to, "%Y-%m-%d")
if date_to_obj < date_from_obj:
raise InvalidDate("date_to must be after date_from.")
except ValueError as e:
raise InvalidDate(f"Invalid date format or range")