From 088088ae515a70c7a9eac6379ca29dbd86298abd Mon Sep 17 00:00:00 2001
From: Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com>
Date: Tue, 3 Oct 2023 15:05:17 -0700
Subject: [PATCH] - last x days param

---
 homeharvest/__init__.py                       |  9 ++++---
 homeharvest/core/scrapers/__init__.py         |  2 ++
 homeharvest/core/scrapers/realtor/__init__.py | 24 +++++++++++++++----
 tests/test_realtor.py                         | 14 ++++++++++-
 4 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py
index f489674..332ff7e 100644
--- a/homeharvest/__init__.py
+++ b/homeharvest/__init__.py
@@ -106,7 +106,7 @@ def _process_result(result: Property) -> pd.DataFrame:
     return properties_df
 
 
-def _scrape_single_site(location: str, site_name: str, listing_type: str, radius: float, proxy: str = None) -> pd.DataFrame:
+def _scrape_single_site(location: str, site_name: str, listing_type: str, radius: float, proxy: str = None, sold_last_x_days: int = None) -> pd.DataFrame:
     """
     Helper function to scrape a single site.
     """
@@ -118,6 +118,7 @@ def _scrape_single_site(location: str, site_name: str, listing_type: str, radius
         site_name=SiteName.get_by_value(site_name.lower()),
         proxy=proxy,
         radius=radius,
+        sold_last_x_days=sold_last_x_days
     )
 
     site = _scrapers[site_name.lower()](scraper_input)
@@ -136,12 +137,14 @@ def scrape_property(
     site_name: Union[str, list[str]] = "realtor.com",
     listing_type: str = "for_sale",
     radius: float = None,
+    sold_last_x_days: int = None,
     proxy: str = None,
     keep_duplicates: bool = False
 ) -> pd.DataFrame:
     """
     Scrape property from various sites from a given location and listing type.
 
+    :param sold_last_x_days: Sold in last x days
     :param radius: Radius in miles to find comparable properties on individual addresses
     :param keep_duplicates:
     :param proxy:
@@ -160,12 +163,12 @@ def scrape_property(
     results = []
 
     if len(site_name) == 1:
-        final_df = _scrape_single_site(location, site_name[0], listing_type, radius, proxy)
+        final_df = _scrape_single_site(location, site_name[0], listing_type, radius, proxy, sold_last_x_days)
         results.append(final_df)
     else:
         with ThreadPoolExecutor() as executor:
             futures = {
-                executor.submit(_scrape_single_site, location, s_name, listing_type, radius, proxy): s_name
+                executor.submit(_scrape_single_site, location, s_name, listing_type, radius, proxy, sold_last_x_days): s_name
                 for s_name in site_name
             }
 
diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py
index 0ab548b..bc418e3 100644
--- a/homeharvest/core/scrapers/__init__.py
+++ b/homeharvest/core/scrapers/__init__.py
@@ -11,6 +11,7 @@ class ScraperInput:
     site_name: SiteName
     radius: float | None = None
     proxy: str | None = None
+    sold_last_x_days: int | None = None
 
 
 class Scraper:
@@ -31,6 +32,7 @@ class Scraper:
         self.listing_type = scraper_input.listing_type
         self.site_name = scraper_input.site_name
         self.radius = scraper_input.radius
+        self.sold_last_x_days = scraper_input.sold_last_x_days
 
     def search(self) -> list[Property]:
         ...
 
diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py
index e1cb8e7..6449dfd 100644
--- a/homeharvest/core/scrapers/realtor/__init__.py
+++ b/homeharvest/core/scrapers/realtor/__init__.py
@@ -4,7 +4,7 @@ homeharvest.realtor.__init__
 
 This module implements the scraper for relator.com
 """
-from ..models import Property, Address
+from ..models import Property, Address, ListingType
 from .. import Scraper
 from ....exceptions import NoResultsFound
 from ....utils import parse_address_one, parse_address_two
@@ -204,6 +204,10 @@ class RealtorScraper(Scraper):
                         }
                     }}"""
 
+        sold_date_param = ('sold_date: { min: "$today-%sD" }' % self.sold_last_x_days
+                           if self.listing_type == ListingType.SOLD and self.sold_last_x_days is not None
+                           else "")
+
         if not is_for_comps:
             query = (
                 """query Home_search(
@@ -220,11 +224,17 @@ class RealtorScraper(Scraper):
                             postal_code: $postal_code
                             state_code: $state_code
                             status: %s
+                            %s
                         }
                         limit: 200
                         offset: $offset
                     ) %s"""
-                % (self.listing_type.value.lower(), results_query))
+                % (
+                    self.listing_type.value.lower(),
+                    sold_date_param,
+                    results_query
+                )
+            )
         else:
             query = (
                 """query Property_search(
@@ -233,10 +243,16 @@ class RealtorScraper(Scraper):
                     $offset: Int!,
                 ) {
                     property_search(
-                        query: { nearby: { coordinates: $coordinates, radius: $radius } }
+                        query: {
+                            nearby: {
+                                coordinates: $coordinates
+                                radius: $radius
+                            }
+                            %s
+                        }
                         limit: 200
                         offset: $offset
-                    ) %s""" % results_query)
+                    ) %s""" % (sold_date_param, results_query))
 
         payload = {
             "query": query,
diff --git a/tests/test_realtor.py b/tests/test_realtor.py
index db8cb51..56d3ef3 100644
--- a/tests/test_realtor.py
+++ b/tests/test_realtor.py
@@ -14,7 +14,19 @@ def test_realtor_comps():
         radius=0.5,
     )
 
-    print(result)
+    assert result is not None and len(result) > 0
+
+
+def test_realtor_last_x_days_sold():
+    days_result_30 = scrape_property(
+        location="Dallas, TX", site_name="realtor.com", listing_type="sold", sold_last_x_days=30
+    )
+
+    days_result_10 = scrape_property(
+        location="Dallas, TX", site_name="realtor.com", listing_type="sold", sold_last_x_days=10
+    )
+
+    assert all([result is not None for result in [days_result_30, days_result_10]]) and len(days_result_30) != len(days_result_10)
 
 
 def test_realtor():
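Usage sketch (illustrative only, not part of the patch): the snippet below shows how the new sold_last_x_days parameter could be called through scrape_property, mirroring the added test test_realtor_last_x_days_sold. It assumes homeharvest is installed and that scrape_property is exported from homeharvest/__init__.py as in the hunks above.

# Minimal example of the sold_last_x_days filter introduced by this patch.
from homeharvest import scrape_property

# Properties in Dallas, TX sold within the last 30 days via realtor.com.
# Per the diff, sold_last_x_days only adds a sold_date filter to the GraphQL
# query when listing_type is "sold"; otherwise the filter is omitted.
sold_last_30 = scrape_property(
    location="Dallas, TX",
    site_name="realtor.com",
    listing_type="sold",
    sold_last_x_days=30,
)
print(len(sold_last_30), "properties sold in the last 30 days")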