- add `sold_last_x_days` parameter to filter sold listings by recency

pull/31/head
Zachary Hampton 2023-10-03 15:05:17 -07:00
parent 40bbf76db1
commit 088088ae51
4 changed files with 41 additions and 8 deletions

View File

@ -106,7 +106,7 @@ def _process_result(result: Property) -> pd.DataFrame:
return properties_df return properties_df
def _scrape_single_site(location: str, site_name: str, listing_type: str, radius: float, proxy: str = None) -> pd.DataFrame: def _scrape_single_site(location: str, site_name: str, listing_type: str, radius: float, proxy: str = None, sold_last_x_days: int = None) -> pd.DataFrame:
""" """
Helper function to scrape a single site. Helper function to scrape a single site.
""" """
@ -118,6 +118,7 @@ def _scrape_single_site(location: str, site_name: str, listing_type: str, radius
site_name=SiteName.get_by_value(site_name.lower()), site_name=SiteName.get_by_value(site_name.lower()),
proxy=proxy, proxy=proxy,
radius=radius, radius=radius,
sold_last_x_days=sold_last_x_days
) )
site = _scrapers[site_name.lower()](scraper_input) site = _scrapers[site_name.lower()](scraper_input)
@ -136,12 +137,14 @@ def scrape_property(
site_name: Union[str, list[str]] = "realtor.com", site_name: Union[str, list[str]] = "realtor.com",
listing_type: str = "for_sale", listing_type: str = "for_sale",
radius: float = None, radius: float = None,
sold_last_x_days: int = None,
proxy: str = None, proxy: str = None,
keep_duplicates: bool = False keep_duplicates: bool = False
) -> pd.DataFrame: ) -> pd.DataFrame:
""" """
Scrape property from various sites from a given location and listing type. Scrape property from various sites from a given location and listing type.
:param sold_last_x_days: Sold in last x days
:param radius: Radius in miles to find comparable properties on individual addresses :param radius: Radius in miles to find comparable properties on individual addresses
:param keep_duplicates: :param keep_duplicates:
:param proxy: :param proxy:
@ -160,12 +163,12 @@ def scrape_property(
results = [] results = []
if len(site_name) == 1: if len(site_name) == 1:
final_df = _scrape_single_site(location, site_name[0], listing_type, radius, proxy) final_df = _scrape_single_site(location, site_name[0], listing_type, radius, proxy, sold_last_x_days)
results.append(final_df) results.append(final_df)
else: else:
with ThreadPoolExecutor() as executor: with ThreadPoolExecutor() as executor:
futures = { futures = {
executor.submit(_scrape_single_site, location, s_name, listing_type, radius, proxy): s_name executor.submit(_scrape_single_site, location, s_name, listing_type, radius, proxy, sold_last_x_days): s_name
for s_name in site_name for s_name in site_name
} }

View File

@ -11,6 +11,7 @@ class ScraperInput:
site_name: SiteName site_name: SiteName
radius: float | None = None radius: float | None = None
proxy: str | None = None proxy: str | None = None
sold_last_x_days: int | None = None
class Scraper: class Scraper:
@ -31,6 +32,7 @@ class Scraper:
self.listing_type = scraper_input.listing_type self.listing_type = scraper_input.listing_type
self.site_name = scraper_input.site_name self.site_name = scraper_input.site_name
self.radius = scraper_input.radius self.radius = scraper_input.radius
self.sold_last_x_days = scraper_input.sold_last_x_days
def search(self) -> list[Property]: def search(self) -> list[Property]:
... ...

View File

@ -4,7 +4,7 @@ homeharvest.realtor.__init__
This module implements the scraper for relator.com This module implements the scraper for relator.com
""" """
from ..models import Property, Address from ..models import Property, Address, ListingType
from .. import Scraper from .. import Scraper
from ....exceptions import NoResultsFound from ....exceptions import NoResultsFound
from ....utils import parse_address_one, parse_address_two from ....utils import parse_address_one, parse_address_two
@ -204,6 +204,10 @@ class RealtorScraper(Scraper):
} }
}}""" }}"""
sold_date_param = ('sold_date: { min: "$today-%sD" }' % self.sold_last_x_days
if self.listing_type == ListingType.SOLD and self.sold_last_x_days is not None
else "")
if not is_for_comps: if not is_for_comps:
query = ( query = (
"""query Home_search( """query Home_search(
@ -220,11 +224,17 @@ class RealtorScraper(Scraper):
postal_code: $postal_code postal_code: $postal_code
state_code: $state_code state_code: $state_code
status: %s status: %s
%s
} }
limit: 200 limit: 200
offset: $offset offset: $offset
) %s""" ) %s"""
% (self.listing_type.value.lower(), results_query)) % (
self.listing_type.value.lower(),
sold_date_param,
results_query
)
)
else: else:
query = ( query = (
"""query Property_search( """query Property_search(
@ -233,10 +243,16 @@ class RealtorScraper(Scraper):
$offset: Int!, $offset: Int!,
) { ) {
property_search( property_search(
query: { nearby: { coordinates: $coordinates, radius: $radius } } query: {
nearby: {
coordinates: $coordinates
radius: $radius
}
%s
}
limit: 200 limit: 200
offset: $offset offset: $offset
) %s""" % results_query) ) %s""" % (sold_date_param, results_query))
payload = { payload = {
"query": query, "query": query,

View File

@ -14,7 +14,19 @@ def test_realtor_comps():
radius=0.5, radius=0.5,
) )
print(result) assert result is not None and len(result) > 0
def test_realtor_last_x_days_sold():
    """Verify the ``sold_last_x_days`` filter narrows sold-listing results.

    Fetches sold listings for the same location with a 30-day and a 10-day
    window and checks that both queries return data and that the wider
    window yields at least as many rows as the narrower one.
    """
    days_result_30 = scrape_property(
        location="Dallas, TX", site_name="realtor.com", listing_type="sold", sold_last_x_days=30
    )
    days_result_10 = scrape_property(
        location="Dallas, TX", site_name="realtor.com", listing_type="sold", sold_last_x_days=10
    )
    assert days_result_30 is not None and days_result_10 is not None
    # A strict != comparison is flaky: both queries are capped by the API's
    # page limit, so the two counts can legitimately be equal. The 10-day
    # window is a subset of the 30-day window, so >= is the real invariant.
    assert len(days_result_30) >= len(days_result_10) > 0
def test_realtor(): def test_realtor():