- last x days param

pull/31/head
Zachary Hampton 2023-10-03 15:05:17 -07:00
parent 40bbf76db1
commit 088088ae51
4 changed files with 41 additions and 8 deletions

View File

@ -106,7 +106,7 @@ def _process_result(result: Property) -> pd.DataFrame:
return properties_df
def _scrape_single_site(location: str, site_name: str, listing_type: str, radius: float, proxy: str = None) -> pd.DataFrame:
def _scrape_single_site(location: str, site_name: str, listing_type: str, radius: float, proxy: str = None, sold_last_x_days: int = None) -> pd.DataFrame:
"""
Helper function to scrape a single site.
"""
@ -118,6 +118,7 @@ def _scrape_single_site(location: str, site_name: str, listing_type: str, radius
site_name=SiteName.get_by_value(site_name.lower()),
proxy=proxy,
radius=radius,
sold_last_x_days=sold_last_x_days
)
site = _scrapers[site_name.lower()](scraper_input)
@ -136,12 +137,14 @@ def scrape_property(
site_name: Union[str, list[str]] = "realtor.com",
listing_type: str = "for_sale",
radius: float = None,
sold_last_x_days: int = None,
proxy: str = None,
keep_duplicates: bool = False
) -> pd.DataFrame:
"""
Scrape property from various sites from a given location and listing type.
:param sold_last_x_days: Sold in last x days
:param radius: Radius in miles to find comparable properties on individual addresses
:param keep_duplicates:
:param proxy:
@ -160,12 +163,12 @@ def scrape_property(
results = []
if len(site_name) == 1:
final_df = _scrape_single_site(location, site_name[0], listing_type, radius, proxy)
final_df = _scrape_single_site(location, site_name[0], listing_type, radius, proxy, sold_last_x_days)
results.append(final_df)
else:
with ThreadPoolExecutor() as executor:
futures = {
executor.submit(_scrape_single_site, location, s_name, listing_type, radius, proxy): s_name
executor.submit(_scrape_single_site, location, s_name, listing_type, radius, proxy, sold_last_x_days): s_name
for s_name in site_name
}

View File

@ -11,6 +11,7 @@ class ScraperInput:
site_name: SiteName
radius: float | None = None
proxy: str | None = None
sold_last_x_days: int | None = None
class Scraper:
@ -31,6 +32,7 @@ class Scraper:
self.listing_type = scraper_input.listing_type
self.site_name = scraper_input.site_name
self.radius = scraper_input.radius
self.sold_last_x_days = scraper_input.sold_last_x_days
def search(self) -> list[Property]:
...

View File

@ -4,7 +4,7 @@ homeharvest.realtor.__init__
This module implements the scraper for realtor.com
"""
from ..models import Property, Address
from ..models import Property, Address, ListingType
from .. import Scraper
from ....exceptions import NoResultsFound
from ....utils import parse_address_one, parse_address_two
@ -204,6 +204,10 @@ class RealtorScraper(Scraper):
}
}}"""
sold_date_param = ('sold_date: { min: "$today-%sD" }' % self.sold_last_x_days
if self.listing_type == ListingType.SOLD and self.sold_last_x_days is not None
else "")
if not is_for_comps:
query = (
"""query Home_search(
@ -220,11 +224,17 @@ class RealtorScraper(Scraper):
postal_code: $postal_code
state_code: $state_code
status: %s
%s
}
limit: 200
offset: $offset
) %s"""
% (self.listing_type.value.lower(), results_query))
% (
self.listing_type.value.lower(),
sold_date_param,
results_query
)
)
else:
query = (
"""query Property_search(
@ -233,10 +243,16 @@ class RealtorScraper(Scraper):
$offset: Int!,
) {
property_search(
query: { nearby: { coordinates: $coordinates, radius: $radius } }
query: {
nearby: {
coordinates: $coordinates
radius: $radius
}
%s
}
limit: 200
offset: $offset
) %s""" % results_query)
) %s""" % (sold_date_param, results_query))
payload = {
"query": query,

View File

@ -14,7 +14,19 @@ def test_realtor_comps():
radius=0.5,
)
print(result)
assert result is not None and len(result) > 0
def test_realtor_last_x_days_sold():
    """Check that ``sold_last_x_days`` changes the sold-listing results.

    Runs the same "Dallas, TX" sold search on realtor.com twice, once with a
    30-day window and once with a 10-day window. Both calls must return a
    result, and the two results must differ in length.
    """
    # Arguments shared by both searches; only the day window varies.
    shared_kwargs = {
        "location": "Dallas, TX",
        "site_name": "realtor.com",
        "listing_type": "sold",
    }

    days_result_30 = scrape_property(sold_last_x_days=30, **shared_kwargs)
    days_result_10 = scrape_property(sold_last_x_days=10, **shared_kwargs)

    # Verify both results exist before comparing their sizes.
    assert days_result_30 is not None and days_result_10 is not None
    assert len(days_result_30) != len(days_result_10)
def test_realtor():