- rename last_x_days parameter to property_younger_than

- add parameter docstrings for scrape_property
pull/31/head
Zachary Hampton 2023-10-04 18:06:06 -07:00
parent 6bb68766fc
commit de692faae2
5 changed files with 18 additions and 13 deletions

View File

@ -72,10 +72,10 @@ current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"output/{current_timestamp}.csv" filename = f"output/{current_timestamp}.csv"
properties = scrape_property( properties = scrape_property(
location="San Diego, CA", location="San Diego, CA",
listing_type="sold", # or (for_sale, for_rent) listing_type="sold", # or (for_sale, for_rent)
last_x_days=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent) property_younger_than=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
mls_only=True, # only fetch MLS listings mls_only=True, # only fetch MLS listings
) )
print(f"Number of properties: {len(properties)}") print(f"Number of properties: {len(properties)}")
@ -84,7 +84,6 @@ properties.to_csv(filename, index=False)
print(properties.head()) print(properties.head())
``` ```
## Output ## Output
```plaintext ```plaintext
>>> properties.head() >>> properties.head()

View File

@ -8,7 +8,7 @@ filename = f"output/{current_timestamp}.csv"
properties = scrape_property( properties = scrape_property(
location="San Diego, CA", location="San Diego, CA",
listing_type="sold", # for_sale, for_rent listing_type="sold", # for_sale, for_rent
last_x_days=30, # sold/listed in last 30 days property_younger_than=30, # sold/listed in last 30 days
mls_only=True, # only fetch MLS listings mls_only=True, # only fetch MLS listings
) )
print(f"Number of properties: {len(properties)}") print(f"Number of properties: {len(properties)}")

View File

@ -12,11 +12,17 @@ def scrape_property(
listing_type: str = "for_sale", listing_type: str = "for_sale",
radius: float = None, radius: float = None,
mls_only: bool = False, mls_only: bool = False,
last_x_days: int = None, property_younger_than: int = None,
proxy: str = None, proxy: str = None,
) -> pd.DataFrame: ) -> pd.DataFrame:
""" """
Scrape properties from Realtor.com based on a given location and listing type. Scrape properties from Realtor.com based on a given location and listing type.
:param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way")
:param listing_type: Listing Type (for_sale, for_rent, sold)
:param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses.
:param mls_only: If set, fetches only listings with MLS IDs.
:param property_younger_than: Get properties sold/listed in last _ days.
:param proxy: Proxy to use for scraping
""" """
validate_input(listing_type) validate_input(listing_type)
@ -26,7 +32,7 @@ def scrape_property(
proxy=proxy, proxy=proxy,
radius=radius, radius=radius,
mls_only=mls_only, mls_only=mls_only,
last_x_days=last_x_days, last_x_days=property_younger_than,
) )
site = RealtorScraper(scraper_input) site = RealtorScraper(scraper_input)

View File

@ -68,7 +68,7 @@ def main():
radius=args.radius, radius=args.radius,
proxy=args.proxy, proxy=args.proxy,
mls_only=args.mls_only, mls_only=args.mls_only,
last_x_days=args.days, property_younger_than=args.days,
) )
if not args.filename: if not args.filename:

View File

@ -9,7 +9,7 @@ def test_realtor_comps():
result = scrape_property( result = scrape_property(
location="2530 Al Lipscomb Way", location="2530 Al Lipscomb Way",
radius=0.5, radius=0.5,
last_x_days=180, property_younger_than=180,
listing_type="sold", listing_type="sold",
) )
@ -18,11 +18,11 @@ def test_realtor_comps():
def test_realtor_last_x_days_sold(): def test_realtor_last_x_days_sold():
days_result_30 = scrape_property( days_result_30 = scrape_property(
location="Dallas, TX", listing_type="sold", last_x_days=30 location="Dallas, TX", listing_type="sold", property_younger_than=30
) )
days_result_10 = scrape_property( days_result_10 = scrape_property(
location="Dallas, TX", listing_type="sold", last_x_days=10 location="Dallas, TX", listing_type="sold", property_younger_than=10
) )
assert all( assert all(