- refactor

pull/3/head
Zachary Hampton 2023-09-18 15:22:43 -07:00
parent d0a6a66b6a
commit 94e5b090da
1 changed file with 14 additions and 13 deletions

View File

@@ -120,7 +120,7 @@ def _scrape_single_site(
def scrape_property( def scrape_property(
location: str, location: str,
site_name: Union[str, list[str]] = list(_scrapers.keys()), site_name: Union[str, list[str]] = None,
listing_type: str = "for_sale", listing_type: str = "for_sale",
) -> pd.DataFrame: ) -> pd.DataFrame:
""" """
@@ -138,12 +138,12 @@ def scrape_property(
if not isinstance(site_name, list): if not isinstance(site_name, list):
site_name = [site_name] site_name = [site_name]
results = []
if len(site_name) == 1: if len(site_name) == 1:
final_df = _scrape_single_site(location, site_name[0], listing_type) final_df = _scrape_single_site(location, site_name[0], listing_type)
final_df = final_df.drop_duplicates(subset="street_address", keep="first") results.append(final_df)
return final_df else:
results = []
with ThreadPoolExecutor() as executor: with ThreadPoolExecutor() as executor:
futures = { futures = {
executor.submit(_scrape_single_site, location, s_name, listing_type): s_name executor.submit(_scrape_single_site, location, s_name, listing_type): s_name
@@ -156,6 +156,7 @@ def scrape_property(
if not results: if not results:
return pd.DataFrame() return pd.DataFrame()
final_df = pd.concat(results, ignore_index=True) final_df = pd.concat(results, ignore_index=True)
final_df = final_df.drop_duplicates(subset="street_address", keep="first") final_df = final_df.drop_duplicates(subset="street_address", keep="first")
return final_df return final_df