fix: filter duplicates on street address, unit, and city

This commit is contained in:
Cullen Watson
2023-09-18 17:42:16 -05:00
parent 8e140a0e45
commit ca260fd2b4
8 changed files with 71 additions and 35 deletions

View File

@@ -140,7 +140,9 @@ def scrape_property(
if len(site_name) == 1:
final_df = _scrape_single_site(location, site_name[0], listing_type)
- final_df = final_df.drop_duplicates(subset="street_address", keep="first")
+ final_df = final_df.drop_duplicates(
+ subset=["street_address", "city", "unit"], keep="first"
+ )
return final_df
results = []
@@ -157,5 +159,7 @@ def scrape_property(
if not results:
return pd.DataFrame()
final_df = pd.concat(results, ignore_index=True)
- final_df = final_df.drop_duplicates(subset="street_address", keep="first")
+ final_df = final_df.drop_duplicates(
+ subset=["street_address", "city", "unit"], keep="first"
+ )
return final_df