refactor: merge master
commit
4726764482
|
@ -120,7 +120,7 @@ def _scrape_single_site(
|
||||||
|
|
||||||
def scrape_property(
|
def scrape_property(
|
||||||
location: str,
|
location: str,
|
||||||
site_name: Union[str, list[str]] = list(_scrapers.keys()),
|
site_name: Union[str, list[str]] = None,
|
||||||
listing_type: str = "for_sale",
|
listing_type: str = "for_sale",
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
|
@ -138,28 +138,25 @@ def scrape_property(
|
||||||
if not isinstance(site_name, list):
|
if not isinstance(site_name, list):
|
||||||
site_name = [site_name]
|
site_name = [site_name]
|
||||||
|
|
||||||
|
results = []
|
||||||
|
|
||||||
if len(site_name) == 1:
|
if len(site_name) == 1:
|
||||||
final_df = _scrape_single_site(location, site_name[0], listing_type)
|
final_df = _scrape_single_site(location, site_name[0], listing_type)
|
||||||
final_df = final_df.drop_duplicates(
|
results.append(final_df)
|
||||||
subset=["street_address", "city", "unit"], keep="first"
|
else:
|
||||||
)
|
with ThreadPoolExecutor() as executor:
|
||||||
return final_df
|
futures = {
|
||||||
|
executor.submit(_scrape_single_site, location, s_name, listing_type): s_name
|
||||||
|
for s_name in site_name
|
||||||
|
}
|
||||||
|
|
||||||
results = []
|
for future in concurrent.futures.as_completed(futures):
|
||||||
with ThreadPoolExecutor() as executor:
|
result = future.result()
|
||||||
futures = {
|
results.append(result)
|
||||||
executor.submit(_scrape_single_site, location, s_name, listing_type): s_name
|
|
||||||
for s_name in site_name
|
|
||||||
}
|
|
||||||
|
|
||||||
for future in concurrent.futures.as_completed(futures):
|
|
||||||
result = future.result()
|
|
||||||
results.append(result)
|
|
||||||
|
|
||||||
if not results:
|
if not results:
|
||||||
return pd.DataFrame()
|
return pd.DataFrame()
|
||||||
|
|
||||||
final_df = pd.concat(results, ignore_index=True)
|
final_df = pd.concat(results, ignore_index=True)
|
||||||
final_df = final_df.drop_duplicates(
|
final_df = final_df.drop_duplicates(subset=["street_address", "city", "unit"], keep="first")
|
||||||
subset=["street_address", "city", "unit"], keep="first"
|
|
||||||
)
|
|
||||||
return final_df
|
return final_df
|
Loading…
Reference in New Issue