mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-04 19:44:29 -08:00
fix: filter dup on street, unit, city
This commit is contained in:
@@ -1,7 +1,29 @@
|
||||
import re
|
||||
|
||||
|
||||
def parse_address_two(street_address: str):
|
||||
def parse_address_two(street_address: str) -> tuple:
    """Split a trailing unit designator off a street address.

    A designator is ``APT``, ``UNIT`` or ``LOT`` (any case, optionally
    followed by whitespace) or ``#``, followed by an alphanumeric identifier
    at the very end of the string.

    Args:
        street_address: Full street line, e.g. ``"810 E Colter St APT 32"``.

    Returns:
        ``(main_address, unit)`` where ``unit`` is normalized to ``"#<id>"``
        (identifier case preserved), or ``(street_address, None)`` when the
        input is empty/``None`` or has no trailing designator.
    """
    if not street_address:
        return street_address, None

    # \b stops a keyword embedded in a longer word ("Charlotte", "Baptist")
    # from being mistaken for a unit designator; "#" needs no boundary.
    apt_match = re.search(
        r"(?:\b(?:APT|UNIT|LOT)\s*|#)([\dA-Z]+)$",
        street_address,
        re.I,
    )
    if not apt_match:
        return street_address, None

    # Slice at the match rather than str.replace(), which would also delete
    # earlier occurrences of the same text (e.g. the "#1" inside "#12").
    main_address = street_address[: apt_match.start()].strip()
    return main_address, "#" + apt_match.group(1)
|
||||
|
||||
|
||||
def parse_unit(street_address: str):
|
||||
if not street_address:
|
||||
return None
|
||||
apt_match = re.search(
|
||||
@@ -19,7 +41,7 @@ def parse_address_two(street_address: str):
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke check: print the parse result for a few sample addresses.
    sample_addresses = (
        "810 E Colter St APT 32",
        "4303 E Cactus Rd Apt 126",
        "1234 Elm Street apt 2B",
        "1234 Elm Street UNIT 3A",
        "1234 Elm Street unit 3A",
    )
    for sample in sample_addresses:
        print(parse_address_two(sample))
|
||||
|
||||
Reference in New Issue
Block a user