- full street line (data quality improvement)

pull/82/head v0.3.23
Zachary Hampton 2024-05-12 18:49:44 -07:00
parent 3458a08383
commit b23b55ca80
5 changed files with 19 additions and 1 deletions

View File

@ -52,6 +52,7 @@ class PropertyType(Enum):
@dataclass
class Address:
full_line: str | None = None
street: str | None = None
unit: str | None = None
city: str | None = None

View File

@ -52,6 +52,7 @@ class RealtorScraper(Scraper):
listing_id
}
address {
line
street_direction
street_number
street_name
@ -236,6 +237,7 @@ class RealtorScraper(Scraper):
stories
}
address {
line
street_direction
street_number
street_name
@ -352,6 +354,7 @@ class RealtorScraper(Scraper):
street_number
street_name
street_suffix
line
unit
city
state_code
@ -657,6 +660,8 @@ class RealtorScraper(Scraper):
if not self.extra_property_data:
return {}
#: TODO: migrate "advertisers" and "estimates" to general query
query = """query GetHome($property_id: ID!) {
home(property_id: $property_id) {
__typename
@ -765,6 +770,7 @@ class RealtorScraper(Scraper):
address = result["address"]
return Address(
full_line=address.get("line"),
street=" ".join(
part for part in [
address.get("street_number"),

View File

@ -10,6 +10,7 @@ ordered_properties = [
"status",
"text",
"style",
"full_street_line",
"street",
"unit",
"city",
@ -55,6 +56,7 @@ def process_result(result: Property) -> pd.DataFrame:
if "address" in prop_data:
address_data = prop_data["address"]
prop_data["full_street_line"] = address_data.full_line
prop_data["street"] = address_data.street
prop_data["unit"] = address_data.unit
prop_data["city"] = address_data.city

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "homeharvest"
version = "0.3.22"
version = "0.3.23"
description = "Real estate scraping library"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/HomeHarvest"

View File

@ -156,3 +156,12 @@ def test_realtor_without_extra_details():
]
assert results[0] != results[1]
def test_pr_zip_code():
    """Check that scraping for-sale listings by ZIP code "00741" yields results.

    NOTE(review): "pr" in the test name presumably stands for Puerto Rico;
    confirm against the project's test conventions.
    """
    listings = scrape_property(location="00741", listing_type="for_sale")

    # The scrape must return an object, and that object must be non-empty.
    assert listings is not None
    assert len(listings) > 0