diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py index 1f78893..afac3e1 100644 --- a/homeharvest/core/scrapers/models.py +++ b/homeharvest/core/scrapers/models.py @@ -52,6 +52,7 @@ class PropertyType(Enum): @dataclass class Address: + full_line: str | None = None street: str | None = None unit: str | None = None city: str | None = None diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index b6bd95b..a80e3dc 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -52,6 +52,7 @@ class RealtorScraper(Scraper): listing_id } address { + line street_direction street_number street_name @@ -236,6 +237,7 @@ class RealtorScraper(Scraper): stories } address { + line street_direction street_number street_name @@ -352,6 +354,7 @@ class RealtorScraper(Scraper): street_number street_name street_suffix + line unit city state_code @@ -657,6 +660,8 @@ class RealtorScraper(Scraper): if not self.extra_property_data: return {} + #: TODO: migrate "advertisers" and "estimates" to general query + query = """query GetHome($property_id: ID!) { home(property_id: $property_id) { __typename @@ -765,6 +770,7 @@ class RealtorScraper(Scraper): address = result["address"] return Address( + full_line=address.get("line"), street=" ".join( part for part in [ address.get("street_number"), diff --git a/homeharvest/utils.py b/homeharvest/utils.py index 55eb7a3..feb4105 100644 --- a/homeharvest/utils.py +++ b/homeharvest/utils.py @@ -10,6 +10,7 @@ ordered_properties = [ "status", "text", "style", + "full_street_line", "street", "unit", "city", @@ -55,6 +56,7 @@ def process_result(result: Property) -> pd.DataFrame: if "address" in prop_data: address_data = prop_data["address"] + prop_data["full_street_line"] = address_data.full_line prop_data["street"] = address_data.street prop_data["unit"] = address_data.unit prop_data["city"] = address_data.city diff --git a/pyproject.toml b/pyproject.toml index fb3d0c5..f51150e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.3.22" +version = "0.3.23" description = "Real estate scraping library" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/Bunsly/HomeHarvest" diff --git a/tests/test_realtor.py b/tests/test_realtor.py index f475eee..9a37da4 100644 --- a/tests/test_realtor.py +++ b/tests/test_realtor.py @@ -156,3 +156,12 @@ def test_realtor_without_extra_details(): ] assert results[0] != results[1] + + +def test_pr_zip_code(): + results = scrape_property( + location="00741", + listing_type="for_sale", + ) + + assert results is not None and len(results) > 0