Compare commits

...

15 Commits

Author SHA1 Message Date
Cullen Watson
a433e46258 chore: update version number 2023-09-17 15:12:39 -05:00
Cullen Watson
df3519ae18 docs: add example 2023-09-17 15:10:21 -05:00
Cullen Watson
2f5ea1ca88 feat(scrapers): add zillow 2023-09-17 15:06:31 -05:00
Zachary Hampton
2f3b012747 - single address support 2023-09-16 14:34:10 -07:00
Zachary Hampton
5ea0fa0bdb - redfin city support
- test case updates
- types addition
- docs grammar
2023-09-16 13:39:03 -07:00
Zachary Hampton
2d6e746ae9 Create LICENSE 2023-09-16 10:39:36 -07:00
Zachary Hampton
a772fe45aa - rename to property 2023-09-16 10:11:39 -07:00
Zachary Hampton
4764b6bd37 Merge remote-tracking branch 'origin/master' 2023-09-15 20:59:03 -07:00
Zachary Hampton
0946abd35a - realtor init 2023-09-15 20:58:54 -07:00
Cullen Watson
0a2fb4cb31 docs: add roadmap 2023-09-15 21:47:46 -05:00
Zachary Hampton
af1f2fa531 - version increment & pyproject.toml 2023-09-15 16:12:05 -07:00
Zachary Hampton
8107103383 - redfin finished 2023-09-15 16:03:17 -07:00
Zachary Hampton
469b703288 - redfin init 2023-09-15 15:42:47 -07:00
Zachary Hampton
a79c4c6872 - housekeeping 2023-09-15 15:21:29 -07:00
Zachary Hampton
ed7e76e4b0 - base 2023-09-15 15:17:37 -07:00
15 changed files with 854 additions and 5 deletions

5
.gitignore vendored
View File

@@ -1,2 +1,5 @@
/.idea
dist
**/dist/
**/__pycache__/
**/.pytest_cache/
*.pyc

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Zachary Hampton
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1 +1,35 @@
# HomeHarvest
**HomeHarvest** aims to be the top Python real estate scraping library.
## RoadMap
- **Supported Sites**: Currently, we support scraping from sites such as `Zillow` and `RedFin`.
- **Output**: Provides the option to return the scraped data as a Pandas dataframe.
- **Under Consideration**: We're looking into the possibility of an Excel plugin to cater to a broader audience.
## Site Name Options
- `zillow`
- `redfin`
## Listing Types
- `for_rent`
- `for_sale`
### Installation
```bash
pip install --upgrade homeharvest
```
### Example Usage
```
from homeharvest import scrape_property
properties = scrape_property(
location="85281", site_name="zillow", listing_type="for_rent"
)
print(properties)
```

View File

@@ -0,0 +1,37 @@
from .core.scrapers.redfin import RedfinScraper
from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.zillow import ZillowScraper
from .core.scrapers.models import ListingType, Property, Building
from .core.scrapers import ScraperInput
from .exceptions import InvalidSite, InvalidListingType
from typing import Union
_scrapers = {
"redfin": RedfinScraper,
"realtor.com": RealtorScraper,
"zillow": ZillowScraper,
}
def scrape_property(
location: str,
site_name: str,
listing_type: str = "for_sale", #: for_sale, for_rent, sold
) -> Union[list[Building], list[Property]]: #: eventually, return pandas dataframe
if site_name.lower() not in _scrapers:
raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
if listing_type.upper() not in ListingType.__members__:
raise InvalidListingType(
f"Provided listing type, '{listing_type}', does not exist."
)
scraper_input = ScraperInput(
location=location,
listing_type=ListingType[listing_type.upper()],
)
site = _scrapers[site_name.lower()](scraper_input)
return site.search()

View File

@@ -0,0 +1,35 @@
from dataclasses import dataclass
import requests
from .models import Property, ListingType
@dataclass
class ScraperInput:
location: str
listing_type: ListingType
proxy_url: str | None = None
class Scraper:
listing_type = ListingType.FOR_SALE
def __init__(self, scraper_input: ScraperInput):
self.location = scraper_input.location
self.session = requests.Session()
Scraper.listing_type = scraper_input.listing_type
if scraper_input.proxy_url:
self.session.proxies = {
"http": scraper_input.proxy_url,
"https": scraper_input.proxy_url,
}
def search(self) -> list[Property]:
...
@staticmethod
def _parse_home(home) -> Property:
...
def handle_location(self):
...

View File

@@ -0,0 +1,52 @@
from dataclasses import dataclass
from enum import Enum
class ListingType(Enum):
FOR_SALE = "for_sale"
FOR_RENT = "for_rent"
SOLD = "sold"
@dataclass
class Address:
address_one: str
city: str
state: str
zip_code: str
address_two: str | None = None
@dataclass
class Property:
address: Address
url: str
beds: int | None = None
baths: float | None = None
stories: int | None = None
agent_name: str | None = None
year_built: int | None = None
square_feet: int | None = None
price_per_square_foot: int | None = None
year_built: int | None = None
price: int | None = None
mls_id: str | None = None
listing_type: ListingType | None = None
lot_size: int | None = None
description: str | None = None
@dataclass
class Building:
address: Address
url: str
num_units: int | None = None
min_unit_price: int | None = None
max_unit_price: int | None = None
avg_unit_price: int | None = None
listing_type: str | None = None

View File

@@ -0,0 +1,51 @@
import json
from ..models import Property, Address
from .. import Scraper
from typing import Any
class RealtorScraper(Scraper):
def __init__(self, scraper_input):
super().__init__(scraper_input)
def handle_location(self):
headers = {
"authority": "parser-external.geo.moveaws.com",
"accept": "*/*",
"accept-language": "en-US,en;q=0.9",
"origin": "https://www.realtor.com",
"referer": "https://www.realtor.com/",
"sec-ch-ua": '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "cross-site",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}
params = {
"input": self.location,
"client_id": "for-sale",
"limit": "1",
"area_types": "city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park",
}
response = self.session.get(
"https://parser-external.geo.moveaws.com/suggest",
params=params,
headers=headers,
)
response_json = response.json()
return response_json["autocomplete"][0]
def search(self):
location_info = self.handle_location()
location_type = location_info["area_type"]
"""
property types:
apartment + building + commercial + condo_townhome + condo_townhome_rowhome_coop + condos + coop + duplex_triplex + farm + investment + land + mobile + multi_family + rental + single_family + townhomes
"""
print("a")

View File

@@ -0,0 +1,114 @@
import json
from ..models import Property, Address
from .. import Scraper
from typing import Any
class RedfinScraper(Scraper):
def __init__(self, scraper_input):
super().__init__(scraper_input)
def _handle_location(self):
url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(
self.location
)
response = self.session.get(url)
response_json = json.loads(response.text.replace("{}&&", ""))
def get_region_type(match_type: str):
if match_type == "4":
return "2" #: zip
elif match_type == "2":
return "6" #: city
elif match_type == "1":
return "address" #: address, needs to be handled differently
if response_json["payload"]["exactMatch"] is not None:
target = response_json["payload"]["exactMatch"]
else:
target = response_json["payload"]["sections"][0]["rows"][0]
return target["id"].split("_")[1], get_region_type(target["type"])
@staticmethod
def _parse_home(home: dict, single_search: bool = False) -> Property:
def get_value(key: str) -> Any | None:
if key in home and "value" in home[key]:
return home[key]["value"]
if not single_search:
address = Address(
address_one=get_value("streetLine"),
city=home["city"],
state=home["state"],
zip_code=home["zip"],
)
else:
address_info = home["streetAddress"]
address = Address(
address_one=address_info["assembledAddress"],
city=home["city"],
state=home["state"],
zip_code=home["zip"],
)
url = "https://www.redfin.com{}".format(home["url"])
return Property(
address=address,
url=url,
beds=home["beds"] if "beds" in home else None,
baths=home["baths"] if "baths" in home else None,
stories=home["stories"] if "stories" in home else None,
agent_name=get_value("listingAgent"),
description=home["listingRemarks"] if "listingRemarks" in home else None,
year_built=get_value("yearBuilt")
if not single_search
else home["yearBuilt"],
square_feet=get_value("sqFt"),
price_per_square_foot=get_value("pricePerSqFt"),
price=get_value("price"),
mls_id=get_value("mlsId"),
)
def handle_address(self, home_id: str):
"""
EPs:
https://www.redfin.com/stingray/api/home/details/initialInfo?al=1&path=/TX/Austin/70-Rainey-St-78701/unit-1608/home/147337694
https://www.redfin.com/stingray/api/home/details/mainHouseInfoPanelInfo?propertyId=147337694&accessLevel=3
https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId=147337694&accessLevel=3
https://www.redfin.com/stingray/api/home/details/belowTheFold?propertyId=147337694&accessLevel=3
"""
url = "https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId={}&accessLevel=3".format(
home_id
)
response = self.session.get(url)
response_json = json.loads(response.text.replace("{}&&", ""))
parsed_home = self._parse_home(
response_json["payload"]["addressSectionInfo"], single_search=True
)
return [parsed_home]
def search(self):
region_id, region_type = self._handle_location()
if region_type == "address":
home_id = region_id
return self.handle_address(home_id)
url = "https://www.redfin.com/stingray/api/gis?al=1&region_id={}&region_type={}".format(
region_id, region_type
)
response = self.session.get(url)
response_json = json.loads(response.text.replace("{}&&", ""))
homes = [
self._parse_home(home) for home in response_json["payload"]["homes"]
] #: support buildings
return homes

View File

@@ -0,0 +1,205 @@
import re
import json
from ..models import Property, Address, Building, ListingType
from ....exceptions import NoResultsFound, PropertyNotFound
from .. import Scraper
class ZillowScraper(Scraper):
listing_type: ListingType.FOR_SALE
def __init__(self, scraper_input):
super().__init__(scraper_input)
if self.listing_type == ListingType.FOR_SALE:
self.url = f"https://www.zillow.com/homes/for_sale/{self.location}_rb/"
elif self.listing_type == ListingType.FOR_RENT:
self.url = f"https://www.zillow.com/homes/for_rent/{self.location}_rb/"
def search(self):
resp = self.session.get(self.url, headers=self._get_headers())
resp.raise_for_status()
content = resp.text
match = re.search(
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
content,
re.DOTALL,
)
if not match:
raise NoResultsFound(
"No results were found for Zillow with the given Location."
)
json_str = match.group(1)
data = json.loads(json_str)
if "searchPageState" in data["props"]["pageProps"]:
houses = data["props"]["pageProps"]["searchPageState"]["cat1"][
"searchResults"
]["listResults"]
return [self._parse_home(house) for house in houses]
elif "gdpClientCache" in data["props"]["pageProps"]:
gdp_client_cache = json.loads(data["props"]["pageProps"]["gdpClientCache"])
main_key = list(gdp_client_cache.keys())[0]
property_data = gdp_client_cache[main_key]["property"]
property = self._get_single_property_page(property_data)
return [property]
raise PropertyNotFound("Specific property data not found in the response.")
@classmethod
def _parse_home(cls, home: dict):
"""
This method is used when a user enters a generic location & zillow returns more than one property
"""
url = (
f"https://www.zillow.com{home['detailUrl']}"
if "zillow.com" not in home["detailUrl"]
else home["detailUrl"]
)
if "hdpData" in home and "homeInfo" in home["hdpData"]:
price_data = cls._extract_price(home)
address = cls._extract_address(home)
agent_name = cls._extract_agent_name(home)
beds = home["hdpData"]["homeInfo"]["bedrooms"]
baths = home["hdpData"]["homeInfo"]["bathrooms"]
listing_type = home["hdpData"]["homeInfo"].get("homeType")
return Property(
address=address,
agent_name=agent_name,
url=url,
beds=beds,
baths=baths,
listing_type=listing_type,
**price_data,
)
else:
keys = ("addressStreet", "addressCity", "addressState", "addressZipcode")
address_one, city, state, zip_code = (home[key] for key in keys)
address_one, address_two = cls._parse_address_two(address_one)
address = Address(address_one, city, state, zip_code, address_two)
building_info = cls._extract_building_info(home)
return Building(address=address, url=url, **building_info)
@classmethod
def _get_single_property_page(cls, property_data: dict):
"""
This method is used when a user enters the exact location & zillow returns just one property
"""
url = (
f"https://www.zillow.com{property_data['hdpUrl']}"
if "zillow.com" not in property_data["hdpUrl"]
else property_data["hdpUrl"]
)
address_data = property_data["address"]
address_one, address_two = cls._parse_address_two(address_data["streetAddress"])
address = Address(
address_one=address_one,
address_two=address_two,
city=address_data["city"],
state=address_data["state"],
zip_code=address_data["zipcode"],
)
return Property(
address=address,
url=url,
beds=property_data.get("bedrooms", None),
baths=property_data.get("bathrooms", None),
year_built=property_data.get("yearBuilt", None),
price=property_data.get("price", None),
lot_size=property_data.get("lotSize", None),
agent_name=property_data.get("attributionInfo", {}).get("agentName", None),
stories=property_data.get("resoFacts", {}).get("stories", None),
description=property_data.get("description", None),
mls_id=property_data.get("attributionInfo", {}).get("mlsId", None),
price_per_square_foot=property_data.get("resoFacts", {}).get(
"pricePerSquareFoot", None
),
square_feet=property_data.get("livingArea", None),
listing_type=property_data.get("homeType", None),
)
@classmethod
def _extract_building_info(cls, home: dict) -> dict:
num_units = len(home["units"])
prices = [
int(unit["price"].replace("$", "").replace(",", "").split("+")[0])
for unit in home["units"]
]
return {
"listing_type": cls.listing_type,
"num_units": len(home["units"]),
"min_unit_price": min(
(
int(unit["price"].replace("$", "").replace(",", "").split("+")[0])
for unit in home["units"]
)
),
"max_unit_price": max(
(
int(unit["price"].replace("$", "").replace(",", "").split("+")[0])
for unit in home["units"]
)
),
"avg_unit_price": sum(prices) // len(prices) if num_units else None,
}
@staticmethod
def _extract_price(home: dict) -> dict:
price = int(home["hdpData"]["homeInfo"]["priceForHDP"])
square_feet = home["hdpData"]["homeInfo"].get("livingArea")
lot_size = home["hdpData"]["homeInfo"].get("lotAreaValue")
price_per_square_foot = price // square_feet if square_feet and price else None
return {
k: v
for k, v in locals().items()
if k in ["price", "square_feet", "lot_size", "price_per_square_foot"]
}
@staticmethod
def _extract_agent_name(home: dict) -> str | None:
broker_str = home.get("brokerName", "")
match = re.search(r"Listing by: (.+)", broker_str)
return match.group(1) if match else None
@staticmethod
def _parse_address_two(address_one: str):
apt_match = re.search(r"(APT\s*.+|#[\s\S]+)$", address_one, re.I)
address_two = apt_match.group().strip() if apt_match else None
address_one = (
address_one.replace(address_two, "").strip() if address_two else address_one
)
return address_one, address_two
@staticmethod
def _extract_address(home: dict) -> Address:
keys = ("streetAddress", "city", "state", "zipcode")
address_one, city, state, zip_code = (
home["hdpData"]["homeInfo"][key] for key in keys
)
address_one, address_two = ZillowScraper._parse_address_two(address_one)
return Address(address_one, city, state, zip_code, address_two=address_two)
@staticmethod
def _get_headers():
return {
"authority": "parser-external.geo.moveaws.com",
"accept": "*/*",
"accept-language": "en-US,en;q=0.9",
"origin": "https://www.zillow.com",
"referer": "https://www.zillow.com/",
"sec-ch-ua": '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "cross-site",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}

14
homeharvest/exceptions.py Normal file
View File

@@ -0,0 +1,14 @@
class InvalidSite(Exception):
"""Raised when a provided site is does not exist."""
class InvalidListingType(Exception):
"""Raised when a provided listing type is does not exist."""
class NoResultsFound(Exception):
"""Raised when no results are found for the given location"""
class PropertyNotFound(Exception):
"""Raised when no property is found for the given address"""

245
poetry.lock generated Normal file
View File

@@ -0,0 +1,245 @@
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
[[package]]
name = "certifi"
version = "2023.7.22"
description = "Python package for providing Mozilla's CA Bundle."
optional = false
python-versions = ">=3.6"
files = [
{file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"},
{file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"},
]
[[package]]
name = "charset-normalizer"
version = "3.2.0"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
optional = false
python-versions = ">=3.7.0"
files = [
{file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"},
{file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"},
{file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"},
{file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"},
{file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"},
{file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"},
{file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"},
{file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"},
{file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"},
{file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"},
{file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"},
{file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"},
{file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"},
{file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"},
{file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"},
{file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"},
{file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"},
{file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"},
{file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"},
{file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"},
{file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"},
{file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"},
]
[[package]]
name = "colorama"
version = "0.4.6"
description = "Cross-platform colored terminal text."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
[[package]]
name = "exceptiongroup"
version = "1.1.3"
description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
files = [
{file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"},
{file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"},
]
[package.extras]
test = ["pytest (>=6)"]
[[package]]
name = "idna"
version = "3.4"
description = "Internationalized Domain Names in Applications (IDNA)"
optional = false
python-versions = ">=3.5"
files = [
{file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
]
[[package]]
name = "iniconfig"
version = "2.0.0"
description = "brain-dead simple config-ini parsing"
optional = false
python-versions = ">=3.7"
files = [
{file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
]
[[package]]
name = "packaging"
version = "23.1"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.7"
files = [
{file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
{file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
]
[[package]]
name = "pluggy"
version = "1.3.0"
description = "plugin and hook calling mechanisms for python"
optional = false
python-versions = ">=3.8"
files = [
{file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"},
{file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"},
]
[package.extras]
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "pytest"
version = "7.4.2"
description = "pytest: simple powerful testing with Python"
optional = false
python-versions = ">=3.7"
files = [
{file = "pytest-7.4.2-py3-none-any.whl", hash = "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"},
{file = "pytest-7.4.2.tar.gz", hash = "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"},
]
[package.dependencies]
colorama = {version = "*", markers = "sys_platform == \"win32\""}
exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
iniconfig = "*"
packaging = "*"
pluggy = ">=0.12,<2.0"
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
[package.extras]
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
[[package]]
name = "requests"
version = "2.31.0"
description = "Python HTTP for Humans."
optional = false
python-versions = ">=3.7"
files = [
{file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
{file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
]
[package.dependencies]
certifi = ">=2017.4.17"
charset-normalizer = ">=2,<4"
idna = ">=2.5,<4"
urllib3 = ">=1.21.1,<3"
[package.extras]
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.7"
files = [
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
]
[[package]]
name = "urllib3"
version = "2.0.4"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.7"
files = [
{file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"},
{file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"},
]
[package.extras]
brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"]
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
zstd = ["zstandard (>=0.18.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "bc3567f9501f9e18bf9f53d8b4efe1e7e3fc2d750ceda2fbab165bfa22d49c64"

View File

@@ -1,14 +1,19 @@
[tool.poetry]
name = "homeharvest"
version = "0.1.0"
description = "Real estate scraping library for Zillow & Redfin"
authors = ["Zachary Hampton <zachary@zacharysproducts.com>"]
version = "0.1.2"
description = "Real estate scraping library"
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
requests = "^2.31.0"
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.2"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

9
tests/test_realtor.py Normal file
View File

@@ -0,0 +1,9 @@
from homeharvest import scrape_property
def test_realtor():
results = [
scrape_property(location="85281", site_name="realtor.com"),
]
assert all([result is not None for result in results])

12
tests/test_redfin.py Normal file
View File

@@ -0,0 +1,12 @@
from homeharvest import scrape_property
def test_redfin():
results = [
scrape_property(location="2530 Al Lipscomb Way", site_name="redfin"),
scrape_property(location="Phoenix, AZ, USA", site_name="redfin"),
scrape_property(location="Dallas, TX, USA", site_name="redfin"),
scrape_property(location="85281", site_name="redfin"),
]
assert all([result is not None for result in results])

12
tests/test_zillow.py Normal file
View File

@@ -0,0 +1,12 @@
from homeharvest import scrape_property
def test_zillow():
results = [
scrape_property(location="2530 Al Lipscomb Way", site_name="zillow"),
scrape_property(location="Phoenix, AZ, USA", site_name="zillow"),
scrape_property(location="Dallas, TX, USA", site_name="zillow"),
scrape_property(location="85281", site_name="zillow"),
]
assert all([result is not None for result in results])