Compare commits

...

69 Commits

Author SHA1 Message Date
Zachary Hampton
f726548cc6 Update pyproject.toml 2023-10-18 09:35:48 -07:00
Zachary Hampton
fad7d670eb Update README.md 2023-10-18 08:37:42 -07:00
Zachary Hampton
89a6f93c9f Update pyproject.toml 2023-10-18 08:37:26 -07:00
Zachary Hampton
e1090b06e4 Update README.md 2023-10-17 20:22:25 -07:00
Cullen Watson
5036e74b60 Merge branch 'master' of https://github.com/ZacharyHampton/HomeHarvest 2023-10-09 11:30:17 -05:00
Cullen Watson
2cb544bc8d [chore] display clickable URLs in jupyter 2023-10-09 11:28:56 -05:00
Zachary Hampton
68cb365e03 Merge pull request #34 from ZacharyHampton/days_on_mls
[enh] days_on_mls attr
2023-10-09 09:04:59 -07:00
Cullen Watson
23876d5725 [chore] function types 2023-10-09 11:02:51 -05:00
Cullen Watson
b59d55f6b5 [enh] days_on_mls attr 2023-10-09 11:00:36 -05:00
Cullen Watson
3c3adb5f29 [docs] update video 2023-10-05 20:24:23 -05:00
Zachary Hampton
6ede8622cc - pending listing support
- removal of pending_or_contingent param
2023-10-05 11:43:00 -07:00
Cullen Watson
9f50d33bdb [chore] remove unused dependency 2023-10-05 10:11:45 -05:00
Cullen Watson
735ec021f7 [docs] README 2023-10-05 10:03:21 -05:00
Zachary Hampton
00537329cf - version bump 2023-10-04 21:35:21 -07:00
Zachary Hampton
a9225b532f - rename days variable 2023-10-04 21:35:14 -07:00
Zachary Hampton
ba7ad069c9 Merge pull request #32 from ZacharyHampton/key_error
[fix] keyerror on style
2023-10-04 20:35:05 -07:00
Cullen Watson
22bda972b0 [chore] version number 2023-10-04 22:34:52 -05:00
Cullen Watson
6f5bbf79a4 [fix] keyerror on style 2023-10-04 22:33:21 -05:00
Cullen Watson
608cceba34 [docs] reorder 2023-10-04 22:12:16 -05:00
Cullen Watson
3609586995 [docs]: add contingent to example 2023-10-04 22:11:38 -05:00
Cullen Watson
68c7e411e4 [docs] pending / contingent searches 2023-10-04 22:07:51 -05:00
Cullen Watson
5e825601a7 [docs] update example 2023-10-04 21:50:54 -05:00
Cullen Watson
ce3f94d0af [docs] update example 2023-10-04 21:50:16 -05:00
Zachary Hampton
4a1116440d Merge pull request #31 from ZacharyHampton/v0.3
v0.3
2023-10-04 19:26:44 -07:00
Cullen Watson
2d092c595f [docs]: Update README.md 2023-10-04 21:24:24 -05:00
Cullen Watson
4dbb064fe9 [docs]: Update README.md 2023-10-04 21:21:45 -05:00
Cullen Watson
4e78248032 Update README.md 2023-10-04 21:17:49 -05:00
Zachary Hampton
37e20f4469 - remove neighborhoods
- rename data
2023-10-04 18:44:47 -07:00
Zachary Hampton
8a5f0dc2c9 - pending or contingent support 2023-10-04 18:25:01 -07:00
Zachary Hampton
de692faae2 - rename last_x_days
- docstrings for scrape_property
2023-10-04 18:06:06 -07:00
Zachary Hampton
6bb68766fc - realtor tests 2023-10-04 12:04:05 -07:00
Zachary Hampton
446d5488b8 - single address support again 2023-10-04 10:07:32 -07:00
Cullen Watson
68e15ce696 [docs] clarify example 2023-10-04 10:14:11 -05:00
Cullen Watson
c4870677c2 [enh]: make last_x_days generic
add mls_only
make radius generic
2023-10-04 10:11:53 -05:00
Cullen Watson
51bde20c3c [chore]: clean up 2023-10-04 08:58:55 -05:00
Zachary Hampton
f8c0dd766d - realtor support 2023-10-03 23:33:53 -07:00
Zachary Hampton
f06a01678c - cli readme update 2023-10-03 22:31:23 -07:00
Zachary Hampton
d2879734e6 - cli update 2023-10-03 22:25:29 -07:00
Zachary Hampton
bf81ef413f - version bump 2023-10-03 22:22:09 -07:00
Zachary Hampton
29664e4eee - cullen merge 2023-10-03 22:21:16 -07:00
Zachary Hampton
088088ae51 - last x days param 2023-10-03 15:05:17 -07:00
Zachary Hampton
40bbf76db1 - realtor radius 2023-10-02 13:58:47 -07:00
Zachary Hampton
1f1ca8068f - realtor.com default 2023-10-02 10:28:13 -07:00
Zachary Hampton
8388d47f73 - version bump 2023-10-01 09:13:37 -07:00
Zachary Hampton
ba503b0ca3 Merge pull request #27 from ddxv/zillow-ua-header
Zillow Request Header: Match observed behaivor in FireFox of not sending sec-ch-ua headers
2023-10-01 09:12:58 -07:00
james
8962d619e1 Match observed behaivor in FireFox of not sending ua-ch headers in request to prevent recent 403 2023-10-01 11:31:51 +08:00
Zachary Hampton
3b7c17b7b5 - zillow proxy support 2023-09-28 18:40:16 -07:00
Zachary Hampton
59317fd6fc Merge pull request #25 from ZacharyHampton/fix/recent-issues
Fix/recent issues
2023-09-28 18:27:04 -07:00
Zachary Hampton
928b431d1f - bump version 2023-09-28 18:25:53 -07:00
Zachary Hampton
896f862137 - zillow flow update 2023-09-28 18:25:47 -07:00
Zachary Hampton
3174f5076c Merge pull request #23 from ZacharyHampton/fix/recent-issues
Fixes & Changes for recent issues
2023-09-28 18:07:55 -07:00
Zachary Hampton
2abbb913a8 - convert posted_time to datetime
- zillow location bug fix
2023-09-28 18:07:42 -07:00
Cullen Watson
73b6d5b33f [fix] zilow tls client 2023-09-28 19:34:01 -05:00
Zachary Hampton
da39c989d9 - version bump 2023-09-28 15:27:36 -07:00
Zachary Hampton
01c53f9399 - redfin bug fix
- add recent features for issues
2023-09-28 15:19:43 -07:00
Zachary Hampton
9200c17df2 - version bump 2023-09-23 10:55:50 -07:00
Zachary Hampton
9e262bf214 Merge remote-tracking branch 'origin/master' 2023-09-23 10:55:29 -07:00
Zachary Hampton
82f78fb578 - zillow bug fix 2023-09-23 10:55:14 -07:00
Cullen Watson
b0e40df00a Update pyproject.toml 2023-09-22 09:51:24 -05:00
Cullen Watson
2fc40e0dad fix: cookie 2023-09-22 09:47:37 -05:00
Zachary Hampton
254f3a68a1 - redfin bug fix 2023-09-21 18:54:03 -07:00
Zachary Hampton
05713c76b0 - redfin bug fix
- .get
2023-09-21 11:27:12 -07:00
Cullen Watson
9120cc9bfe fix: remove line 2023-09-21 13:10:14 -05:00
Cullen Watson
eee4b19515 Merge branch 'master' of https://github.com/ZacharyHampton/HomeHarvest 2023-09-21 13:06:15 -05:00
Cullen Watson
c25961eded fix: KeyEror : [minBaths] 2023-09-21 13:06:06 -05:00
Zachary Hampton
0884c3d163 Update README.md 2023-09-21 09:55:29 -07:00
Cullen Watson
8f37bfdeb8 chore: version number 2023-09-21 11:19:23 -05:00
Cullen Watson
48c2338276 fix: keyerror 2023-09-21 11:18:37 -05:00
Cullen Watson
f58a1f4a74 docs: tryhomeharvest.com 2023-09-21 10:57:11 -05:00
18 changed files with 1079 additions and 1364 deletions

226
README.md
View File

@@ -1,16 +1,26 @@
<img src="https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/d1a2bf8b-09f5-4c57-b33a-0ada8a34f12d" width="400">
**HomeHarvest** is a simple, yet comprehensive, real estate scraping library.
**HomeHarvest** is a simple, yet comprehensive, real estate scraping library that extracts and formats data in the style of MLS listings.
[![Try with Replit](https://replit.com/badge?caption=Try%20with%20Replit)](https://replit.com/@ZacharyHampton/HomeHarvestDemo)
*Looking to build a data-focused software product?* **[Book a call](https://calendly.com/zachary-products/15min)** *to work with us.*
## Features
**Not technical?** Try out the web scraping tool on our site at [tryhomeharvest.com](https://tryhomeharvest.com).
- Scrapes properties from **Zillow**, **Realtor.com** & **Redfin** simultaneously
- Aggregates the properties in a Pandas DataFrame
*Looking to build a data-focused software product?* **[Book a call](https://calendly.com/bunsly/15min)** *to work with us.*
[Video Guide for HomeHarvest](https://youtu.be/JnV7eR2Ve2o) - _updated for release v0.2.7_
Check out another project we wrote: ***[JobSpy](https://github.com/Bunsly/JobSpy)** a Python package for job scraping*
## HomeHarvest Features
- **Source**: Fetches properties directly from **Realtor.com**.
- **Data Format**: Structures data to resemble MLS listings.
- **Export Flexibility**: Options to save as either CSV or Excel.
- **Usage Modes**:
- **Python**: For those who'd like to integrate scraping into their Python scripts.
- **CLI**: For users who prefer command-line operations.
[Video Guide for HomeHarvest](https://youtu.be/J1qgNPgmSLI) - _updated for release v0.3.4_
![homeharvest](https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/b3d5d727-e67b-4a9f-85d8-1e65fd18620a)
@@ -23,138 +33,156 @@ pip install homeharvest
## Usage
### CLI
```bash
homeharvest "San Francisco, CA" -s zillow realtor.com redfin -l for_rent -o excel -f HomeHarvest
```
This will scrape properties from the specified sites for the given location and listing type, and save the results to an Excel file named `HomeHarvest.xlsx`.
By default:
- If `-s` or `--site_name` is not provided, it will scrape from all available sites.
- If `-l` or `--listing_type` is left blank, the default is `for_sale`. Other options are `for_rent` or `sold`.
- The `-o` or `--output` default format is `excel`. Options are `csv` or `excel`.
- If `-f` or `--filename` is left blank, the default is `HomeHarvest_<current_timestamp>`.
- If `-p` or `--proxy` is not provided, the scraper uses the local IP.
- Use `-k` or `--keep_duplicates` to keep duplicate properties based on address. If not provided, duplicates will be removed.
### Python
```py
from homeharvest import scrape_property
import pandas as pd
from datetime import datetime
properties: pd.DataFrame = scrape_property(
site_name=["zillow", "realtor.com", "redfin"],
location="85281",
listing_type="for_rent" # for_sale / sold
# Generate filename based on current timestamp
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"HomeHarvest_{current_timestamp}.csv"
properties = scrape_property(
location="San Diego, CA",
listing_type="sold", # or (for_sale, for_rent, pending)
past_days=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
# mls_only=True, # only fetch MLS listings
# proxy="http://user:pass@host:port" # use a proxy to change your IP address
)
print(f"Number of properties: {len(properties)}")
#: Note, to export to CSV or Excel, use properties.to_csv() or properties.to_excel().
print(properties)
# Export to csv
properties.to_csv(filename, index=False)
print(properties.head())
```
### CLI
```
usage: homeharvest [-l {for_sale,for_rent,sold}] [-o {excel,csv}] [-f FILENAME] [-p PROXY] [-d DAYS] [-r RADIUS] [-m] [-c] location
Home Harvest Property Scraper
positional arguments:
location Location to scrape (e.g., San Francisco, CA)
options:
-l {for_sale,for_rent,sold,pending}, --listing_type {for_sale,for_rent,sold,pending}
Listing type to scrape
-o {excel,csv}, --output {excel,csv}
Output format
-f FILENAME, --filename FILENAME
Name of the output file (without extension)
-p PROXY, --proxy PROXY
Proxy to use for scraping
-d DAYS, --days DAYS Sold/listed in last _ days filter.
-r RADIUS, --radius RADIUS
Get comparable properties within _ (e.g., 0.0) miles. Only applicable for individual addresses.
-m, --mls_only If set, fetches only MLS listings.
```
```bash
homeharvest "San Francisco, CA" -l for_rent -o excel -f HomeHarvest
```
## Output
```py
>>> properties.head()
property_url site_name listing_type apt_min_price apt_max_price ...
0 https://www.redfin.com/AZ/Tempe/1003-W-Washing... redfin for_rent 1666.0 2750.0 ...
1 https://www.redfin.com/AZ/Tempe/VELA-at-Town-L... redfin for_rent 1665.0 3763.0 ...
2 https://www.redfin.com/AZ/Tempe/Camden-Tempe/a... redfin for_rent 1939.0 3109.0 ...
3 https://www.redfin.com/AZ/Tempe/Emerson-Park/a... redfin for_rent 1185.0 1817.0 ...
4 https://www.redfin.com/AZ/Tempe/Rio-Paradiso-A... redfin for_rent 1470.0 2235.0 ...
[5 rows x 41 columns]
```
### Parameters for `scrape_properties()`
```plaintext
Required
├── location (str): address in various formats e.g. just zip, full address, city/state, etc.
└── listing_type (enum): for_rent, for_sale, sold
Optional
├── site_name (list[enum], default=all three sites): zillow, realtor.com, redfin
├── proxy (str): in format 'http://user:pass@host:port' or [https, socks]
└── keep_duplicates (bool, default=False): whether to keep or remove duplicate properties based on address
>>> properties.head()
MLS MLS # Status Style ... COEDate LotSFApx PrcSqft Stories
0 SDCA 230018348 SOLD CONDOS ... 2023-10-03 290110 803 2
1 SDCA 230016614 SOLD TOWNHOMES ... 2023-10-03 None 838 3
2 SDCA 230016367 SOLD CONDOS ... 2023-10-03 30056 649 1
3 MRCA NDP2306335 SOLD SINGLE_FAMILY ... 2023-10-03 7519 661 2
4 SDCA 230014532 SOLD CONDOS ... 2023-10-03 None 752 1
[5 rows x 22 columns]
```
### Parameters for `scrape_property()`
```
Required
├── location (str): The address in various formats - this could be just a zip code, a full address, or city/state, etc.
└── listing_type (option): Choose the type of listing.
- 'for_rent'
- 'for_sale'
- 'sold'
- 'pending'
Optional
├── radius (decimal): Radius in miles to find comparable properties based on individual addresses.
│ Example: 5.5 (fetches properties within a 5.5-mile radius if location is set to a specific address; otherwise, ignored)
├── past_days (integer): Number of past days to filter properties. Utilizes 'last_sold_date' for 'sold' listing types, and 'list_date' for others (for_rent, for_sale).
│ Example: 30 (fetches properties listed/sold in the last 30 days)
├── mls_only (True/False): If set, fetches only MLS listings (mainly applicable to 'sold' listings)
└── proxy (string): In format 'http://user:pass@host:port'
```
### Property Schema
```plaintext
Property
├── Basic Information:
├── property_url (str)
├── site_name (enum): zillow, redfin, realtor.com
├── listing_type (enum): for_sale, for_rent, sold
└── property_type (enum): house, apartment, condo, townhouse, single_family, multi_family, building
│ ├── property_url
│ ├── mls
│ ├── mls_id
│ └── status
├── Address Details:
├── street_address (str)
├── city (str)
├── state (str)
├── zip_code (str)
├── unit (str)
│ └── country (str)
│ ├── street
│ ├── unit
│ ├── city
│ ├── state
└── zip_code
├── House for Sale Features:
├── tax_assessed_value (int)
├── lot_area_value (float)
├── lot_area_unit (str)
├── stories (int)
├── year_built (int)
└── price_per_sqft (int)
├── Property Description:
│ ├── style
│ ├── beds
│ ├── full_baths
│ ├── half_baths
│ ├── sqft
├── year_built
│ ├── stories
│ └── lot_sqft
├── Building for Sale and Apartment Details:
├── bldg_name (str)
├── beds_min (int)
├── beds_max (int)
├── baths_min (float)
├── baths_max (float)
├── sqft_min (int)
├── sqft_max (int)
│ ├── price_min (int)
│ ├── price_max (int)
│ ├── area_min (int)
│ └── unit_count (int)
├── Property Listing Details:
│ ├── days_on_mls
│ ├── list_price
│ ├── list_date
│ ├── sold_price
│ ├── last_sold_date
│ ├── price_per_sqft
└── hoa_fee
├── Miscellaneous Details:
├── mls_id (str)
├── agent_name (str)
│ ├── img_src (str)
│ ├── description (str)
│ ├── status_text (str)
│ └── posted_time (str)
├── Location Details:
│ ├── latitude
│ ├── longitude
└── Location Details:
── latitude (float)
└── longitude (float)
└── Parking Details:
── parking_garage
```
## Supported Countries for Property Scraping
* **Zillow**: contains listings in the **US** & **Canada**
* **Realtor.com**: mainly from the **US** but also has international listings
* **Redfin**: listings mainly in the **US**, **Canada**, & has expanded to some areas in **Mexico**
### Exceptions
The following exceptions may be raised when using HomeHarvest:
- `InvalidSite` - valid options: `zillow`, `redfin`, `realtor.com`
- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
- `NoResultsFound` - no properties found from your input
- `GeoCoordsNotFound` - if Zillow scraper is not able to derive geo-coordinates from the location you input
- `NoResultsFound` - no properties found from your search
## Frequently Asked Questions
---
**Q: Encountering issues with your queries?**
**A:** Try a single site and/or broaden the location. If problems persist, [submit an issue](https://github.com/ZacharyHampton/HomeHarvest/issues).
**Q: Encountering issues with your searches?**
**A:** Try to broaden the parameters you're using. If problems persist, [submit an issue](https://github.com/ZacharyHampton/HomeHarvest/issues).
---
**Q: Received a Forbidden 403 response code?**
**A:** This indicates that you have been blocked by the real estate site for sending too many requests. Currently, **Zillow** is particularly aggressive with blocking. We recommend:
**A:** This indicates that you have been blocked by Realtor.com for sending too many requests. We recommend:
- Waiting a few seconds between requests.
- Trying a VPN to change your IP address.
- Trying a VPN or useing a proxy as a parameter to scrape_property() to change your IP address.
---

View File

@@ -4,7 +4,9 @@
"cell_type": "code",
"execution_count": null,
"id": "cb48903e-5021-49fe-9688-45cd0bc05d0f",
"metadata": {},
"metadata": {
"is_executing": true
},
"outputs": [],
"source": [
"from homeharvest import scrape_property\n",
@@ -31,7 +33,7 @@
"metadata": {},
"outputs": [],
"source": [
"# scrapes all 3 sites by default\n",
"# check for sale properties\n",
"scrape_property(\n",
" location=\"dallas\",\n",
" listing_type=\"for_sale\"\n",
@@ -53,7 +55,6 @@
"# search a specific address\n",
"scrape_property(\n",
" location=\"2530 Al Lipscomb Way\",\n",
" site_name=\"zillow\",\n",
" listing_type=\"for_sale\"\n",
")"
]
@@ -68,7 +69,6 @@
"# check rentals\n",
"scrape_property(\n",
" location=\"chicago, illinois\",\n",
" site_name=[\"redfin\", \"zillow\"],\n",
" listing_type=\"for_rent\"\n",
")"
]
@@ -86,11 +86,34 @@
"outputs": [],
"source": [
"# check sold properties\n",
"scrape_property(\n",
"properties = scrape_property(\n",
" location=\"90210\",\n",
" site_name=[\"redfin\"],\n",
" listing_type=\"sold\"\n",
")"
" listing_type=\"sold\",\n",
" past_days=10\n",
")\n",
"display(properties)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "628c1ce2",
"metadata": {
"collapsed": false,
"is_executing": true,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"# display clickable URLs\n",
"from IPython.display import display, HTML\n",
"properties['property_url'] = '<a href=\"' + properties['property_url'] + '\" target=\"_blank\">' + properties['property_url'] + '</a>'\n",
"\n",
"html = properties.to_html(escape=False)\n",
"truncate_width = f'<style>.dataframe td {{ max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }}</style>{html}'\n",
"display(HTML(truncate_width))"
]
}
],

View File

@@ -0,0 +1,20 @@
from homeharvest import scrape_property
from datetime import datetime
# Generate filename based on current timestamp
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"HomeHarvest_{current_timestamp}.csv"
properties = scrape_property(
location="San Diego, CA",
listing_type="sold", # or (for_sale, for_rent)
past_days=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
# pending_or_contingent=True # use on for_sale listings to find pending / contingent listings
# mls_only=True, # only fetch MLS listings
# proxy="http://user:pass@host:port" # use a proxy to change your IP address
)
print(f"Number of properties: {len(properties)}")
# Export to csv
properties.to_csv(filename, index=False)
print(properties.head())

View File

@@ -1,171 +1,47 @@
import warnings
import pandas as pd
from typing import Union
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
from .core.scrapers import ScraperInput
from .core.scrapers.redfin import RedfinScraper
from .utils import process_result, ordered_properties, validate_input
from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.zillow import ZillowScraper
from .core.scrapers.models import ListingType, Property, SiteName
from .exceptions import InvalidSite, InvalidListingType
_scrapers = {
"redfin": RedfinScraper,
"realtor.com": RealtorScraper,
"zillow": ZillowScraper,
}
def _validate_input(site_name: str, listing_type: str) -> None:
if site_name.lower() not in _scrapers:
raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
if listing_type.upper() not in ListingType.__members__:
raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.")
def _get_ordered_properties(result: Property) -> list[str]:
return [
"property_url",
"site_name",
"listing_type",
"property_type",
"status_text",
"baths_min",
"baths_max",
"beds_min",
"beds_max",
"sqft_min",
"sqft_max",
"price_min",
"price_max",
"unit_count",
"tax_assessed_value",
"price_per_sqft",
"lot_area_value",
"lot_area_unit",
"address_one",
"address_two",
"city",
"state",
"zip_code",
"posted_time",
"area_min",
"bldg_name",
"stories",
"year_built",
"agent_name",
"mls_id",
"img_src",
"latitude",
"longitude",
"description",
]
def _process_result(result: Property) -> pd.DataFrame:
prop_data = result.__dict__
prop_data["site_name"] = prop_data["site_name"].value
prop_data["listing_type"] = prop_data["listing_type"].value.lower()
if "property_type" in prop_data and prop_data["property_type"] is not None:
prop_data["property_type"] = prop_data["property_type"].value.lower()
else:
prop_data["property_type"] = None
if "address" in prop_data:
address_data = prop_data["address"]
prop_data["address_one"] = address_data.address_one
prop_data["address_two"] = address_data.address_two
prop_data["city"] = address_data.city
prop_data["state"] = address_data.state
prop_data["zip_code"] = address_data.zip_code
del prop_data["address"]
properties_df = pd.DataFrame([prop_data])
properties_df = properties_df[_get_ordered_properties(result)]
return properties_df
def _scrape_single_site(location: str, site_name: str, listing_type: str, proxy: str = None) -> pd.DataFrame:
"""
Helper function to scrape a single site.
"""
_validate_input(site_name, listing_type)
scraper_input = ScraperInput(
location=location,
listing_type=ListingType[listing_type.upper()],
site_name=SiteName.get_by_value(site_name.lower()),
proxy=proxy,
)
site = _scrapers[site_name.lower()](scraper_input)
results = site.search()
properties_dfs = [_process_result(result) for result in results]
properties_dfs = [df.dropna(axis=1, how="all") for df in properties_dfs if not df.empty]
if not properties_dfs:
return pd.DataFrame()
return pd.concat(properties_dfs, ignore_index=True)
from .core.scrapers.models import ListingType
from .exceptions import InvalidListingType, NoResultsFound
def scrape_property(
location: str,
site_name: Union[str, list[str]] = None,
listing_type: str = "for_sale",
radius: float = None,
mls_only: bool = False,
past_days: int = None,
proxy: str = None,
keep_duplicates: bool = False
) -> pd.DataFrame:
"""
Scrape property from various sites from a given location and listing type.
:returns: pd.DataFrame
:param location: US Location (e.g. 'San Francisco, CA', 'Cook County, IL', '85281', '2530 Al Lipscomb Way')
:param site_name: Site name or list of site names (e.g. ['realtor.com', 'zillow'], 'redfin')
:param listing_type: Listing type (e.g. 'for_sale', 'for_rent', 'sold')
:return: pd.DataFrame containing properties
Scrape properties from Realtor.com based on a given location and listing type.
:param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way")
:param listing_type: Listing Type (for_sale, for_rent, sold)
:param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses.
:param mls_only: If set, fetches only listings with MLS IDs.
:param past_days: Get properties sold or listed (dependent on your listing_type) in the last _ days.
:param proxy: Proxy to use for scraping
"""
if site_name is None:
site_name = list(_scrapers.keys())
validate_input(listing_type)
if not isinstance(site_name, list):
site_name = [site_name]
scraper_input = ScraperInput(
location=location,
listing_type=ListingType[listing_type.upper()],
proxy=proxy,
radius=radius,
mls_only=mls_only,
last_x_days=past_days,
)
results = []
site = RealtorScraper(scraper_input)
results = site.search()
if len(site_name) == 1:
final_df = _scrape_single_site(location, site_name[0], listing_type, proxy)
results.append(final_df)
else:
with ThreadPoolExecutor() as executor:
futures = {
executor.submit(_scrape_single_site, location, s_name, listing_type, proxy): s_name
for s_name in site_name
}
properties_dfs = [process_result(result) for result in results]
if not properties_dfs:
raise NoResultsFound("no results found for the query")
for future in concurrent.futures.as_completed(futures):
result = future.result()
results.append(result)
results = [df for df in results if not df.empty and not df.isna().all().all()]
if not results:
return pd.DataFrame()
final_df = pd.concat(results, ignore_index=True)
columns_to_track = ["address_one", "address_two", "city"]
#: validate they exist, otherwise create them
for col in columns_to_track:
if col not in final_df.columns:
final_df[col] = None
if not keep_duplicates:
final_df = final_df.drop_duplicates(subset=columns_to_track, keep="first")
return final_df
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
return pd.concat(properties_dfs, ignore_index=True, axis=0)[ordered_properties]

View File

@@ -5,15 +5,8 @@ from homeharvest import scrape_property
def main():
parser = argparse.ArgumentParser(description="Home Harvest Property Scraper")
parser.add_argument("location", type=str, help="Location to scrape (e.g., San Francisco, CA)")
parser.add_argument(
"-s",
"--site_name",
type=str,
nargs="*",
default=None,
help="Site name(s) to scrape from (e.g., realtor, zillow)",
"location", type=str, help="Location to scrape (e.g., San Francisco, CA)"
)
parser.add_argument(
@@ -21,7 +14,7 @@ def main():
"--listing_type",
type=str,
default="for_sale",
choices=["for_sale", "for_rent", "sold"],
choices=["for_sale", "for_rent", "sold", "pending"],
help="Listing type to scrape",
)
@@ -43,17 +36,40 @@ def main():
)
parser.add_argument(
"-k",
"--keep_duplicates",
action="store_true",
help="Keep duplicate properties based on address"
"-p", "--proxy", type=str, default=None, help="Proxy to use for scraping"
)
parser.add_argument(
"-d",
"--days",
type=int,
default=None,
help="Sold/listed in last _ days filter.",
)
parser.add_argument("-p", "--proxy", type=str, default=None, help="Proxy to use for scraping")
parser.add_argument(
"-r",
"--radius",
type=float,
default=None,
help="Get comparable properties within _ (eg. 0.0) miles. Only applicable for individual addresses.",
)
parser.add_argument(
"-m",
"--mls_only",
action="store_true",
help="If set, fetches only MLS listings.",
)
args = parser.parse_args()
result = scrape_property(args.location, args.site_name, args.listing_type, proxy=args.proxy, keep_duplicates=args.keep_duplicates)
result = scrape_property(
args.location,
args.listing_type,
radius=args.radius,
proxy=args.proxy,
mls_only=args.mls_only,
past_days=args.days,
)
if not args.filename:
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

View File

@@ -7,22 +7,35 @@ from .models import Property, ListingType, SiteName
class ScraperInput:
location: str
listing_type: ListingType
site_name: SiteName
radius: float | None = None
mls_only: bool | None = None
proxy: str | None = None
last_x_days: int | None = None
class Scraper:
def __init__(self, scraper_input: ScraperInput):
def __init__(
self,
scraper_input: ScraperInput,
session: requests.Session = None,
):
self.location = scraper_input.location
self.listing_type = scraper_input.listing_type
if not session:
self.session = requests.Session()
else:
self.session = session
if scraper_input.proxy:
proxy_url = scraper_input.proxy
proxies = {"http": proxy_url, "https": proxy_url}
self.session.proxies.update(proxies)
self.listing_type = scraper_input.listing_type
self.site_name = scraper_input.site_name
self.radius = scraper_input.radius
self.last_x_days = scraper_input.last_x_days
self.mls_only = scraper_input.mls_only
def search(self) -> list[Property]:
...

View File

@@ -1,6 +1,6 @@
from dataclasses import dataclass
from enum import Enum
from typing import Tuple
from typing import Optional
class SiteName(Enum):
@@ -19,91 +19,49 @@ class SiteName(Enum):
class ListingType(Enum):
FOR_SALE = "FOR_SALE"
FOR_RENT = "FOR_RENT"
PENDING = "PENDING"
SOLD = "SOLD"
class PropertyType(Enum):
HOUSE = "HOUSE"
BUILDING = "BUILDING"
CONDO = "CONDO"
TOWNHOUSE = "TOWNHOUSE"
SINGLE_FAMILY = "SINGLE_FAMILY"
MULTI_FAMILY = "MULTI_FAMILY"
MANUFACTURED = "MANUFACTURED"
NEW_CONSTRUCTION = "NEW_CONSTRUCTION"
APARTMENT = "APARTMENT"
APARTMENTS = "APARTMENTS"
LAND = "LAND"
LOT = "LOT"
OTHER = "OTHER"
BLANK = "BLANK"
@classmethod
def from_int_code(cls, code):
mapping = {
1: cls.HOUSE,
2: cls.CONDO,
3: cls.TOWNHOUSE,
4: cls.MULTI_FAMILY,
5: cls.LAND,
6: cls.OTHER,
8: cls.SINGLE_FAMILY,
13: cls.SINGLE_FAMILY,
}
return mapping.get(code, cls.BLANK)
@dataclass
class Address:
address_one: str | None = None
address_two: str | None = "#"
street: str | None = None
unit: str | None = None
city: str | None = None
state: str | None = None
zip_code: str | None = None
zip: str | None = None
@dataclass
class Description:
style: str | None = None
beds: int | None = None
baths_full: int | None = None
baths_half: int | None = None
sqft: int | None = None
lot_sqft: int | None = None
sold_price: int | None = None
year_built: int | None = None
garage: float | None = None
stories: int | None = None
@dataclass
class Property:
property_url: str
site_name: SiteName
listing_type: ListingType
address: Address
property_type: PropertyType | None = None
# house for sale
tax_assessed_value: int | None = None
lot_area_value: float | None = None
lot_area_unit: str | None = None
stories: int | None = None
year_built: int | None = None
price_per_sqft: int | None = None
mls: str | None = None
mls_id: str | None = None
status: str | None = None
address: Address | None = None
agent_name: str | None = None
img_src: str | None = None
description: str | None = None
status_text: str | None = None
posted_time: str | None = None
# building for sale
bldg_name: str | None = None
area_min: int | None = None
beds_min: int | None = None
beds_max: int | None = None
baths_min: float | None = None
baths_max: float | None = None
sqft_min: int | None = None
sqft_max: int | None = None
price_min: int | None = None
price_max: int | None = None
unit_count: int | None = None
list_price: int | None = None
list_date: str | None = None
last_sold_date: str | None = None
prc_sqft: int | None = None
hoa_fee: int | None = None
days_on_mls: int | None = None
description: Description | None = None
latitude: float | None = None
longitude: float | None = None
neighborhoods: Optional[str] = None

View File

@@ -2,39 +2,26 @@
homeharvest.realtor.__init__
~~~~~~~~~~~~
This module implements the scraper for relator.com
This module implements the scraper for realtor.com
"""
from ..models import Property, Address
from datetime import datetime
from typing import Dict, Union, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
from .. import Scraper
from ....exceptions import NoResultsFound
from ....utils import parse_address_one, parse_address_two
from concurrent.futures import ThreadPoolExecutor, as_completed
from ..models import Property, Address, ListingType, Description
class RealtorScraper(Scraper):
SEARCH_GQL_URL = "https://www.realtor.com/api/v1/rdc_search_srp?client_id=rdc-search-new-communities&schema=vesta"
PROPERTY_URL = "https://www.realtor.com/realestateandhomes-detail/"
ADDRESS_AUTOCOMPLETE_URL = "https://parser-external.geo.moveaws.com/suggest"
def __init__(self, scraper_input):
self.counter = 1
super().__init__(scraper_input)
self.search_url = (
"https://www.realtor.com/api/v1/rdc_search_srp?client_id=rdc-search-new-communities&schema=vesta"
)
def handle_location(self):
headers = {
"authority": "parser-external.geo.moveaws.com",
"accept": "*/*",
"accept-language": "en-US,en;q=0.9",
"origin": "https://www.realtor.com",
"referer": "https://www.realtor.com/",
"sec-ch-ua": '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "cross-site",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}
params = {
"input": self.location,
"client_id": self.listing_type.value.lower().replace("_", "-"),
@@ -43,9 +30,8 @@ class RealtorScraper(Scraper):
}
response = self.session.get(
"https://parser-external.geo.moveaws.com/suggest",
self.ADDRESS_AUTOCOMPLETE_URL,
params=params,
headers=headers,
)
response_json = response.json()
@@ -56,6 +42,165 @@ class RealtorScraper(Scraper):
return result[0]
def handle_listing(self, listing_id: str) -> list[Property]:
query = """query Listing($listing_id: ID!) {
listing(id: $listing_id) {
source {
id
listing_id
}
address {
street_number
street_name
street_suffix
unit
city
state_code
postal_code
location {
coordinate {
lat
lon
}
}
}
basic {
sqft
beds
baths_full
baths_half
lot_sqft
sold_price
sold_price
type
price
status
sold_date
list_date
}
details {
year_built
stories
garage
permalink
}
}
}"""
variables = {"listing_id": listing_id}
payload = {
"query": query,
"variables": variables,
}
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
response_json = response.json()
property_info = response_json["data"]["listing"]
mls = (
property_info["source"].get("id")
if "source" in property_info and isinstance(property_info["source"], dict)
else None
)
able_to_get_lat_long = (
property_info
and property_info.get("address")
and property_info["address"].get("location")
and property_info["address"]["location"].get("coordinate")
)
list_date_str = property_info["basic"]["list_date"].split("T")[0] if property_info["basic"].get(
"list_date") else None
last_sold_date_str = property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get(
"sold_date") else None
list_date = datetime.strptime(list_date_str, "%Y-%m-%d") if list_date_str else None
last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None
today = datetime.now()
days_on_mls = None
status = property_info["basic"]["status"].lower()
if list_date:
if status == "sold" and last_sold_date:
days_on_mls = (last_sold_date - list_date).days
elif status in ('for_sale', 'for_rent'):
days_on_mls = (today - list_date).days
if days_on_mls and days_on_mls < 0:
days_on_mls = None
listing = Property(
mls=mls,
mls_id=property_info["source"].get("listing_id")
if "source" in property_info and isinstance(property_info["source"], dict)
else None,
property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}",
status=property_info["basic"]["status"].upper(),
list_price=property_info["basic"]["price"],
list_date=list_date,
prc_sqft=property_info["basic"].get("price")
/ property_info["basic"].get("sqft")
if property_info["basic"].get("price")
and property_info["basic"].get("sqft")
else None,
last_sold_date=last_sold_date,
latitude=property_info["address"]["location"]["coordinate"].get("lat")
if able_to_get_lat_long
else None,
longitude=property_info["address"]["location"]["coordinate"].get("lon")
if able_to_get_lat_long
else None,
address=self._parse_address(property_info, search_type="handle_listing"),
description=Description(
style=property_info["basic"].get("type", "").upper(),
beds=property_info["basic"].get("beds"),
baths_full=property_info["basic"].get("baths_full"),
baths_half=property_info["basic"].get("baths_half"),
sqft=property_info["basic"].get("sqft"),
lot_sqft=property_info["basic"].get("lot_sqft"),
sold_price=property_info["basic"].get("sold_price"),
year_built=property_info["details"].get("year_built"),
garage=property_info["details"].get("garage"),
stories=property_info["details"].get("stories"),
),
days_on_mls=days_on_mls
)
return [listing]
def get_latest_listing_id(self, property_id: str) -> str | None:
query = """query Property($property_id: ID!) {
property(id: $property_id) {
listings {
listing_id
primary
}
}
}
"""
variables = {"property_id": property_id}
payload = {
"query": query,
"variables": variables,
}
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
response_json = response.json()
property_info = response_json["data"]["property"]
if property_info["listings"] is None:
return None
primary_listing = next(
(listing for listing in property_info["listings"] if listing["primary"]),
None,
)
if primary_listing:
return primary_listing["listing_id"]
else:
return property_info["listings"][0]["listing_id"]
def handle_address(self, property_id: str) -> list[Property]:
"""
Handles a specific address & returns one property
@@ -71,22 +216,19 @@ class RealtorScraper(Scraper):
stories
}
address {
address_validation_code
city
country
county
line
postal_code
state_code
street_direction
street_name
street_number
street_name
street_suffix
street_post_direction
unit_value
unit
unit_descriptor
zip
city
state_code
postal_code
location {
coordinate {
lat
lon
}
}
}
basic {
baths
@@ -114,51 +256,138 @@ class RealtorScraper(Scraper):
"variables": variables,
}
response = self.session.post(self.search_url, json=payload)
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
response_json = response.json()
property_info = response_json["data"]["property"]
address_one, address_two = parse_address_one(property_info["address"]["line"])
return [
Property(
site_name=self.site_name,
address=Address(
address_one=address_one,
address_two=address_two,
city=property_info["address"]["city"],
state=property_info["address"]["state_code"],
zip_code=property_info["address"]["postal_code"],
),
property_url="https://www.realtor.com/realestateandhomes-detail/"
+ property_info["details"]["permalink"],
stories=property_info["details"]["stories"],
year_built=property_info["details"]["year_built"],
price_per_sqft=property_info["basic"]["price"] // property_info["basic"]["sqft"]
if property_info["basic"]["sqft"] is not None and property_info["basic"]["price"] is not None
else None,
mls_id=property_id,
listing_type=self.listing_type,
lot_area_value=property_info["public_record"]["lot_size"]
if property_info["public_record"] is not None
else None,
beds_min=property_info["basic"]["beds"],
beds_max=property_info["basic"]["beds"],
baths_min=property_info["basic"]["baths"],
baths_max=property_info["basic"]["baths"],
sqft_min=property_info["basic"]["sqft"],
sqft_max=property_info["basic"]["sqft"],
price_min=property_info["basic"]["price"],
price_max=property_info["basic"]["price"],
property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}",
address=self._parse_address(
property_info, search_type="handle_address"
),
description=self._parse_description(property_info),
)
]
def handle_area(self, variables: dict, return_total: bool = False) -> list[Property] | int:
def general_search(
self, variables: dict, search_type: str
) -> Dict[str, Union[int, list[Property]]]:
"""
Handles a location area & returns a list of properties
"""
query = (
"""query Home_search(
results_query = """{
count
total
results {
property_id
list_date
status
last_sold_price
last_sold_date
list_price
price_per_sqft
flags {
is_contingent
is_pending
}
description {
sqft
beds
baths_full
baths_half
lot_sqft
sold_price
year_built
garage
sold_price
type
name
stories
}
source {
id
listing_id
}
hoa {
fee
}
location {
address {
street_number
street_name
street_suffix
unit
city
state_code
postal_code
coordinate {
lon
lat
}
}
neighborhoods {
name
}
}
}
}
}"""
date_param = (
'sold_date: { min: "$today-%sD" }' % self.last_x_days
if self.listing_type == ListingType.SOLD and self.last_x_days
else (
'list_date: { min: "$today-%sD" }' % self.last_x_days
if self.last_x_days
else ""
)
)
sort_param = (
"sort: [{ field: sold_date, direction: desc }]"
if self.listing_type == ListingType.SOLD
else "sort: [{ field: list_date, direction: desc }]"
)
pending_or_contingent_param = (
"or_filters: { contingent: true, pending: true }"
if self.listing_type == ListingType.PENDING
else ""
)
listing_type = ListingType.FOR_SALE if self.listing_type == ListingType.PENDING else self.listing_type
if search_type == "comps": #: comps search, came from an address
query = """query Property_search(
$coordinates: [Float]!
$radius: String!
$offset: Int!,
) {
home_search(
query: {
nearby: {
coordinates: $coordinates
radius: $radius
}
status: %s
%s
%s
}
%s
limit: 200
offset: $offset
) %s""" % (
listing_type.value.lower(),
date_param,
pending_or_contingent_param,
sort_param,
results_query,
)
elif search_type == "area": #: general search, came from a general location
query = """query Home_search(
$city: String,
$county: [String],
$state_code: String,
@@ -172,56 +401,33 @@ class RealtorScraper(Scraper):
postal_code: $postal_code
state_code: $state_code
status: %s
%s
%s
}
%s
limit: 200
offset: $offset
) %s""" % (
listing_type.value.lower(),
date_param,
pending_or_contingent_param,
sort_param,
results_query,
)
else: #: general search, came from an address
query = (
"""query Property_search(
$property_id: [ID]!
$offset: Int!,
) {
count
total
results {
property_id
description {
baths
beds
lot_sqft
sqft
text
sold_price
stories
year_built
garage
unit_number
floor_number
property_search(
query: {
property_id: $property_id
}
location {
address {
city
country
line
postal_code
state_code
state
street_direction
street_name
street_number
street_post_direction
street_suffix
unit
coordinate {
lon
lat
}
}
}
list_price
price_per_sqft
source {
id
}
}
}
}"""
% self.listing_type.value.lower()
limit: 1
offset: $offset
) %s"""
% results_query
)
payload = {
@@ -229,12 +435,10 @@ class RealtorScraper(Scraper):
"variables": variables,
}
response = self.session.post(self.search_url, json=payload)
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
response.raise_for_status()
response_json = response.json()
if return_total:
return response_json["data"]["home_search"]["total"]
search_key = "home_search" if "home_search" in query else "property_search"
properties: list[Property] = []
@@ -242,89 +446,200 @@ class RealtorScraper(Scraper):
response_json is None
or "data" not in response_json
or response_json["data"] is None
or "home_search" not in response_json["data"]
or response_json["data"]["home_search"] is None
or "results" not in response_json["data"]["home_search"]
or search_key not in response_json["data"]
or response_json["data"][search_key] is None
or "results" not in response_json["data"][search_key]
):
return []
return {"total": 0, "properties": []}
for result in response_json["data"][search_key]["results"]:
mls = (
result["source"].get("id")
if "source" in result and isinstance(result["source"], dict)
else None
)
if not mls and self.mls_only:
continue
able_to_get_lat_long = (
result
and result.get("location")
and result["location"].get("address")
and result["location"]["address"].get("coordinate")
)
is_pending = result["flags"].get("is_pending") or result["flags"].get("is_contingent")
for result in response_json["data"]["home_search"]["results"]:
self.counter += 1
address_one, _ = parse_address_one(result["location"]["address"]["line"])
realty_property = Property(
address=Address(
address_one=address_one,
city=result["location"]["address"]["city"],
state=result["location"]["address"]["state_code"],
zip_code=result["location"]["address"]["postal_code"],
address_two=parse_address_two(result["location"]["address"]["unit"]),
),
latitude=result["location"]["address"]["coordinate"]["lat"]
if result
and result.get("location")
and result["location"].get("address")
and result["location"]["address"].get("coordinate")
and "lat" in result["location"]["address"]["coordinate"]
mls=mls,
mls_id=result["source"].get("listing_id")
if "source" in result and isinstance(result["source"], dict)
else None,
longitude=result["location"]["address"]["coordinate"]["lon"]
if result
and result.get("location")
and result["location"].get("address")
and result["location"]["address"].get("coordinate")
and "lon" in result["location"]["address"]["coordinate"]
property_url=f"{self.PROPERTY_URL}{result['property_id']}",
status="PENDING" if is_pending else result["status"].upper(),
list_price=result["list_price"],
list_date=result["list_date"].split("T")[0]
if result.get("list_date")
else None,
site_name=self.site_name,
property_url="https://www.realtor.com/realestateandhomes-detail/" + result["property_id"],
stories=result["description"]["stories"],
year_built=result["description"]["year_built"],
price_per_sqft=result["price_per_sqft"],
mls_id=result["property_id"],
listing_type=self.listing_type,
lot_area_value=result["description"]["lot_sqft"],
beds_min=result["description"]["beds"],
beds_max=result["description"]["beds"],
baths_min=result["description"]["baths"],
baths_max=result["description"]["baths"],
sqft_min=result["description"]["sqft"],
sqft_max=result["description"]["sqft"],
price_min=result["list_price"],
price_max=result["list_price"],
prc_sqft=result.get("price_per_sqft"),
last_sold_date=result.get("last_sold_date"),
hoa_fee=result["hoa"]["fee"]
if result.get("hoa") and isinstance(result["hoa"], dict)
else None,
latitude=result["location"]["address"]["coordinate"].get("lat")
if able_to_get_lat_long
else None,
longitude=result["location"]["address"]["coordinate"].get("lon")
if able_to_get_lat_long
else None,
address=self._parse_address(result, search_type="general_search"),
description=self._parse_description(result),
days_on_mls=self.calculate_days_on_mls(result)
)
properties.append(realty_property)
return properties
return {
"total": response_json["data"][search_key]["total"],
"properties": properties,
}
def search(self):
location_info = self.handle_location()
location_type = location_info["area_type"]
if location_type == "address":
property_id = location_info["mpr_id"]
return self.handle_address(property_id)
offset = 0
search_variables = {
"offset": 0,
}
search_type = (
"comps"
if self.radius and location_type == "address"
else "address"
if location_type == "address" and not self.radius
else "area"
)
if location_type == "address":
if not self.radius: #: single address search, non comps
property_id = location_info["mpr_id"]
search_variables |= {"property_id": property_id}
gql_results = self.general_search(
search_variables, search_type=search_type
)
if gql_results["total"] == 0:
listing_id = self.get_latest_listing_id(property_id)
if listing_id is None:
return self.handle_address(property_id)
else:
return self.handle_listing(listing_id)
else:
return gql_results["properties"]
else: #: general search, comps (radius)
coordinates = list(location_info["centroid"].values())
search_variables |= {
"coordinates": coordinates,
"radius": "{}mi".format(self.radius),
}
else: #: general search, location
search_variables |= {
"city": location_info.get("city"),
"county": location_info.get("county"),
"state_code": location_info.get("state_code"),
"postal_code": location_info.get("postal_code"),
"offset": offset,
}
total = self.handle_area(search_variables, return_total=True)
result = self.general_search(search_variables, search_type=search_type)
total = result["total"]
homes = result["properties"]
homes = []
with ThreadPoolExecutor(max_workers=10) as executor:
futures = [
executor.submit(
self.handle_area,
self.general_search,
variables=search_variables | {"offset": i},
return_total=False,
search_type=search_type,
)
for i in range(0, total, 200)
for i in range(200, min(total, 10000), 200)
]
for future in as_completed(futures):
homes.extend(future.result())
homes.extend(future.result()["properties"])
return homes
@staticmethod
def _parse_neighborhoods(result: dict) -> Optional[str]:
neighborhoods_list = []
neighborhoods = result["location"].get("neighborhoods", [])
if neighborhoods:
for neighborhood in neighborhoods:
name = neighborhood.get("name")
if name:
neighborhoods_list.append(name)
return ", ".join(neighborhoods_list) if neighborhoods_list else None
@staticmethod
def _parse_address(result: dict, search_type):
if search_type == "general_search":
return Address(
street=f"{result['location']['address']['street_number']} {result['location']['address']['street_name']} {result['location']['address']['street_suffix']}",
unit=result["location"]["address"]["unit"],
city=result["location"]["address"]["city"],
state=result["location"]["address"]["state_code"],
zip=result["location"]["address"]["postal_code"],
)
return Address(
street=f"{result['address']['street_number']} {result['address']['street_name']} {result['address']['street_suffix']}",
unit=result["address"]["unit"],
city=result["address"]["city"],
state=result["address"]["state_code"],
zip=result["address"]["postal_code"],
)
@staticmethod
def _parse_description(result: dict) -> Description:
description_data = result.get("description", {})
if description_data is None or not isinstance(description_data, dict):
description_data = {}
style = description_data.get("type", "")
if style is not None:
style = style.upper()
return Description(
style=style,
beds=description_data.get("beds"),
baths_full=description_data.get("baths_full"),
baths_half=description_data.get("baths_half"),
sqft=description_data.get("sqft"),
lot_sqft=description_data.get("lot_sqft"),
sold_price=description_data.get("sold_price"),
year_built=description_data.get("year_built"),
garage=description_data.get("garage"),
stories=description_data.get("stories"),
)
@staticmethod
def calculate_days_on_mls(result: dict) -> Optional[int]:
list_date_str = result.get("list_date")
list_date = datetime.strptime(list_date_str.split("T")[0], "%Y-%m-%d") if list_date_str else None
last_sold_date_str = result.get("last_sold_date")
last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None
today = datetime.now()
if list_date:
if result["status"] == 'sold':
if last_sold_date:
days = (last_sold_date - list_date).days
if days >= 0:
return days
elif result["status"] in ('for_sale', 'for_rent'):
days = (today - list_date).days
if days >= 0:
return days

View File

@@ -1,226 +0,0 @@
"""
homeharvest.redfin.__init__
~~~~~~~~~~~~
This module implements the scraper for redfin.com
"""
import json
from typing import Any
from .. import Scraper
from ....utils import parse_address_two, parse_address_one
from ..models import Property, Address, PropertyType, ListingType, SiteName
from ....exceptions import NoResultsFound
class RedfinScraper(Scraper):
def __init__(self, scraper_input):
super().__init__(scraper_input)
self.listing_type = scraper_input.listing_type
def _handle_location(self):
url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(self.location)
response = self.session.get(url)
response_json = json.loads(response.text.replace("{}&&", ""))
def get_region_type(match_type: str):
if match_type == "4":
return "2" #: zip
elif match_type == "2":
return "6" #: city
elif match_type == "1":
return "address" #: address, needs to be handled differently
if "exactMatch" not in response_json["payload"]:
raise NoResultsFound("No results found for location: {}".format(self.location))
if response_json["payload"]["exactMatch"] is not None:
target = response_json["payload"]["exactMatch"]
else:
target = response_json["payload"]["sections"][0]["rows"][0]
return target["id"].split("_")[1], get_region_type(target["type"])
def _parse_home(self, home: dict, single_search: bool = False) -> Property:
def get_value(key: str) -> Any | None:
if key in home and "value" in home[key]:
return home[key]["value"]
if not single_search:
address = Address(
address_one=parse_address_one(get_value("streetLine"))[0],
address_two=parse_address_one(get_value("streetLine"))[1],
city=home.get("city"),
state=home.get("state"),
zip_code=home.get("zip"),
)
else:
address_info = home.get("streetAddress")
address_one, address_two = parse_address_one(address_info.get("assembledAddress"))
address = Address(
address_one=address_one,
address_two=address_two,
city=home.get("city"),
state=home.get("state"),
zip_code=home.get("zip"),
)
url = "https://www.redfin.com{}".format(home["url"])
lot_size_data = home.get("lotSize")
if not isinstance(lot_size_data, int):
lot_size = lot_size_data.get("value", None) if isinstance(lot_size_data, dict) else None
else:
lot_size = lot_size_data
return Property(
site_name=self.site_name,
listing_type=self.listing_type,
address=address,
property_url=url,
beds_min=home["beds"] if "beds" in home else None,
beds_max=home["beds"] if "beds" in home else None,
baths_min=home["baths"] if "baths" in home else None,
baths_max=home["baths"] if "baths" in home else None,
price_min=get_value("price"),
price_max=get_value("price"),
sqft_min=get_value("sqFt"),
sqft_max=get_value("sqFt"),
stories=home["stories"] if "stories" in home else None,
agent_name=get_value("listingAgent"),
description=home["listingRemarks"] if "listingRemarks" in home else None,
year_built=get_value("yearBuilt") if not single_search else home["yearBuilt"],
lot_area_value=lot_size,
property_type=PropertyType.from_int_code(home.get("propertyType")),
price_per_sqft=get_value("pricePerSqFt"),
mls_id=get_value("mlsId"),
latitude=home["latLong"]["latitude"] if "latLong" in home and "latitude" in home["latLong"] else None,
longitude=home["latLong"]["longitude"] if "latLong" in home and "longitude" in home["latLong"] else None,
)
def _handle_rentals(self, region_id, region_type):
url = f"https://www.redfin.com/stingray/api/v1/search/rentals?al=1&isRentals=true&region_id={region_id}&region_type={region_type}&num_homes=100000"
response = self.session.get(url)
response.raise_for_status()
homes = response.json()
properties_list = []
for home in homes["homes"]:
home_data = home["homeData"]
rental_data = home["rentalExtension"]
property_url = f"https://www.redfin.com{home_data.get('url', '')}"
address_info = home_data.get("addressInfo", {})
centroid = address_info.get("centroid", {}).get("centroid", {})
address = Address(
address_one=parse_address_one(address_info.get("formattedStreetLine"))[0],
city=address_info.get("city"),
state=address_info.get("state"),
zip_code=address_info.get("zip"),
)
price_range = rental_data.get("rentPriceRange", {"min": None, "max": None})
bed_range = rental_data.get("bedRange", {"min": None, "max": None})
bath_range = rental_data.get("bathRange", {"min": None, "max": None})
sqft_range = rental_data.get("sqftRange", {"min": None, "max": None})
property_ = Property(
property_url=property_url,
site_name=SiteName.REDFIN,
listing_type=ListingType.FOR_RENT,
address=address,
description=rental_data.get("description"),
latitude=centroid.get("latitude"),
longitude=centroid.get("longitude"),
baths_min=bath_range.get("min"),
baths_max=bath_range.get("max"),
beds_min=bed_range.get("min"),
beds_max=bed_range.get("max"),
price_min=price_range.get("min"),
price_max=price_range.get("max"),
sqft_min=sqft_range.get("min"),
sqft_max=sqft_range.get("max"),
img_src=home_data.get("staticMapUrl"),
posted_time=rental_data.get("lastUpdated"),
bldg_name=rental_data.get("propertyName"),
)
properties_list.append(property_)
if not properties_list:
raise NoResultsFound("No rentals found for the given location.")
return properties_list
def _parse_building(self, building: dict) -> Property:
street_address = " ".join(
[
building["address"]["streetNumber"],
building["address"]["directionalPrefix"],
building["address"]["streetName"],
building["address"]["streetType"],
]
)
return Property(
site_name=self.site_name,
property_type=PropertyType("BUILDING"),
address=Address(
address_one=parse_address_one(street_address)[0],
city=building["address"]["city"],
state=building["address"]["stateOrProvinceCode"],
zip_code=building["address"]["postalCode"],
address_two=parse_address_two(
" ".join(
[
building["address"]["unitType"],
building["address"]["unitValue"],
]
)
),
),
property_url="https://www.redfin.com{}".format(building["url"]),
listing_type=self.listing_type,
unit_count=building["numUnitsForSale"],
)
def handle_address(self, home_id: str):
"""
EPs:
https://www.redfin.com/stingray/api/home/details/initialInfo?al=1&path=/TX/Austin/70-Rainey-St-78701/unit-1608/home/147337694
https://www.redfin.com/stingray/api/home/details/mainHouseInfoPanelInfo?propertyId=147337694&accessLevel=3
https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId=147337694&accessLevel=3
https://www.redfin.com/stingray/api/home/details/belowTheFold?propertyId=147337694&accessLevel=3
"""
url = "https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId={}&accessLevel=3".format(
home_id
)
response = self.session.get(url)
response_json = json.loads(response.text.replace("{}&&", ""))
parsed_home = self._parse_home(response_json["payload"]["addressSectionInfo"], single_search=True)
return [parsed_home]
def search(self):
region_id, region_type = self._handle_location()
if region_type == "address":
home_id = region_id
return self.handle_address(home_id)
if self.listing_type == ListingType.FOR_RENT:
return self._handle_rentals(region_id, region_type)
else:
if self.listing_type == ListingType.FOR_SALE:
url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}&num_homes=100000"
else:
url = f"https://www.redfin.com/stingray/api/gis?al=1&region_id={region_id}&region_type={region_type}&sold_within_days=30&num_homes=100000"
response = self.session.get(url)
response_json = json.loads(response.text.replace("{}&&", ""))
homes = [self._parse_home(home) for home in response_json["payload"]["homes"]] + [
self._parse_building(building) for building in response_json["payload"]["buildings"].values()
]
return homes

View File

@@ -1,315 +0,0 @@
"""
homeharvest.zillow.__init__
~~~~~~~~~~~~
This module implements the scraper for zillow.com
"""
import re
import json
from .. import Scraper
from ....utils import parse_address_one, parse_address_two
from ....exceptions import GeoCoordsNotFound, NoResultsFound
from ..models import Property, Address, ListingType, PropertyType
class ZillowScraper(Scraper):
def __init__(self, scraper_input):
super().__init__(scraper_input)
if not self.is_plausible_location(self.location):
raise NoResultsFound("Invalid location input: {}".format(self.location))
listing_type_to_url_path = {
ListingType.FOR_SALE: "for_sale",
ListingType.FOR_RENT: "for_rent",
ListingType.SOLD: "recently_sold",
}
self.url = f"https://www.zillow.com/homes/{listing_type_to_url_path[self.listing_type]}/{self.location}_rb/"
def is_plausible_location(self, location: str) -> bool:
url = (
"https://www.zillowstatic.com/autocomplete/v3/suggestions?q={"
"}&abKey=6666272a-4b99-474c-b857-110ec438732b&clientId=homepage-render"
).format(location)
response = self.session.get(url)
return response.json()["results"] != []
def search(self):
resp = self.session.get(self.url, headers=self._get_headers())
resp.raise_for_status()
content = resp.text
match = re.search(
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
content,
re.DOTALL,
)
if not match:
raise NoResultsFound("No results were found for Zillow with the given Location.")
json_str = match.group(1)
data = json.loads(json_str)
if "searchPageState" in data["props"]["pageProps"]:
pattern = r'window\.mapBounds = \{\s*"west":\s*(-?\d+\.\d+),\s*"east":\s*(-?\d+\.\d+),\s*"south":\s*(-?\d+\.\d+),\s*"north":\s*(-?\d+\.\d+)\s*\};'
match = re.search(pattern, content)
if match:
coords = [float(coord) for coord in match.groups()]
return self._fetch_properties_backend(coords)
else:
raise GeoCoordsNotFound("Box bounds could not be located.")
elif "gdpClientCache" in data["props"]["pageProps"]:
gdp_client_cache = json.loads(data["props"]["pageProps"]["gdpClientCache"])
main_key = list(gdp_client_cache.keys())[0]
property_data = gdp_client_cache[main_key]["property"]
property = self._get_single_property_page(property_data)
return [property]
raise NoResultsFound("Specific property data not found in the response.")
def _fetch_properties_backend(self, coords):
url = "https://www.zillow.com/async-create-search-page-state"
filter_state_for_sale = {
"sortSelection": {
# "value": "globalrelevanceex"
"value": "days"
},
"isAllHomes": {"value": True},
}
filter_state_for_rent = {
"isForRent": {"value": True},
"isForSaleByAgent": {"value": False},
"isForSaleByOwner": {"value": False},
"isNewConstruction": {"value": False},
"isComingSoon": {"value": False},
"isAuction": {"value": False},
"isForSaleForeclosure": {"value": False},
"isAllHomes": {"value": True},
}
filter_state_sold = {
"isRecentlySold": {"value": True},
"isForSaleByAgent": {"value": False},
"isForSaleByOwner": {"value": False},
"isNewConstruction": {"value": False},
"isComingSoon": {"value": False},
"isAuction": {"value": False},
"isForSaleForeclosure": {"value": False},
"isAllHomes": {"value": True},
}
selected_filter = (
filter_state_for_rent
if self.listing_type == ListingType.FOR_RENT
else filter_state_for_sale
if self.listing_type == ListingType.FOR_SALE
else filter_state_sold
)
payload = {
"searchQueryState": {
"pagination": {},
"isMapVisible": True,
"mapBounds": {
"west": coords[0],
"east": coords[1],
"south": coords[2],
"north": coords[3],
},
"filterState": selected_filter,
"isListVisible": True,
"mapZoom": 11,
},
"wants": {"cat1": ["mapResults"]},
"isDebugRequest": False,
}
resp = self.session.put(url, headers=self._get_headers(), json=payload)
resp.raise_for_status()
a = resp.json()
return self._parse_properties(resp.json())
def _parse_properties(self, property_data: dict):
mapresults = property_data["cat1"]["searchResults"]["mapResults"]
properties_list = []
for result in mapresults:
if "hdpData" in result:
home_info = result["hdpData"]["homeInfo"]
address_data = {
"address_one": parse_address_one(home_info["streetAddress"])[0],
"address_two": parse_address_two(home_info["unit"]) if "unit" in home_info else "#",
"city": home_info["city"],
"state": home_info["state"],
"zip_code": home_info["zipcode"],
}
property_obj = Property(
site_name=self.site_name,
address=Address(**address_data),
property_url=f"https://www.zillow.com{result['detailUrl']}",
tax_assessed_value=int(home_info["taxAssessedValue"]) if "taxAssessedValue" in home_info else None,
property_type=PropertyType(home_info["homeType"]),
listing_type=ListingType(
home_info["statusType"] if "statusType" in home_info else self.listing_type
),
status_text=result.get("statusText"),
posted_time=result["variableData"]["text"]
if "variableData" in result
and "text" in result["variableData"]
and result["variableData"]["type"] == "TIME_ON_INFO"
else None,
price_min=home_info.get("price"),
price_max=home_info.get("price"),
beds_min=int(home_info["bedrooms"]) if "bedrooms" in home_info else None,
beds_max=int(home_info["bedrooms"]) if "bedrooms" in home_info else None,
baths_min=home_info.get("bathrooms"),
baths_max=home_info.get("bathrooms"),
sqft_min=int(home_info["livingArea"]) if "livingArea" in home_info else None,
sqft_max=int(home_info["livingArea"]) if "livingArea" in home_info else None,
price_per_sqft=int(home_info["price"] // home_info["livingArea"])
if "livingArea" in home_info and home_info["livingArea"] != 0 and "price" in home_info
else None,
latitude=result["latLong"]["latitude"],
longitude=result["latLong"]["longitude"],
lot_area_value=round(home_info["lotAreaValue"], 2) if "lotAreaValue" in home_info else None,
lot_area_unit=home_info.get("lotAreaUnit"),
img_src=result.get("imgSrc"),
)
properties_list.append(property_obj)
elif "isBuilding" in result:
price_string = result["price"].replace("$", "").replace(",", "").replace("+/mo", "")
match = re.search(r"(\d+)", price_string)
price_value = int(match.group(1)) if match else None
building_obj = Property(
property_url=f"https://www.zillow.com{result['detailUrl']}",
site_name=self.site_name,
property_type=PropertyType("BUILDING"),
listing_type=ListingType(result["statusType"]),
img_src=result["imgSrc"],
address=self._extract_address(result["address"]),
baths_min=result["minBaths"],
area_min=result.get("minArea"),
bldg_name=result.get("communityName"),
status_text=result["statusText"],
beds_min=result["minBeds"],
price_min=price_value if "+/mo" in result["price"] else None,
price_max=price_value if "+/mo" in result["price"] else None,
latitude=result["latLong"]["latitude"],
longitude=result["latLong"]["longitude"],
unit_count=result["unitCount"],
)
properties_list.append(building_obj)
return properties_list
def _get_single_property_page(self, property_data: dict):
"""
This method is used when a user enters the exact location & zillow returns just one property
"""
url = (
f"https://www.zillow.com{property_data['hdpUrl']}"
if "zillow.com" not in property_data["hdpUrl"]
else property_data["hdpUrl"]
)
address_data = property_data["address"]
address_one, address_two = parse_address_one(address_data["streetAddress"])
address = Address(
address_one=address_one,
address_two=address_two if address_two else "#",
city=address_data["city"],
state=address_data["state"],
zip_code=address_data["zipcode"],
)
property_type = property_data.get("homeType", None)
return Property(
site_name=self.site_name,
property_url=url,
property_type=PropertyType(property_type),
listing_type=self.listing_type,
address=address,
year_built=property_data.get("yearBuilt"),
tax_assessed_value=property_data.get("taxAssessedValue"),
lot_area_value=property_data.get("lotAreaValue"),
lot_area_unit=property_data["lotAreaUnits"].lower() if "lotAreaUnits" in property_data else None,
agent_name=property_data.get("attributionInfo", {}).get("agentName"),
stories=property_data.get("resoFacts", {}).get("stories"),
mls_id=property_data.get("attributionInfo", {}).get("mlsId"),
beds_min=property_data.get("bedrooms"),
beds_max=property_data.get("bedrooms"),
baths_min=property_data.get("bathrooms"),
baths_max=property_data.get("bathrooms"),
price_min=property_data.get("price"),
price_max=property_data.get("price"),
sqft_min=property_data.get("livingArea"),
sqft_max=property_data.get("livingArea"),
price_per_sqft=property_data.get("resoFacts", {}).get("pricePerSquareFoot"),
latitude=property_data.get("latitude"),
longitude=property_data.get("longitude"),
img_src=property_data.get("streetViewTileImageUrlMediumAddress"),
description=property_data.get("description"),
)
def _extract_address(self, address_str):
"""
Extract address components from a string formatted like '555 Wedglea Dr, Dallas, TX',
and return an Address object.
"""
parts = address_str.split(", ")
if len(parts) != 3:
raise ValueError(f"Unexpected address format: {address_str}")
address_one = parts[0].strip()
city = parts[1].strip()
state_zip = parts[2].split(" ")
if len(state_zip) == 1:
state = state_zip[0].strip()
zip_code = None
elif len(state_zip) == 2:
state = state_zip[0].strip()
zip_code = state_zip[1].strip()
else:
raise ValueError(f"Unexpected state/zip format in address: {address_str}")
address_one, address_two = parse_address_one(address_one)
return Address(
address_one=address_one,
address_two=address_two if address_two else "#",
city=city,
state=state,
zip_code=zip_code,
)
@staticmethod
def _get_headers():
return {
"authority": "www.zillow.com",
"accept": "*/*",
"accept-language": "en-US,en;q=0.9",
"content-type": "application/json",
"cookie": 'zjs_user_id=null; zg_anonymous_id=%220976ab81-2950-4013-98f0-108b15a554d2%22; zguid=24|%246b1bc625-3955-4d1e-a723-e59602e4ed08; g_state={"i_p":1693611172520,"i_l":1}; zgsession=1|d48820e2-1659-4d2f-b7d2-99a8127dd4f3; zjs_anonymous_id=%226b1bc625-3955-4d1e-a723-e59602e4ed08%22; JSESSIONID=82E8274D3DC8AF3AB9C8E613B38CF861; search=6|1697585860120%7Crb%3DDallas%252C-TX%26rect%3D33.016646%252C-96.555516%252C32.618763%252C-96.999347%26disp%3Dmap%26mdm%3Dauto%26sort%3Ddays%26listPriceActive%3D1%26fs%3D1%26fr%3D0%26mmm%3D0%26rs%3D0%26ah%3D0%26singlestory%3D0%26abo%3D0%26garage%3D0%26pool%3D0%26ac%3D0%26waterfront%3D0%26finished%3D0%26unfinished%3D0%26cityview%3D0%26mountainview%3D0%26parkview%3D0%26waterview%3D0%26hoadata%3D1%263dhome%3D0%26commuteMode%3Ddriving%26commuteTimeOfDay%3Dnow%09%0938128%09%7B%22isList%22%3Atrue%2C%22isMap%22%3Atrue%7D%09%09%09%09%09; AWSALB=gAlFj5Ngnd4bWP8k7CME/+YlTtX9bHK4yEkdPHa3VhL6K523oGyysFxBEpE1HNuuyL+GaRPvt2i/CSseAb+zEPpO4SNjnbLAJzJOOO01ipnWN3ZgPaa5qdv+fAki; AWSALBCORS=gAlFj5Ngnd4bWP8k7CME/+YlTtX9bHK4yEkdPHa3VhL6K523oGyysFxBEpE1HNuuyL+GaRPvt2i/CSseAb+zEPpO4SNjnbLAJzJOOO01ipnWN3ZgPaa5qdv+fAki; search=6|1697587741808%7Crect%3D33.37188814545521%2C-96.34484483007813%2C32.260490641365685%2C-97.21001816992188%26disp%3Dmap%26mdm%3Dauto%26p%3D1%26sort%3Ddays%26z%3D1%26listPriceActive%3D1%26fs%3D1%26fr%3D0%26mmm%3D0%26rs%3D0%26ah%3D0%26singlestory%3D0%26housing-connector%3D0%26abo%3D0%26garage%3D0%26pool%3D0%26ac%3D0%26waterfront%3D0%26finished%3D0%26unfinished%3D0%26cityview%3D0%26mountainview%3D0%26parkview%3D0%26waterview%3D0%26hoadata%3D1%26zillow-owned%3D0%263dhome%3D0%26featuredMultiFamilyBuilding%3D0%26commuteMode%3Ddriving%26commuteTimeOfDay%3Dnow%09%09%09%7B%22isList%22%3Atrue%2C%22isMap%22%3Atrue%7D%09%09%09%09%09',
"origin": "https://www.zillow.com",
"referer": "https://www.zillow.com",
"sec-ch-ua": '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}

View File

@@ -1,14 +1,6 @@
class InvalidSite(Exception):
"""Raised when a provided site is does not exist."""
class InvalidListingType(Exception):
"""Raised when a provided listing type is does not exist."""
class NoResultsFound(Exception):
"""Raised when no results are found for the given location"""
class GeoCoordsNotFound(Exception):
"""Raised when no property is found for the given address"""

View File

@@ -1,38 +1,72 @@
import re
from .core.scrapers.models import Property, ListingType
import pandas as pd
from .exceptions import InvalidListingType
ordered_properties = [
"property_url",
"mls",
"mls_id",
"status",
"style",
"street",
"unit",
"city",
"state",
"zip_code",
"beds",
"full_baths",
"half_baths",
"sqft",
"year_built",
"days_on_mls",
"list_price",
"list_date",
"sold_price",
"last_sold_date",
"lot_sqft",
"price_per_sqft",
"latitude",
"longitude",
"stories",
"hoa_fee",
"parking_garage",
]
def parse_address_one(street_address: str) -> tuple:
if not street_address:
return street_address, "#"
def process_result(result: Property) -> pd.DataFrame:
prop_data = {prop: None for prop in ordered_properties}
prop_data.update(result.__dict__)
apt_match = re.search(
r"(APT\s*[\dA-Z]+|#[\dA-Z]+|UNIT\s*[\dA-Z]+|LOT\s*[\dA-Z]+|SUITE\s*[\dA-Z]+)$",
street_address,
re.I,
if "address" in prop_data:
address_data = prop_data["address"]
prop_data["street"] = address_data.street
prop_data["unit"] = address_data.unit
prop_data["city"] = address_data.city
prop_data["state"] = address_data.state
prop_data["zip_code"] = address_data.zip
prop_data["price_per_sqft"] = prop_data["prc_sqft"]
description = result.description
prop_data["style"] = description.style
prop_data["beds"] = description.beds
prop_data["full_baths"] = description.baths_full
prop_data["half_baths"] = description.baths_half
prop_data["sqft"] = description.sqft
prop_data["lot_sqft"] = description.lot_sqft
prop_data["sold_price"] = description.sold_price
prop_data["year_built"] = description.year_built
prop_data["parking_garage"] = description.garage
prop_data["stories"] = description.stories
properties_df = pd.DataFrame([prop_data])
properties_df = properties_df.reindex(columns=ordered_properties)
return properties_df[ordered_properties]
def validate_input(listing_type: str) -> None:
if listing_type.upper() not in ListingType.__members__:
raise InvalidListingType(
f"Provided listing type, '{listing_type}', does not exist."
)
if apt_match:
apt_str = apt_match.group().strip()
cleaned_apt_str = re.sub(r"(APT\s*|UNIT\s*|LOT\s*|SUITE\s*)", "#", apt_str, flags=re.I)
main_address = street_address.replace(apt_str, "").strip()
return main_address, cleaned_apt_str
else:
return street_address, "#"
def parse_address_two(street_address: str):
if not street_address:
return "#"
apt_match = re.search(
r"(APT\s*[\dA-Z]+|#[\dA-Z]+|UNIT\s*[\dA-Z]+|LOT\s*[\dA-Z]+|SUITE\s*[\dA-Z]+)$",
street_address,
re.I,
)
if apt_match:
apt_str = apt_match.group().strip()
apt_str = re.sub(r"(APT\s*|UNIT\s*|LOT\s*|SUITE\s*)", "#", apt_str, flags=re.I)
return apt_str
else:
return "#"

266
poetry.lock generated
View File

@@ -13,86 +13,101 @@ files = [
[[package]]
name = "charset-normalizer"
version = "3.2.0"
version = "3.3.0"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
optional = false
python-versions = ">=3.7.0"
files = [
{file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"},
{file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"},
{file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"},
{file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"},
{file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"},
{file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"},
{file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"},
{file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"},
{file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"},
{file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"},
{file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"},
{file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"},
{file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"},
{file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"},
{file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"},
{file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"},
{file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"},
{file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"},
{file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"},
{file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"},
{file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"},
{file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"},
{file = "charset-normalizer-3.3.0.tar.gz", hash = "sha256:63563193aec44bce707e0c5ca64ff69fa72ed7cf34ce6e11d5127555756fd2f6"},
{file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:effe5406c9bd748a871dbcaf3ac69167c38d72db8c9baf3ff954c344f31c4cbe"},
{file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4162918ef3098851fcd8a628bf9b6a98d10c380725df9e04caf5ca6dd48c847a"},
{file = "charset_normalizer-3.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0570d21da019941634a531444364f2482e8db0b3425fcd5ac0c36565a64142c8"},
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5707a746c6083a3a74b46b3a631d78d129edab06195a92a8ece755aac25a3f3d"},
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:278c296c6f96fa686d74eb449ea1697f3c03dc28b75f873b65b5201806346a69"},
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a4b71f4d1765639372a3b32d2638197f5cd5221b19531f9245fcc9ee62d38f56"},
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5969baeaea61c97efa706b9b107dcba02784b1601c74ac84f2a532ea079403e"},
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3f93dab657839dfa61025056606600a11d0b696d79386f974e459a3fbc568ec"},
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:db756e48f9c5c607b5e33dd36b1d5872d0422e960145b08ab0ec7fd420e9d649"},
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:232ac332403e37e4a03d209a3f92ed9071f7d3dbda70e2a5e9cff1c4ba9f0678"},
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e5c1502d4ace69a179305abb3f0bb6141cbe4714bc9b31d427329a95acfc8bdd"},
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:2502dd2a736c879c0f0d3e2161e74d9907231e25d35794584b1ca5284e43f596"},
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23e8565ab7ff33218530bc817922fae827420f143479b753104ab801145b1d5b"},
{file = "charset_normalizer-3.3.0-cp310-cp310-win32.whl", hash = "sha256:1872d01ac8c618a8da634e232f24793883d6e456a66593135aeafe3784b0848d"},
{file = "charset_normalizer-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:557b21a44ceac6c6b9773bc65aa1b4cc3e248a5ad2f5b914b91579a32e22204d"},
{file = "charset_normalizer-3.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d7eff0f27edc5afa9e405f7165f85a6d782d308f3b6b9d96016c010597958e63"},
{file = "charset_normalizer-3.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a685067d05e46641d5d1623d7c7fdf15a357546cbb2f71b0ebde91b175ffc3e"},
{file = "charset_normalizer-3.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0d3d5b7db9ed8a2b11a774db2bbea7ba1884430a205dbd54a32d61d7c2a190fa"},
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2935ffc78db9645cb2086c2f8f4cfd23d9b73cc0dc80334bc30aac6f03f68f8c"},
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fe359b2e3a7729010060fbca442ca225280c16e923b37db0e955ac2a2b72a05"},
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:380c4bde80bce25c6e4f77b19386f5ec9db230df9f2f2ac1e5ad7af2caa70459"},
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0d1e3732768fecb052d90d62b220af62ead5748ac51ef61e7b32c266cac9293"},
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b2919306936ac6efb3aed1fbf81039f7087ddadb3160882a57ee2ff74fd2382"},
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f8888e31e3a85943743f8fc15e71536bda1c81d5aa36d014a3c0c44481d7db6e"},
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:82eb849f085624f6a607538ee7b83a6d8126df6d2f7d3b319cb837b289123078"},
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7b8b8bf1189b3ba9b8de5c8db4d541b406611a71a955bbbd7385bbc45fcb786c"},
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5adf257bd58c1b8632046bbe43ee38c04e1038e9d37de9c57a94d6bd6ce5da34"},
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c350354efb159b8767a6244c166f66e67506e06c8924ed74669b2c70bc8735b1"},
{file = "charset_normalizer-3.3.0-cp311-cp311-win32.whl", hash = "sha256:02af06682e3590ab952599fbadac535ede5d60d78848e555aa58d0c0abbde786"},
{file = "charset_normalizer-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:86d1f65ac145e2c9ed71d8ffb1905e9bba3a91ae29ba55b4c46ae6fc31d7c0d4"},
{file = "charset_normalizer-3.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:3b447982ad46348c02cb90d230b75ac34e9886273df3a93eec0539308a6296d7"},
{file = "charset_normalizer-3.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:abf0d9f45ea5fb95051c8bfe43cb40cda383772f7e5023a83cc481ca2604d74e"},
{file = "charset_normalizer-3.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b09719a17a2301178fac4470d54b1680b18a5048b481cb8890e1ef820cb80455"},
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3d9b48ee6e3967b7901c052b670c7dda6deb812c309439adaffdec55c6d7b78"},
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:edfe077ab09442d4ef3c52cb1f9dab89bff02f4524afc0acf2d46be17dc479f5"},
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3debd1150027933210c2fc321527c2299118aa929c2f5a0a80ab6953e3bd1908"},
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f63face3a527284f7bb8a9d4f78988e3c06823f7bea2bd6f0e0e9298ca0403"},
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24817cb02cbef7cd499f7c9a2735286b4782bd47a5b3516a0e84c50eab44b98e"},
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c71f16da1ed8949774ef79f4a0260d28b83b3a50c6576f8f4f0288d109777989"},
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9cf3126b85822c4e53aa28c7ec9869b924d6fcfb76e77a45c44b83d91afd74f9"},
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:b3b2316b25644b23b54a6f6401074cebcecd1244c0b8e80111c9a3f1c8e83d65"},
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:03680bb39035fbcffe828eae9c3f8afc0428c91d38e7d61aa992ef7a59fb120e"},
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cc152c5dd831641e995764f9f0b6589519f6f5123258ccaca8c6d34572fefa8"},
{file = "charset_normalizer-3.3.0-cp312-cp312-win32.whl", hash = "sha256:b8f3307af845803fb0b060ab76cf6dd3a13adc15b6b451f54281d25911eb92df"},
{file = "charset_normalizer-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:8eaf82f0eccd1505cf39a45a6bd0a8cf1c70dcfc30dba338207a969d91b965c0"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dc45229747b67ffc441b3de2f3ae5e62877a282ea828a5bdb67883c4ee4a8810"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f4a0033ce9a76e391542c182f0d48d084855b5fcba5010f707c8e8c34663d77"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ada214c6fa40f8d800e575de6b91a40d0548139e5dc457d2ebb61470abf50186"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b1121de0e9d6e6ca08289583d7491e7fcb18a439305b34a30b20d8215922d43c"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1063da2c85b95f2d1a430f1c33b55c9c17ffaf5e612e10aeaad641c55a9e2b9d"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70f1d09c0d7748b73290b29219e854b3207aea922f839437870d8cc2168e31cc"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:250c9eb0f4600361dd80d46112213dff2286231d92d3e52af1e5a6083d10cad9"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:750b446b2ffce1739e8578576092179160f6d26bd5e23eb1789c4d64d5af7dc7"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:fc52b79d83a3fe3a360902d3f5d79073a993597d48114c29485e9431092905d8"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:588245972aca710b5b68802c8cad9edaa98589b1b42ad2b53accd6910dad3545"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e39c7eb31e3f5b1f88caff88bcff1b7f8334975b46f6ac6e9fc725d829bc35d4"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-win32.whl", hash = "sha256:abecce40dfebbfa6abf8e324e1860092eeca6f7375c8c4e655a8afb61af58f2c"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:24a91a981f185721542a0b7c92e9054b7ab4fea0508a795846bc5b0abf8118d4"},
{file = "charset_normalizer-3.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:67b8cc9574bb518ec76dc8e705d4c39ae78bb96237cb533edac149352c1f39fe"},
{file = "charset_normalizer-3.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac71b2977fb90c35d41c9453116e283fac47bb9096ad917b8819ca8b943abecd"},
{file = "charset_normalizer-3.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3ae38d325b512f63f8da31f826e6cb6c367336f95e418137286ba362925c877e"},
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:542da1178c1c6af8873e143910e2269add130a299c9106eef2594e15dae5e482"},
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30a85aed0b864ac88309b7d94be09f6046c834ef60762a8833b660139cfbad13"},
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aae32c93e0f64469f74ccc730a7cb21c7610af3a775157e50bbd38f816536b38"},
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b26ddf78d57f1d143bdf32e820fd8935d36abe8a25eb9ec0b5a71c82eb3895"},
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f5d10bae5d78e4551b7be7a9b29643a95aded9d0f602aa2ba584f0388e7a557"},
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:249c6470a2b60935bafd1d1d13cd613f8cd8388d53461c67397ee6a0f5dce741"},
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c5a74c359b2d47d26cdbbc7845e9662d6b08a1e915eb015d044729e92e7050b7"},
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:b5bcf60a228acae568e9911f410f9d9e0d43197d030ae5799e20dca8df588287"},
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:187d18082694a29005ba2944c882344b6748d5be69e3a89bf3cc9d878e548d5a"},
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:81bf654678e575403736b85ba3a7867e31c2c30a69bc57fe88e3ace52fb17b89"},
{file = "charset_normalizer-3.3.0-cp38-cp38-win32.whl", hash = "sha256:85a32721ddde63c9df9ebb0d2045b9691d9750cb139c161c80e500d210f5e26e"},
{file = "charset_normalizer-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:468d2a840567b13a590e67dd276c570f8de00ed767ecc611994c301d0f8c014f"},
{file = "charset_normalizer-3.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e0fc42822278451bc13a2e8626cf2218ba570f27856b536e00cfa53099724828"},
{file = "charset_normalizer-3.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:09c77f964f351a7369cc343911e0df63e762e42bac24cd7d18525961c81754f4"},
{file = "charset_normalizer-3.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12ebea541c44fdc88ccb794a13fe861cc5e35d64ed689513a5c03d05b53b7c82"},
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:805dfea4ca10411a5296bcc75638017215a93ffb584c9e344731eef0dcfb026a"},
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96c2b49eb6a72c0e4991d62406e365d87067ca14c1a729a870d22354e6f68115"},
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaf7b34c5bc56b38c931a54f7952f1ff0ae77a2e82496583b247f7c969eb1479"},
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:619d1c96099be5823db34fe89e2582b336b5b074a7f47f819d6b3a57ff7bdb86"},
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0ac5e7015a5920cfce654c06618ec40c33e12801711da6b4258af59a8eff00a"},
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:93aa7eef6ee71c629b51ef873991d6911b906d7312c6e8e99790c0f33c576f89"},
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7966951325782121e67c81299a031f4c115615e68046f79b85856b86ebffc4cd"},
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:02673e456dc5ab13659f85196c534dc596d4ef260e4d86e856c3b2773ce09843"},
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:c2af80fb58f0f24b3f3adcb9148e6203fa67dd3f61c4af146ecad033024dde43"},
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:153e7b6e724761741e0974fc4dcd406d35ba70b92bfe3fedcb497226c93b9da7"},
{file = "charset_normalizer-3.3.0-cp39-cp39-win32.whl", hash = "sha256:d47ecf253780c90ee181d4d871cd655a789da937454045b17b5798da9393901a"},
{file = "charset_normalizer-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:d97d85fa63f315a8bdaba2af9a6a686e0eceab77b3089af45133252618e70884"},
{file = "charset_normalizer-3.3.0-py3-none-any.whl", hash = "sha256:e46cd37076971c1040fc8c41273a8b3e2c624ce4f2be3f5dfcb7a430c1d3acc2"},
]
[[package]]
@@ -153,40 +168,6 @@ files = [
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
]
[[package]]
name = "numpy"
version = "1.25.2"
description = "Fundamental package for array computing in Python"
optional = false
python-versions = ">=3.9"
files = [
{file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"},
{file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"},
{file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"},
{file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"},
{file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"},
{file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"},
{file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"},
{file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"},
{file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"},
{file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"},
{file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"},
{file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"},
{file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"},
{file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"},
{file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"},
{file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"},
{file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"},
{file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"},
{file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"},
{file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"},
{file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"},
{file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"},
{file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"},
{file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"},
{file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"},
]
[[package]]
name = "numpy"
version = "1.26.0"
@@ -244,47 +225,54 @@ et-xmlfile = "*"
[[package]]
name = "packaging"
version = "23.1"
version = "23.2"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.7"
files = [
{file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
{file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
{file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
{file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
]
[[package]]
name = "pandas"
version = "2.1.0"
version = "2.1.1"
description = "Powerful data structures for data analysis, time series, and statistics"
optional = false
python-versions = ">=3.9"
files = [
{file = "pandas-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:40dd20439ff94f1b2ed55b393ecee9cb6f3b08104c2c40b0cb7186a2f0046242"},
{file = "pandas-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4f38e4fedeba580285eaac7ede4f686c6701a9e618d8a857b138a126d067f2f"},
{file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e6a0fe052cf27ceb29be9429428b4918f3740e37ff185658f40d8702f0b3e09"},
{file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d81e1813191070440d4c7a413cb673052b3b4a984ffd86b8dd468c45742d3cc"},
{file = "pandas-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eb20252720b1cc1b7d0b2879ffc7e0542dd568f24d7c4b2347cb035206936421"},
{file = "pandas-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:38f74ef7ebc0ffb43b3d633e23d74882bce7e27bfa09607f3c5d3e03ffd9a4a5"},
{file = "pandas-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cda72cc8c4761c8f1d97b169661f23a86b16fdb240bdc341173aee17e4d6cedd"},
{file = "pandas-2.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d97daeac0db8c993420b10da4f5f5b39b01fc9ca689a17844e07c0a35ac96b4b"},
{file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8c58b1113892e0c8078f006a167cc210a92bdae23322bb4614f2f0b7a4b510f"},
{file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:629124923bcf798965b054a540f9ccdfd60f71361255c81fa1ecd94a904b9dd3"},
{file = "pandas-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:70cf866af3ab346a10debba8ea78077cf3a8cd14bd5e4bed3d41555a3280041c"},
{file = "pandas-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d53c8c1001f6a192ff1de1efe03b31a423d0eee2e9e855e69d004308e046e694"},
{file = "pandas-2.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:86f100b3876b8c6d1a2c66207288ead435dc71041ee4aea789e55ef0e06408cb"},
{file = "pandas-2.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28f330845ad21c11db51e02d8d69acc9035edfd1116926ff7245c7215db57957"},
{file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9a6ccf0963db88f9b12df6720e55f337447aea217f426a22d71f4213a3099a6"},
{file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99e678180bc59b0c9443314297bddce4ad35727a1a2656dbe585fd78710b3b9"},
{file = "pandas-2.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b31da36d376d50a1a492efb18097b9101bdbd8b3fbb3f49006e02d4495d4c644"},
{file = "pandas-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0164b85937707ec7f70b34a6c3a578dbf0f50787f910f21ca3b26a7fd3363437"},
{file = "pandas-2.1.0.tar.gz", hash = "sha256:62c24c7fc59e42b775ce0679cfa7b14a5f9bfb7643cfbe708c960699e05fb918"},
{file = "pandas-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58d997dbee0d4b64f3cb881a24f918b5f25dd64ddf31f467bb9b67ae4c63a1e4"},
{file = "pandas-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02304e11582c5d090e5a52aec726f31fe3f42895d6bfc1f28738f9b64b6f0614"},
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffa8f0966de2c22de408d0e322db2faed6f6e74265aa0856f3824813cf124363"},
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1f84c144dee086fe4f04a472b5cd51e680f061adf75c1ae4fc3a9275560f8f4"},
{file = "pandas-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:75ce97667d06d69396d72be074f0556698c7f662029322027c226fd7a26965cb"},
{file = "pandas-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:4c3f32fd7c4dccd035f71734df39231ac1a6ff95e8bdab8d891167197b7018d2"},
{file = "pandas-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e2959720b70e106bb1d8b6eadd8ecd7c8e99ccdbe03ee03260877184bb2877d"},
{file = "pandas-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25e8474a8eb258e391e30c288eecec565bfed3e026f312b0cbd709a63906b6f8"},
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8bd1685556f3374520466998929bade3076aeae77c3e67ada5ed2b90b4de7f0"},
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc3657869c7902810f32bd072f0740487f9e030c1a3ab03e0af093db35a9d14e"},
{file = "pandas-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:05674536bd477af36aa2effd4ec8f71b92234ce0cc174de34fd21e2ee99adbc2"},
{file = "pandas-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:b407381258a667df49d58a1b637be33e514b07f9285feb27769cedb3ab3d0b3a"},
{file = "pandas-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c747793c4e9dcece7bb20156179529898abf505fe32cb40c4052107a3c620b49"},
{file = "pandas-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3bcad1e6fb34b727b016775bea407311f7721db87e5b409e6542f4546a4951ea"},
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5ec7740f9ccb90aec64edd71434711f58ee0ea7f5ed4ac48be11cfa9abf7317"},
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29deb61de5a8a93bdd033df328441a79fcf8dd3c12d5ed0b41a395eef9cd76f0"},
{file = "pandas-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f99bebf19b7e03cf80a4e770a3e65eee9dd4e2679039f542d7c1ace7b7b1daa"},
{file = "pandas-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:84e7e910096416adec68075dc87b986ff202920fb8704e6d9c8c9897fe7332d6"},
{file = "pandas-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366da7b0e540d1b908886d4feb3d951f2f1e572e655c1160f5fde28ad4abb750"},
{file = "pandas-2.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e50e72b667415a816ac27dfcfe686dc5a0b02202e06196b943d54c4f9c7693e"},
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc1ab6a25da197f03ebe6d8fa17273126120874386b4ac11c1d687df288542dd"},
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0dbfea0dd3901ad4ce2306575c54348d98499c95be01b8d885a2737fe4d7a98"},
{file = "pandas-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0489b0e6aa3d907e909aef92975edae89b1ee1654db5eafb9be633b0124abe97"},
{file = "pandas-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:4cdb0fab0400c2cb46dafcf1a0fe084c8bb2480a1fa8d81e19d15e12e6d4ded2"},
{file = "pandas-2.1.1.tar.gz", hash = "sha256:fecb198dc389429be557cde50a2d46da8434a17fe37d7d41ff102e3987fd947b"},
]
[package.dependencies]
numpy = [
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.23.2", markers = "python_version >= \"3.11\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@@ -432,13 +420,13 @@ files = [
[[package]]
name = "urllib3"
version = "2.0.4"
version = "2.0.6"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.7"
files = [
{file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"},
{file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"},
{file = "urllib3-2.0.6-py3-none-any.whl", hash = "sha256:7a7c7003b000adf9e7ca2a377c9688bbc54ed41b985789ed576570342a375cd2"},
{file = "urllib3-2.0.6.tar.gz", hash = "sha256:b19e1a85d206b56d7df1d5e683df4a7725252a964e3993648dd0fb5a1c157564"},
]
[package.extras]
@@ -449,5 +437,5 @@ zstd = ["zstandard (>=0.18.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "3647d568f5623dd762f19029230626a62e68309fa2ef8be49a36382c19264a5f"
python-versions = ">=3.10,<3.13"
content-hash = "09ad811d74a42363ff4c3ccd012d8f73c89d7d978e5a6445b0f3d2e231922f1b"

View File

@@ -1,18 +1,18 @@
[tool.poetry]
name = "homeharvest"
version = "0.2.8"
version = "0.3.6"
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
homepage = "https://github.com/Bunsly/HomeHarvest"
readme = "README.md"
[tool.poetry.scripts]
homeharvest = "homeharvest.cli:main"
[tool.poetry.dependencies]
python = "^3.10"
python = ">=3.10,<3.13"
requests = "^2.31.0"
pandas = "^2.1.0"
pandas = "^2.1.1"
openpyxl = "^3.1.2"

View File

@@ -1,26 +1,108 @@
from homeharvest import scrape_property
from homeharvest.exceptions import (
InvalidSite,
InvalidListingType,
NoResultsFound,
GeoCoordsNotFound,
)
def test_realtor_pending_or_contingent():
pending_or_contingent_result = scrape_property(
location="Surprise, AZ", listing_type="pending"
)
regular_result = scrape_property(location="Surprise, AZ", listing_type="for_sale")
assert all(
[
result is not None
for result in [pending_or_contingent_result, regular_result]
]
)
assert len(pending_or_contingent_result) != len(regular_result)
def test_realtor_pending_comps():
pending_comps = scrape_property(
location="2530 Al Lipscomb Way",
radius=5,
past_days=180,
listing_type="pending",
)
for_sale_comps = scrape_property(
location="2530 Al Lipscomb Way",
radius=5,
past_days=180,
listing_type="for_sale",
)
sold_comps = scrape_property(
location="2530 Al Lipscomb Way",
radius=5,
past_days=180,
listing_type="sold",
)
results = [pending_comps, for_sale_comps, sold_comps]
assert all([result is not None for result in results])
#: assert all lengths are different
assert len(set([len(result) for result in results])) == len(results)
def test_realtor_comps():
result = scrape_property(
location="2530 Al Lipscomb Way",
radius=0.5,
past_days=180,
listing_type="sold",
)
assert result is not None and len(result) > 0
def test_realtor_last_x_days_sold():
days_result_30 = scrape_property(
location="Dallas, TX", listing_type="sold", past_days=30
)
days_result_10 = scrape_property(
location="Dallas, TX", listing_type="sold", past_days=10
)
assert all(
[result is not None for result in [days_result_30, days_result_10]]
) and len(days_result_30) != len(days_result_10)
def test_realtor_single_property():
results = [
scrape_property(
location="15509 N 172nd Dr, Surprise, AZ 85388",
listing_type="for_sale",
),
scrape_property(
location="2530 Al Lipscomb Way",
listing_type="for_sale",
),
]
assert all([result is not None for result in results])
def test_realtor():
results = [
scrape_property(
location="2530 Al Lipscomb Way",
site_name="realtor.com",
listing_type="for_sale",
),
scrape_property(
location="Phoenix, AZ", site_name=["realtor.com"], listing_type="for_rent"
location="Phoenix, AZ", listing_type="for_rent"
), #: does not support "city, state, USA" format
scrape_property(
location="Dallas, TX", site_name="realtor.com", listing_type="sold"
location="Dallas, TX", listing_type="sold"
), #: does not support "city, state, USA" format
scrape_property(location="85281", site_name="realtor.com"),
scrape_property(location="85281"),
]
assert all([result is not None for result in results])
@@ -30,11 +112,10 @@ def test_realtor():
bad_results += [
scrape_property(
location="abceefg ju098ot498hh9",
site_name="realtor.com",
listing_type="for_sale",
)
]
except (InvalidSite, InvalidListingType, NoResultsFound, GeoCoordsNotFound):
except (InvalidListingType, NoResultsFound):
assert True
assert all([result is None for result in bad_results])

View File

@@ -1,32 +0,0 @@
from homeharvest import scrape_property
from homeharvest.exceptions import (
InvalidSite,
InvalidListingType,
NoResultsFound,
GeoCoordsNotFound,
)
def test_redfin():
results = [
scrape_property(location="2530 Al Lipscomb Way", site_name="redfin", listing_type="for_sale"),
scrape_property(location="Phoenix, AZ, USA", site_name=["redfin"], listing_type="for_rent"),
scrape_property(location="Dallas, TX, USA", site_name="redfin", listing_type="sold"),
scrape_property(location="85281", site_name="redfin"),
]
assert all([result is not None for result in results])
bad_results = []
try:
bad_results += [
scrape_property(
location="abceefg ju098ot498hh9",
site_name="redfin",
listing_type="for_sale",
)
]
except (InvalidSite, InvalidListingType, NoResultsFound, GeoCoordsNotFound):
assert True
assert all([result is None for result in bad_results])

View File

@@ -1,24 +0,0 @@
from homeharvest.utils import parse_address_one, parse_address_two
def test_parse_address_one():
test_data = [
("4303 E Cactus Rd Apt 126", ("4303 E Cactus Rd", "#126")),
("1234 Elm Street apt 2B", ("1234 Elm Street", "#2B")),
("1234 Elm Street UNIT 3A", ("1234 Elm Street", "#3A")),
("1234 Elm Street unit 3A", ("1234 Elm Street", "#3A")),
("1234 Elm Street SuIte 3A", ("1234 Elm Street", "#3A")),
]
for input_data, (exp_addr_one, exp_addr_two) in test_data:
address_one, address_two = parse_address_one(input_data)
assert address_one == exp_addr_one
assert address_two == exp_addr_two
def test_parse_address_two():
test_data = [("Apt 126", "#126"), ("apt 2B", "#2B"), ("UNIT 3A", "#3A"), ("unit 3A", "#3A"), ("SuIte 3A", "#3A")]
for input_data, expected in test_data:
output = parse_address_two(input_data)
assert output == expected

View File

@@ -1,32 +0,0 @@
from homeharvest import scrape_property
from homeharvest.exceptions import (
InvalidSite,
InvalidListingType,
NoResultsFound,
GeoCoordsNotFound,
)
def test_zillow():
results = [
scrape_property(location="2530 Al Lipscomb Way", site_name="zillow", listing_type="for_sale"),
scrape_property(location="Phoenix, AZ, USA", site_name=["zillow"], listing_type="for_rent"),
scrape_property(location="Dallas, TX, USA", site_name="zillow", listing_type="sold"),
scrape_property(location="85281", site_name="zillow"),
]
assert all([result is not None for result in results])
bad_results = []
try:
bad_results += [
scrape_property(
location="abceefg ju098ot498hh9",
site_name="zillow",
listing_type="for_sale",
)
]
except (InvalidSite, InvalidListingType, NoResultsFound, GeoCoordsNotFound):
assert True
assert all([result is None for result in bad_results])