mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-05 03:54:29 -08:00
Compare commits
161 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f726548cc6 | ||
|
|
fad7d670eb | ||
|
|
89a6f93c9f | ||
|
|
e1090b06e4 | ||
|
|
5036e74b60 | ||
|
|
2cb544bc8d | ||
|
|
68cb365e03 | ||
|
|
23876d5725 | ||
|
|
b59d55f6b5 | ||
|
|
3c3adb5f29 | ||
|
|
6ede8622cc | ||
|
|
9f50d33bdb | ||
|
|
735ec021f7 | ||
|
|
00537329cf | ||
|
|
a9225b532f | ||
|
|
ba7ad069c9 | ||
|
|
22bda972b0 | ||
|
|
6f5bbf79a4 | ||
|
|
608cceba34 | ||
|
|
3609586995 | ||
|
|
68c7e411e4 | ||
|
|
5e825601a7 | ||
|
|
ce3f94d0af | ||
|
|
4a1116440d | ||
|
|
2d092c595f | ||
|
|
4dbb064fe9 | ||
|
|
4e78248032 | ||
|
|
37e20f4469 | ||
|
|
8a5f0dc2c9 | ||
|
|
de692faae2 | ||
|
|
6bb68766fc | ||
|
|
446d5488b8 | ||
|
|
68e15ce696 | ||
|
|
c4870677c2 | ||
|
|
51bde20c3c | ||
|
|
f8c0dd766d | ||
|
|
f06a01678c | ||
|
|
d2879734e6 | ||
|
|
bf81ef413f | ||
|
|
29664e4eee | ||
|
|
088088ae51 | ||
|
|
40bbf76db1 | ||
|
|
1f1ca8068f | ||
|
|
8388d47f73 | ||
|
|
ba503b0ca3 | ||
|
|
8962d619e1 | ||
|
|
3b7c17b7b5 | ||
|
|
59317fd6fc | ||
|
|
928b431d1f | ||
|
|
896f862137 | ||
|
|
3174f5076c | ||
|
|
2abbb913a8 | ||
|
|
73b6d5b33f | ||
|
|
da39c989d9 | ||
|
|
01c53f9399 | ||
|
|
9200c17df2 | ||
|
|
9e262bf214 | ||
|
|
82f78fb578 | ||
|
|
b0e40df00a | ||
|
|
2fc40e0dad | ||
|
|
254f3a68a1 | ||
|
|
05713c76b0 | ||
|
|
9120cc9bfe | ||
|
|
eee4b19515 | ||
|
|
c25961eded | ||
|
|
0884c3d163 | ||
|
|
8f37bfdeb8 | ||
|
|
48c2338276 | ||
|
|
f58a1f4a74 | ||
|
|
4cef926d7d | ||
|
|
e82eeaa59f | ||
|
|
644f16b25b | ||
|
|
e9ddc6df92 | ||
|
|
50fb1c391d | ||
|
|
4f91f9dadb | ||
|
|
66e55173b1 | ||
|
|
f6054e8746 | ||
|
|
e8d9235ee6 | ||
|
|
043f091158 | ||
|
|
eae8108978 | ||
|
|
0a39357a07 | ||
|
|
8f06d46ddb | ||
|
|
0dae14ccfc | ||
|
|
9aaabdd5d8 | ||
|
|
cdf41fe9f2 | ||
|
|
1f0feb836d | ||
|
|
5f31beda46 | ||
|
|
fd9cdea499 | ||
|
|
93a1cbe17f | ||
|
|
49d27943c4 | ||
|
|
05fca9b7e6 | ||
|
|
20ce44fb3a | ||
|
|
52017c1bb5 | ||
|
|
dba1c03081 | ||
|
|
1fc2d8c549 | ||
|
|
02d112eea0 | ||
|
|
30e510882b | ||
|
|
78b56c2cac | ||
|
|
087854a688 | ||
|
|
80586467a8 | ||
|
|
3494b152b8 | ||
|
|
6c6fef80ed | ||
|
|
62e3321277 | ||
|
|
80186ee8c5 | ||
|
|
3ec47c5b6a | ||
|
|
42e8ac4de9 | ||
|
|
e1917009ae | ||
|
|
7297f0eb33 | ||
|
|
2eec389838 | ||
|
|
b01162161d | ||
|
|
906ce92685 | ||
|
|
cc76e067b2 | ||
|
|
1f0c351974 | ||
|
|
a1684f87db | ||
|
|
2ae3ebe28e | ||
|
|
ae3961514b | ||
|
|
0621b01d9a | ||
|
|
fbbd56d930 | ||
|
|
82092faa28 | ||
|
|
8f90a80b0a | ||
|
|
d5b4d80f96 | ||
|
|
086bcfd224 | ||
|
|
4726764482 | ||
|
|
ca260fd2b4 | ||
|
|
94e5b090da | ||
|
|
d0a6a66b6a | ||
|
|
8e140a0e45 | ||
|
|
588689c230 | ||
|
|
c7a4bfd5e4 | ||
|
|
fe351ab57c | ||
|
|
5d0f519a85 | ||
|
|
869d7e7c51 | ||
|
|
ffd3ce6aed | ||
|
|
471e53118e | ||
|
|
dc8c15959f | ||
|
|
10c01f373e | ||
|
|
fd01bfb8b8 | ||
|
|
c3c6bdd2c5 | ||
|
|
29897b8fbe | ||
|
|
54af03c86a | ||
|
|
6b02394e95 | ||
|
|
ba249ca20d | ||
|
|
ba9fe806a7 | ||
|
|
905cfcae2c | ||
|
|
3697b7cf2d | ||
|
|
b76c659f94 | ||
|
|
a433e46258 | ||
|
|
df3519ae18 | ||
|
|
2f5ea1ca88 | ||
|
|
2f3b012747 | ||
|
|
5ea0fa0bdb | ||
|
|
2d6e746ae9 | ||
|
|
a772fe45aa | ||
|
|
4764b6bd37 | ||
|
|
0946abd35a | ||
|
|
0a2fb4cb31 | ||
|
|
af1f2fa531 | ||
|
|
8107103383 | ||
|
|
469b703288 | ||
|
|
a79c4c6872 | ||
|
|
ed7e76e4b0 |
7
.gitignore
vendored
7
.gitignore
vendored
@@ -1,2 +1,7 @@
|
||||
/.idea
|
||||
dist
|
||||
**/dist/
|
||||
**/__pycache__/
|
||||
**/.pytest_cache/
|
||||
*.pyc
|
||||
/.ipynb_checkpoints/
|
||||
*.csv
|
||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 Zachary Hampton
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
189
README.md
189
README.md
@@ -1 +1,188 @@
|
||||
# HomeHarvest
|
||||
<img src="https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/d1a2bf8b-09f5-4c57-b33a-0ada8a34f12d" width="400">
|
||||
|
||||
**HomeHarvest** is a simple, yet comprehensive, real estate scraping library that extracts and formats data in the style of MLS listings.
|
||||
|
||||
[](https://replit.com/@ZacharyHampton/HomeHarvestDemo)
|
||||
|
||||
**Not technical?** Try out the web scraping tool on our site at [tryhomeharvest.com](https://tryhomeharvest.com).
|
||||
|
||||
*Looking to build a data-focused software product?* **[Book a call](https://calendly.com/bunsly/15min)** *to work with us.*
|
||||
|
||||
Check out another project we wrote: ***[JobSpy](https://github.com/Bunsly/JobSpy)** – a Python package for job scraping*
|
||||
|
||||
## HomeHarvest Features
|
||||
|
||||
- **Source**: Fetches properties directly from **Realtor.com**.
|
||||
- **Data Format**: Structures data to resemble MLS listings.
|
||||
- **Export Flexibility**: Options to save as either CSV or Excel.
|
||||
- **Usage Modes**:
|
||||
- **Python**: For those who'd like to integrate scraping into their Python scripts.
|
||||
- **CLI**: For users who prefer command-line operations.
|
||||
|
||||
|
||||
[Video Guide for HomeHarvest](https://youtu.be/J1qgNPgmSLI) - _updated for release v0.3.4_
|
||||
|
||||

|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install homeharvest
|
||||
```
|
||||
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
|
||||
|
||||
## Usage
|
||||
|
||||
### Python
|
||||
|
||||
```py
|
||||
from homeharvest import scrape_property
|
||||
from datetime import datetime
|
||||
|
||||
# Generate filename based on current timestamp
|
||||
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"HomeHarvest_{current_timestamp}.csv"
|
||||
|
||||
properties = scrape_property(
|
||||
location="San Diego, CA",
|
||||
listing_type="sold", # or (for_sale, for_rent, pending)
|
||||
past_days=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
|
||||
# mls_only=True, # only fetch MLS listings
|
||||
# proxy="http://user:pass@host:port" # use a proxy to change your IP address
|
||||
)
|
||||
print(f"Number of properties: {len(properties)}")
|
||||
|
||||
# Export to csv
|
||||
properties.to_csv(filename, index=False)
|
||||
print(properties.head())
|
||||
```
|
||||
|
||||
### CLI
|
||||
|
||||
```
|
||||
usage: homeharvest [-l {for_sale,for_rent,sold}] [-o {excel,csv}] [-f FILENAME] [-p PROXY] [-d DAYS] [-r RADIUS] [-m] [-c] location
|
||||
|
||||
Home Harvest Property Scraper
|
||||
|
||||
positional arguments:
|
||||
location Location to scrape (e.g., San Francisco, CA)
|
||||
|
||||
options:
|
||||
-l {for_sale,for_rent,sold,pending}, --listing_type {for_sale,for_rent,sold,pending}
|
||||
Listing type to scrape
|
||||
-o {excel,csv}, --output {excel,csv}
|
||||
Output format
|
||||
-f FILENAME, --filename FILENAME
|
||||
Name of the output file (without extension)
|
||||
-p PROXY, --proxy PROXY
|
||||
Proxy to use for scraping
|
||||
-d DAYS, --days DAYS Sold/listed in last _ days filter.
|
||||
-r RADIUS, --radius RADIUS
|
||||
Get comparable properties within _ (e.g., 0.0) miles. Only applicable for individual addresses.
|
||||
-m, --mls_only If set, fetches only MLS listings.
|
||||
```
|
||||
```bash
|
||||
homeharvest "San Francisco, CA" -l for_rent -o excel -f HomeHarvest
|
||||
```
|
||||
|
||||
|
||||
## Output
|
||||
```plaintext
|
||||
>>> properties.head()
|
||||
MLS MLS # Status Style ... COEDate LotSFApx PrcSqft Stories
|
||||
0 SDCA 230018348 SOLD CONDOS ... 2023-10-03 290110 803 2
|
||||
1 SDCA 230016614 SOLD TOWNHOMES ... 2023-10-03 None 838 3
|
||||
2 SDCA 230016367 SOLD CONDOS ... 2023-10-03 30056 649 1
|
||||
3 MRCA NDP2306335 SOLD SINGLE_FAMILY ... 2023-10-03 7519 661 2
|
||||
4 SDCA 230014532 SOLD CONDOS ... 2023-10-03 None 752 1
|
||||
[5 rows x 22 columns]
|
||||
```
|
||||
|
||||
### Parameters for `scrape_property()`
|
||||
```
|
||||
Required
|
||||
├── location (str): The address in various formats - this could be just a zip code, a full address, or city/state, etc.
|
||||
└── listing_type (option): Choose the type of listing.
|
||||
- 'for_rent'
|
||||
- 'for_sale'
|
||||
- 'sold'
|
||||
- 'pending'
|
||||
|
||||
Optional
|
||||
├── radius (decimal): Radius in miles to find comparable properties based on individual addresses.
|
||||
│ Example: 5.5 (fetches properties within a 5.5-mile radius if location is set to a specific address; otherwise, ignored)
|
||||
│
|
||||
├── past_days (integer): Number of past days to filter properties. Utilizes 'last_sold_date' for 'sold' listing types, and 'list_date' for others (for_rent, for_sale).
|
||||
│ Example: 30 (fetches properties listed/sold in the last 30 days)
|
||||
│
|
||||
├── mls_only (True/False): If set, fetches only MLS listings (mainly applicable to 'sold' listings)
|
||||
│
|
||||
└── proxy (string): In format 'http://user:pass@host:port'
|
||||
|
||||
```
|
||||
### Property Schema
|
||||
```plaintext
|
||||
Property
|
||||
├── Basic Information:
|
||||
│ ├── property_url
|
||||
│ ├── mls
|
||||
│ ├── mls_id
|
||||
│ └── status
|
||||
|
||||
├── Address Details:
|
||||
│ ├── street
|
||||
│ ├── unit
|
||||
│ ├── city
|
||||
│ ├── state
|
||||
│ └── zip_code
|
||||
|
||||
├── Property Description:
|
||||
│ ├── style
|
||||
│ ├── beds
|
||||
│ ├── full_baths
|
||||
│ ├── half_baths
|
||||
│ ├── sqft
|
||||
│ ├── year_built
|
||||
│ ├── stories
|
||||
│ └── lot_sqft
|
||||
|
||||
├── Property Listing Details:
|
||||
│ ├── days_on_mls
|
||||
│ ├── list_price
|
||||
│ ├── list_date
|
||||
│ ├── sold_price
|
||||
│ ├── last_sold_date
|
||||
│ ├── price_per_sqft
|
||||
│ └── hoa_fee
|
||||
|
||||
├── Location Details:
|
||||
│ ├── latitude
|
||||
│ ├── longitude
|
||||
|
||||
└── Parking Details:
|
||||
└── parking_garage
|
||||
```
|
||||
|
||||
### Exceptions
|
||||
The following exceptions may be raised when using HomeHarvest:
|
||||
|
||||
- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
|
||||
- `NoResultsFound` - no properties found from your search
|
||||
|
||||
|
||||
## Frequently Asked Questions
|
||||
---
|
||||
|
||||
**Q: Encountering issues with your searches?**
|
||||
**A:** Try to broaden the parameters you're using. If problems persist, [submit an issue](https://github.com/ZacharyHampton/HomeHarvest/issues).
|
||||
|
||||
---
|
||||
|
||||
**Q: Received a Forbidden 403 response code?**
|
||||
**A:** This indicates that you have been blocked by Realtor.com for sending too many requests. We recommend:
|
||||
|
||||
- Waiting a few seconds between requests.
|
||||
- Trying a VPN or useing a proxy as a parameter to scrape_property() to change your IP address.
|
||||
|
||||
---
|
||||
|
||||
|
||||
141
examples/HomeHarvest_Demo.ipynb
Normal file
141
examples/HomeHarvest_Demo.ipynb
Normal file
@@ -0,0 +1,141 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cb48903e-5021-49fe-9688-45cd0bc05d0f",
|
||||
"metadata": {
|
||||
"is_executing": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from homeharvest import scrape_property\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "156488ce-0d5f-43c5-87f4-c33e9c427860",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pd.set_option('display.max_columns', None) # Show all columns\n",
|
||||
"pd.set_option('display.max_rows', None) # Show all rows\n",
|
||||
"pd.set_option('display.width', None) # Auto-adjust display width to fit console\n",
|
||||
"pd.set_option('display.max_colwidth', 50) # Limit max column width to 50 characters"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1c8b9744-8606-4e9b-8add-b90371a249a7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# check for sale properties\n",
|
||||
"scrape_property(\n",
|
||||
" location=\"dallas\",\n",
|
||||
" listing_type=\"for_sale\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aaf86093",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# search a specific address\n",
|
||||
"scrape_property(\n",
|
||||
" location=\"2530 Al Lipscomb Way\",\n",
|
||||
" listing_type=\"for_sale\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ab7b4c21-da1d-4713-9df4-d7425d8ce21e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# check rentals\n",
|
||||
"scrape_property(\n",
|
||||
" location=\"chicago, illinois\",\n",
|
||||
" listing_type=\"for_rent\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "af280cd3",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# check sold properties\n",
|
||||
"properties = scrape_property(\n",
|
||||
" location=\"90210\",\n",
|
||||
" listing_type=\"sold\",\n",
|
||||
" past_days=10\n",
|
||||
")\n",
|
||||
"display(properties)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "628c1ce2",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"is_executing": true,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# display clickable URLs\n",
|
||||
"from IPython.display import display, HTML\n",
|
||||
"properties['property_url'] = '<a href=\"' + properties['property_url'] + '\" target=\"_blank\">' + properties['property_url'] + '</a>'\n",
|
||||
"\n",
|
||||
"html = properties.to_html(escape=False)\n",
|
||||
"truncate_width = f'<style>.dataframe td {{ max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }}</style>{html}'\n",
|
||||
"display(HTML(truncate_width))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
20
examples/HomeHarvest_Demo.py
Normal file
20
examples/HomeHarvest_Demo.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from homeharvest import scrape_property
|
||||
from datetime import datetime
|
||||
|
||||
# Generate filename based on current timestamp
|
||||
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"HomeHarvest_{current_timestamp}.csv"
|
||||
|
||||
properties = scrape_property(
|
||||
location="San Diego, CA",
|
||||
listing_type="sold", # or (for_sale, for_rent)
|
||||
past_days=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
|
||||
# pending_or_contingent=True # use on for_sale listings to find pending / contingent listings
|
||||
# mls_only=True, # only fetch MLS listings
|
||||
# proxy="http://user:pass@host:port" # use a proxy to change your IP address
|
||||
)
|
||||
print(f"Number of properties: {len(properties)}")
|
||||
|
||||
# Export to csv
|
||||
properties.to_csv(filename, index=False)
|
||||
print(properties.head())
|
||||
@@ -0,0 +1,47 @@
|
||||
import warnings
|
||||
import pandas as pd
|
||||
from .core.scrapers import ScraperInput
|
||||
from .utils import process_result, ordered_properties, validate_input
|
||||
from .core.scrapers.realtor import RealtorScraper
|
||||
from .core.scrapers.models import ListingType
|
||||
from .exceptions import InvalidListingType, NoResultsFound
|
||||
|
||||
|
||||
def scrape_property(
|
||||
location: str,
|
||||
listing_type: str = "for_sale",
|
||||
radius: float = None,
|
||||
mls_only: bool = False,
|
||||
past_days: int = None,
|
||||
proxy: str = None,
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Scrape properties from Realtor.com based on a given location and listing type.
|
||||
:param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way")
|
||||
:param listing_type: Listing Type (for_sale, for_rent, sold)
|
||||
:param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses.
|
||||
:param mls_only: If set, fetches only listings with MLS IDs.
|
||||
:param past_days: Get properties sold or listed (dependent on your listing_type) in the last _ days.
|
||||
:param proxy: Proxy to use for scraping
|
||||
"""
|
||||
validate_input(listing_type)
|
||||
|
||||
scraper_input = ScraperInput(
|
||||
location=location,
|
||||
listing_type=ListingType[listing_type.upper()],
|
||||
proxy=proxy,
|
||||
radius=radius,
|
||||
mls_only=mls_only,
|
||||
last_x_days=past_days,
|
||||
)
|
||||
|
||||
site = RealtorScraper(scraper_input)
|
||||
results = site.search()
|
||||
|
||||
properties_dfs = [process_result(result) for result in results]
|
||||
if not properties_dfs:
|
||||
raise NoResultsFound("no results found for the query")
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", category=FutureWarning)
|
||||
return pd.concat(properties_dfs, ignore_index=True, axis=0)[ordered_properties]
|
||||
|
||||
89
homeharvest/cli.py
Normal file
89
homeharvest/cli.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import argparse
|
||||
import datetime
|
||||
from homeharvest import scrape_property
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Home Harvest Property Scraper")
|
||||
parser.add_argument(
|
||||
"location", type=str, help="Location to scrape (e.g., San Francisco, CA)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-l",
|
||||
"--listing_type",
|
||||
type=str,
|
||||
default="for_sale",
|
||||
choices=["for_sale", "for_rent", "sold", "pending"],
|
||||
help="Listing type to scrape",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
"--output",
|
||||
type=str,
|
||||
default="excel",
|
||||
choices=["excel", "csv"],
|
||||
help="Output format",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-f",
|
||||
"--filename",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Name of the output file (without extension)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-p", "--proxy", type=str, default=None, help="Proxy to use for scraping"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--days",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Sold/listed in last _ days filter.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-r",
|
||||
"--radius",
|
||||
type=float,
|
||||
default=None,
|
||||
help="Get comparable properties within _ (eg. 0.0) miles. Only applicable for individual addresses.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-m",
|
||||
"--mls_only",
|
||||
action="store_true",
|
||||
help="If set, fetches only MLS listings.",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
result = scrape_property(
|
||||
args.location,
|
||||
args.listing_type,
|
||||
radius=args.radius,
|
||||
proxy=args.proxy,
|
||||
mls_only=args.mls_only,
|
||||
past_days=args.days,
|
||||
)
|
||||
|
||||
if not args.filename:
|
||||
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
args.filename = f"HomeHarvest_{timestamp}"
|
||||
|
||||
if args.output == "excel":
|
||||
output_filename = f"{args.filename}.xlsx"
|
||||
result.to_excel(output_filename, index=False)
|
||||
print(f"Excel file saved as {output_filename}")
|
||||
elif args.output == "csv":
|
||||
output_filename = f"{args.filename}.csv"
|
||||
result.to_csv(output_filename, index=False)
|
||||
print(f"CSV file saved as {output_filename}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,48 @@
|
||||
from dataclasses import dataclass
|
||||
import requests
|
||||
from .models import Property, ListingType, SiteName
|
||||
|
||||
|
||||
@dataclass
|
||||
class ScraperInput:
|
||||
location: str
|
||||
listing_type: ListingType
|
||||
radius: float | None = None
|
||||
mls_only: bool | None = None
|
||||
proxy: str | None = None
|
||||
last_x_days: int | None = None
|
||||
|
||||
|
||||
class Scraper:
|
||||
def __init__(
|
||||
self,
|
||||
scraper_input: ScraperInput,
|
||||
session: requests.Session = None,
|
||||
):
|
||||
self.location = scraper_input.location
|
||||
self.listing_type = scraper_input.listing_type
|
||||
|
||||
if not session:
|
||||
self.session = requests.Session()
|
||||
else:
|
||||
self.session = session
|
||||
|
||||
if scraper_input.proxy:
|
||||
proxy_url = scraper_input.proxy
|
||||
proxies = {"http": proxy_url, "https": proxy_url}
|
||||
self.session.proxies.update(proxies)
|
||||
|
||||
self.listing_type = scraper_input.listing_type
|
||||
self.radius = scraper_input.radius
|
||||
self.last_x_days = scraper_input.last_x_days
|
||||
self.mls_only = scraper_input.mls_only
|
||||
|
||||
def search(self) -> list[Property]:
|
||||
...
|
||||
|
||||
@staticmethod
|
||||
def _parse_home(home) -> Property:
|
||||
...
|
||||
|
||||
def handle_location(self):
|
||||
...
|
||||
|
||||
67
homeharvest/core/scrapers/models.py
Normal file
67
homeharvest/core/scrapers/models.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class SiteName(Enum):
|
||||
ZILLOW = "zillow"
|
||||
REDFIN = "redfin"
|
||||
REALTOR = "realtor.com"
|
||||
|
||||
@classmethod
|
||||
def get_by_value(cls, value):
|
||||
for item in cls:
|
||||
if item.value == value:
|
||||
return item
|
||||
raise ValueError(f"{value} not found in {cls}")
|
||||
|
||||
|
||||
class ListingType(Enum):
|
||||
FOR_SALE = "FOR_SALE"
|
||||
FOR_RENT = "FOR_RENT"
|
||||
PENDING = "PENDING"
|
||||
SOLD = "SOLD"
|
||||
|
||||
|
||||
@dataclass
|
||||
class Address:
|
||||
street: str | None = None
|
||||
unit: str | None = None
|
||||
city: str | None = None
|
||||
state: str | None = None
|
||||
zip: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Description:
|
||||
style: str | None = None
|
||||
beds: int | None = None
|
||||
baths_full: int | None = None
|
||||
baths_half: int | None = None
|
||||
sqft: int | None = None
|
||||
lot_sqft: int | None = None
|
||||
sold_price: int | None = None
|
||||
year_built: int | None = None
|
||||
garage: float | None = None
|
||||
stories: int | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Property:
|
||||
property_url: str
|
||||
mls: str | None = None
|
||||
mls_id: str | None = None
|
||||
status: str | None = None
|
||||
address: Address | None = None
|
||||
|
||||
list_price: int | None = None
|
||||
list_date: str | None = None
|
||||
last_sold_date: str | None = None
|
||||
prc_sqft: int | None = None
|
||||
hoa_fee: int | None = None
|
||||
days_on_mls: int | None = None
|
||||
description: Description | None = None
|
||||
|
||||
latitude: float | None = None
|
||||
longitude: float | None = None
|
||||
neighborhoods: Optional[str] = None
|
||||
645
homeharvest/core/scrapers/realtor/__init__.py
Normal file
645
homeharvest/core/scrapers/realtor/__init__.py
Normal file
@@ -0,0 +1,645 @@
|
||||
"""
|
||||
homeharvest.realtor.__init__
|
||||
~~~~~~~~~~~~
|
||||
|
||||
This module implements the scraper for realtor.com
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Dict, Union, Optional
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
from .. import Scraper
|
||||
from ....exceptions import NoResultsFound
|
||||
from ..models import Property, Address, ListingType, Description
|
||||
|
||||
|
||||
class RealtorScraper(Scraper):
|
||||
SEARCH_GQL_URL = "https://www.realtor.com/api/v1/rdc_search_srp?client_id=rdc-search-new-communities&schema=vesta"
|
||||
PROPERTY_URL = "https://www.realtor.com/realestateandhomes-detail/"
|
||||
ADDRESS_AUTOCOMPLETE_URL = "https://parser-external.geo.moveaws.com/suggest"
|
||||
|
||||
def __init__(self, scraper_input):
|
||||
super().__init__(scraper_input)
|
||||
|
||||
def handle_location(self):
|
||||
params = {
|
||||
"input": self.location,
|
||||
"client_id": self.listing_type.value.lower().replace("_", "-"),
|
||||
"limit": "1",
|
||||
"area_types": "city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park",
|
||||
}
|
||||
|
||||
response = self.session.get(
|
||||
self.ADDRESS_AUTOCOMPLETE_URL,
|
||||
params=params,
|
||||
)
|
||||
response_json = response.json()
|
||||
|
||||
result = response_json["autocomplete"]
|
||||
|
||||
if not result:
|
||||
raise NoResultsFound("No results found for location: " + self.location)
|
||||
|
||||
return result[0]
|
||||
|
||||
def handle_listing(self, listing_id: str) -> list[Property]:
|
||||
query = """query Listing($listing_id: ID!) {
|
||||
listing(id: $listing_id) {
|
||||
source {
|
||||
id
|
||||
listing_id
|
||||
}
|
||||
address {
|
||||
street_number
|
||||
street_name
|
||||
street_suffix
|
||||
unit
|
||||
city
|
||||
state_code
|
||||
postal_code
|
||||
location {
|
||||
coordinate {
|
||||
lat
|
||||
lon
|
||||
}
|
||||
}
|
||||
}
|
||||
basic {
|
||||
sqft
|
||||
beds
|
||||
baths_full
|
||||
baths_half
|
||||
lot_sqft
|
||||
sold_price
|
||||
sold_price
|
||||
type
|
||||
price
|
||||
status
|
||||
sold_date
|
||||
list_date
|
||||
}
|
||||
details {
|
||||
year_built
|
||||
stories
|
||||
garage
|
||||
permalink
|
||||
}
|
||||
}
|
||||
}"""
|
||||
|
||||
variables = {"listing_id": listing_id}
|
||||
payload = {
|
||||
"query": query,
|
||||
"variables": variables,
|
||||
}
|
||||
|
||||
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
|
||||
response_json = response.json()
|
||||
|
||||
property_info = response_json["data"]["listing"]
|
||||
|
||||
mls = (
|
||||
property_info["source"].get("id")
|
||||
if "source" in property_info and isinstance(property_info["source"], dict)
|
||||
else None
|
||||
)
|
||||
|
||||
able_to_get_lat_long = (
|
||||
property_info
|
||||
and property_info.get("address")
|
||||
and property_info["address"].get("location")
|
||||
and property_info["address"]["location"].get("coordinate")
|
||||
)
|
||||
list_date_str = property_info["basic"]["list_date"].split("T")[0] if property_info["basic"].get(
|
||||
"list_date") else None
|
||||
last_sold_date_str = property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get(
|
||||
"sold_date") else None
|
||||
|
||||
list_date = datetime.strptime(list_date_str, "%Y-%m-%d") if list_date_str else None
|
||||
last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None
|
||||
today = datetime.now()
|
||||
|
||||
days_on_mls = None
|
||||
status = property_info["basic"]["status"].lower()
|
||||
if list_date:
|
||||
if status == "sold" and last_sold_date:
|
||||
days_on_mls = (last_sold_date - list_date).days
|
||||
elif status in ('for_sale', 'for_rent'):
|
||||
days_on_mls = (today - list_date).days
|
||||
if days_on_mls and days_on_mls < 0:
|
||||
days_on_mls = None
|
||||
|
||||
listing = Property(
|
||||
mls=mls,
|
||||
mls_id=property_info["source"].get("listing_id")
|
||||
if "source" in property_info and isinstance(property_info["source"], dict)
|
||||
else None,
|
||||
property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}",
|
||||
status=property_info["basic"]["status"].upper(),
|
||||
list_price=property_info["basic"]["price"],
|
||||
list_date=list_date,
|
||||
prc_sqft=property_info["basic"].get("price")
|
||||
/ property_info["basic"].get("sqft")
|
||||
if property_info["basic"].get("price")
|
||||
and property_info["basic"].get("sqft")
|
||||
else None,
|
||||
last_sold_date=last_sold_date,
|
||||
latitude=property_info["address"]["location"]["coordinate"].get("lat")
|
||||
if able_to_get_lat_long
|
||||
else None,
|
||||
longitude=property_info["address"]["location"]["coordinate"].get("lon")
|
||||
if able_to_get_lat_long
|
||||
else None,
|
||||
address=self._parse_address(property_info, search_type="handle_listing"),
|
||||
description=Description(
|
||||
style=property_info["basic"].get("type", "").upper(),
|
||||
beds=property_info["basic"].get("beds"),
|
||||
baths_full=property_info["basic"].get("baths_full"),
|
||||
baths_half=property_info["basic"].get("baths_half"),
|
||||
sqft=property_info["basic"].get("sqft"),
|
||||
lot_sqft=property_info["basic"].get("lot_sqft"),
|
||||
sold_price=property_info["basic"].get("sold_price"),
|
||||
year_built=property_info["details"].get("year_built"),
|
||||
garage=property_info["details"].get("garage"),
|
||||
stories=property_info["details"].get("stories"),
|
||||
),
|
||||
days_on_mls=days_on_mls
|
||||
)
|
||||
|
||||
return [listing]
|
||||
|
||||
def get_latest_listing_id(self, property_id: str) -> str | None:
|
||||
query = """query Property($property_id: ID!) {
|
||||
property(id: $property_id) {
|
||||
listings {
|
||||
listing_id
|
||||
primary
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
variables = {"property_id": property_id}
|
||||
payload = {
|
||||
"query": query,
|
||||
"variables": variables,
|
||||
}
|
||||
|
||||
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
|
||||
response_json = response.json()
|
||||
|
||||
property_info = response_json["data"]["property"]
|
||||
if property_info["listings"] is None:
|
||||
return None
|
||||
|
||||
primary_listing = next(
|
||||
(listing for listing in property_info["listings"] if listing["primary"]),
|
||||
None,
|
||||
)
|
||||
if primary_listing:
|
||||
return primary_listing["listing_id"]
|
||||
else:
|
||||
return property_info["listings"][0]["listing_id"]
|
||||
|
||||
def handle_address(self, property_id: str) -> list[Property]:
|
||||
"""
|
||||
Handles a specific address & returns one property
|
||||
"""
|
||||
query = """query Property($property_id: ID!) {
|
||||
property(id: $property_id) {
|
||||
property_id
|
||||
details {
|
||||
date_updated
|
||||
garage
|
||||
permalink
|
||||
year_built
|
||||
stories
|
||||
}
|
||||
address {
|
||||
street_number
|
||||
street_name
|
||||
street_suffix
|
||||
unit
|
||||
city
|
||||
state_code
|
||||
postal_code
|
||||
location {
|
||||
coordinate {
|
||||
lat
|
||||
lon
|
||||
}
|
||||
}
|
||||
}
|
||||
basic {
|
||||
baths
|
||||
beds
|
||||
price
|
||||
sqft
|
||||
lot_sqft
|
||||
type
|
||||
sold_price
|
||||
}
|
||||
public_record {
|
||||
lot_size
|
||||
sqft
|
||||
stories
|
||||
units
|
||||
year_built
|
||||
}
|
||||
}
|
||||
}"""
|
||||
|
||||
variables = {"property_id": property_id}
|
||||
|
||||
payload = {
|
||||
"query": query,
|
||||
"variables": variables,
|
||||
}
|
||||
|
||||
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
|
||||
response_json = response.json()
|
||||
|
||||
property_info = response_json["data"]["property"]
|
||||
|
||||
return [
|
||||
Property(
|
||||
mls_id=property_id,
|
||||
property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}",
|
||||
address=self._parse_address(
|
||||
property_info, search_type="handle_address"
|
||||
),
|
||||
description=self._parse_description(property_info),
|
||||
)
|
||||
]
|
||||
|
||||
def general_search(
|
||||
self, variables: dict, search_type: str
|
||||
) -> Dict[str, Union[int, list[Property]]]:
|
||||
"""
|
||||
Handles a location area & returns a list of properties
|
||||
"""
|
||||
results_query = """{
|
||||
count
|
||||
total
|
||||
results {
|
||||
property_id
|
||||
list_date
|
||||
status
|
||||
last_sold_price
|
||||
last_sold_date
|
||||
list_price
|
||||
price_per_sqft
|
||||
flags {
|
||||
is_contingent
|
||||
is_pending
|
||||
}
|
||||
description {
|
||||
sqft
|
||||
beds
|
||||
baths_full
|
||||
baths_half
|
||||
lot_sqft
|
||||
sold_price
|
||||
year_built
|
||||
garage
|
||||
sold_price
|
||||
type
|
||||
name
|
||||
stories
|
||||
}
|
||||
source {
|
||||
id
|
||||
listing_id
|
||||
}
|
||||
hoa {
|
||||
fee
|
||||
}
|
||||
location {
|
||||
address {
|
||||
street_number
|
||||
street_name
|
||||
street_suffix
|
||||
unit
|
||||
city
|
||||
state_code
|
||||
postal_code
|
||||
coordinate {
|
||||
lon
|
||||
lat
|
||||
}
|
||||
}
|
||||
neighborhoods {
|
||||
name
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}"""
|
||||
|
||||
date_param = (
|
||||
'sold_date: { min: "$today-%sD" }' % self.last_x_days
|
||||
if self.listing_type == ListingType.SOLD and self.last_x_days
|
||||
else (
|
||||
'list_date: { min: "$today-%sD" }' % self.last_x_days
|
||||
if self.last_x_days
|
||||
else ""
|
||||
)
|
||||
)
|
||||
|
||||
sort_param = (
|
||||
"sort: [{ field: sold_date, direction: desc }]"
|
||||
if self.listing_type == ListingType.SOLD
|
||||
else "sort: [{ field: list_date, direction: desc }]"
|
||||
)
|
||||
|
||||
pending_or_contingent_param = (
|
||||
"or_filters: { contingent: true, pending: true }"
|
||||
if self.listing_type == ListingType.PENDING
|
||||
else ""
|
||||
)
|
||||
|
||||
listing_type = ListingType.FOR_SALE if self.listing_type == ListingType.PENDING else self.listing_type
|
||||
|
||||
if search_type == "comps": #: comps search, came from an address
|
||||
query = """query Property_search(
|
||||
$coordinates: [Float]!
|
||||
$radius: String!
|
||||
$offset: Int!,
|
||||
) {
|
||||
home_search(
|
||||
query: {
|
||||
nearby: {
|
||||
coordinates: $coordinates
|
||||
radius: $radius
|
||||
}
|
||||
status: %s
|
||||
%s
|
||||
%s
|
||||
}
|
||||
%s
|
||||
limit: 200
|
||||
offset: $offset
|
||||
) %s""" % (
|
||||
listing_type.value.lower(),
|
||||
date_param,
|
||||
pending_or_contingent_param,
|
||||
sort_param,
|
||||
results_query,
|
||||
)
|
||||
elif search_type == "area": #: general search, came from a general location
|
||||
query = """query Home_search(
|
||||
$city: String,
|
||||
$county: [String],
|
||||
$state_code: String,
|
||||
$postal_code: String
|
||||
$offset: Int,
|
||||
) {
|
||||
home_search(
|
||||
query: {
|
||||
city: $city
|
||||
county: $county
|
||||
postal_code: $postal_code
|
||||
state_code: $state_code
|
||||
status: %s
|
||||
%s
|
||||
%s
|
||||
}
|
||||
%s
|
||||
limit: 200
|
||||
offset: $offset
|
||||
) %s""" % (
|
||||
listing_type.value.lower(),
|
||||
date_param,
|
||||
pending_or_contingent_param,
|
||||
sort_param,
|
||||
results_query,
|
||||
)
|
||||
else: #: general search, came from an address
|
||||
query = (
|
||||
"""query Property_search(
|
||||
$property_id: [ID]!
|
||||
$offset: Int!,
|
||||
) {
|
||||
property_search(
|
||||
query: {
|
||||
property_id: $property_id
|
||||
}
|
||||
limit: 1
|
||||
offset: $offset
|
||||
) %s"""
|
||||
% results_query
|
||||
)
|
||||
|
||||
payload = {
|
||||
"query": query,
|
||||
"variables": variables,
|
||||
}
|
||||
|
||||
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
|
||||
response.raise_for_status()
|
||||
response_json = response.json()
|
||||
search_key = "home_search" if "home_search" in query else "property_search"
|
||||
|
||||
properties: list[Property] = []
|
||||
|
||||
if (
|
||||
response_json is None
|
||||
or "data" not in response_json
|
||||
or response_json["data"] is None
|
||||
or search_key not in response_json["data"]
|
||||
or response_json["data"][search_key] is None
|
||||
or "results" not in response_json["data"][search_key]
|
||||
):
|
||||
return {"total": 0, "properties": []}
|
||||
|
||||
for result in response_json["data"][search_key]["results"]:
|
||||
mls = (
|
||||
result["source"].get("id")
|
||||
if "source" in result and isinstance(result["source"], dict)
|
||||
else None
|
||||
)
|
||||
|
||||
if not mls and self.mls_only:
|
||||
continue
|
||||
|
||||
able_to_get_lat_long = (
|
||||
result
|
||||
and result.get("location")
|
||||
and result["location"].get("address")
|
||||
and result["location"]["address"].get("coordinate")
|
||||
)
|
||||
|
||||
is_pending = result["flags"].get("is_pending") or result["flags"].get("is_contingent")
|
||||
|
||||
realty_property = Property(
|
||||
mls=mls,
|
||||
mls_id=result["source"].get("listing_id")
|
||||
if "source" in result and isinstance(result["source"], dict)
|
||||
else None,
|
||||
property_url=f"{self.PROPERTY_URL}{result['property_id']}",
|
||||
status="PENDING" if is_pending else result["status"].upper(),
|
||||
list_price=result["list_price"],
|
||||
list_date=result["list_date"].split("T")[0]
|
||||
if result.get("list_date")
|
||||
else None,
|
||||
prc_sqft=result.get("price_per_sqft"),
|
||||
last_sold_date=result.get("last_sold_date"),
|
||||
hoa_fee=result["hoa"]["fee"]
|
||||
if result.get("hoa") and isinstance(result["hoa"], dict)
|
||||
else None,
|
||||
latitude=result["location"]["address"]["coordinate"].get("lat")
|
||||
if able_to_get_lat_long
|
||||
else None,
|
||||
longitude=result["location"]["address"]["coordinate"].get("lon")
|
||||
if able_to_get_lat_long
|
||||
else None,
|
||||
address=self._parse_address(result, search_type="general_search"),
|
||||
description=self._parse_description(result),
|
||||
days_on_mls=self.calculate_days_on_mls(result)
|
||||
)
|
||||
properties.append(realty_property)
|
||||
|
||||
return {
|
||||
"total": response_json["data"][search_key]["total"],
|
||||
"properties": properties,
|
||||
}
|
||||
|
||||
def search(self):
|
||||
location_info = self.handle_location()
|
||||
location_type = location_info["area_type"]
|
||||
|
||||
search_variables = {
|
||||
"offset": 0,
|
||||
}
|
||||
|
||||
search_type = (
|
||||
"comps"
|
||||
if self.radius and location_type == "address"
|
||||
else "address"
|
||||
if location_type == "address" and not self.radius
|
||||
else "area"
|
||||
)
|
||||
if location_type == "address":
|
||||
if not self.radius: #: single address search, non comps
|
||||
property_id = location_info["mpr_id"]
|
||||
search_variables |= {"property_id": property_id}
|
||||
|
||||
gql_results = self.general_search(
|
||||
search_variables, search_type=search_type
|
||||
)
|
||||
if gql_results["total"] == 0:
|
||||
listing_id = self.get_latest_listing_id(property_id)
|
||||
if listing_id is None:
|
||||
return self.handle_address(property_id)
|
||||
else:
|
||||
return self.handle_listing(listing_id)
|
||||
else:
|
||||
return gql_results["properties"]
|
||||
|
||||
else: #: general search, comps (radius)
|
||||
coordinates = list(location_info["centroid"].values())
|
||||
search_variables |= {
|
||||
"coordinates": coordinates,
|
||||
"radius": "{}mi".format(self.radius),
|
||||
}
|
||||
|
||||
else: #: general search, location
|
||||
search_variables |= {
|
||||
"city": location_info.get("city"),
|
||||
"county": location_info.get("county"),
|
||||
"state_code": location_info.get("state_code"),
|
||||
"postal_code": location_info.get("postal_code"),
|
||||
}
|
||||
|
||||
result = self.general_search(search_variables, search_type=search_type)
|
||||
total = result["total"]
|
||||
homes = result["properties"]
|
||||
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
futures = [
|
||||
executor.submit(
|
||||
self.general_search,
|
||||
variables=search_variables | {"offset": i},
|
||||
search_type=search_type,
|
||||
)
|
||||
for i in range(200, min(total, 10000), 200)
|
||||
]
|
||||
|
||||
for future in as_completed(futures):
|
||||
homes.extend(future.result()["properties"])
|
||||
|
||||
return homes
|
||||
|
||||
@staticmethod
|
||||
def _parse_neighborhoods(result: dict) -> Optional[str]:
|
||||
neighborhoods_list = []
|
||||
neighborhoods = result["location"].get("neighborhoods", [])
|
||||
|
||||
if neighborhoods:
|
||||
for neighborhood in neighborhoods:
|
||||
name = neighborhood.get("name")
|
||||
if name:
|
||||
neighborhoods_list.append(name)
|
||||
|
||||
return ", ".join(neighborhoods_list) if neighborhoods_list else None
|
||||
|
||||
@staticmethod
|
||||
def _parse_address(result: dict, search_type):
|
||||
if search_type == "general_search":
|
||||
return Address(
|
||||
street=f"{result['location']['address']['street_number']} {result['location']['address']['street_name']} {result['location']['address']['street_suffix']}",
|
||||
unit=result["location"]["address"]["unit"],
|
||||
city=result["location"]["address"]["city"],
|
||||
state=result["location"]["address"]["state_code"],
|
||||
zip=result["location"]["address"]["postal_code"],
|
||||
)
|
||||
return Address(
|
||||
street=f"{result['address']['street_number']} {result['address']['street_name']} {result['address']['street_suffix']}",
|
||||
unit=result["address"]["unit"],
|
||||
city=result["address"]["city"],
|
||||
state=result["address"]["state_code"],
|
||||
zip=result["address"]["postal_code"],
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _parse_description(result: dict) -> Description:
|
||||
description_data = result.get("description", {})
|
||||
|
||||
if description_data is None or not isinstance(description_data, dict):
|
||||
description_data = {}
|
||||
|
||||
style = description_data.get("type", "")
|
||||
if style is not None:
|
||||
style = style.upper()
|
||||
|
||||
return Description(
|
||||
style=style,
|
||||
beds=description_data.get("beds"),
|
||||
baths_full=description_data.get("baths_full"),
|
||||
baths_half=description_data.get("baths_half"),
|
||||
sqft=description_data.get("sqft"),
|
||||
lot_sqft=description_data.get("lot_sqft"),
|
||||
sold_price=description_data.get("sold_price"),
|
||||
year_built=description_data.get("year_built"),
|
||||
garage=description_data.get("garage"),
|
||||
stories=description_data.get("stories"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def calculate_days_on_mls(result: dict) -> Optional[int]:
|
||||
list_date_str = result.get("list_date")
|
||||
list_date = datetime.strptime(list_date_str.split("T")[0], "%Y-%m-%d") if list_date_str else None
|
||||
last_sold_date_str = result.get("last_sold_date")
|
||||
last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None
|
||||
today = datetime.now()
|
||||
|
||||
if list_date:
|
||||
if result["status"] == 'sold':
|
||||
if last_sold_date:
|
||||
days = (last_sold_date - list_date).days
|
||||
if days >= 0:
|
||||
return days
|
||||
elif result["status"] in ('for_sale', 'for_rent'):
|
||||
days = (today - list_date).days
|
||||
if days >= 0:
|
||||
return days
|
||||
6
homeharvest/exceptions.py
Normal file
6
homeharvest/exceptions.py
Normal file
@@ -0,0 +1,6 @@
|
||||
class InvalidListingType(Exception):
|
||||
"""Raised when a provided listing type is does not exist."""
|
||||
|
||||
|
||||
class NoResultsFound(Exception):
|
||||
"""Raised when no results are found for the given location"""
|
||||
72
homeharvest/utils.py
Normal file
72
homeharvest/utils.py
Normal file
@@ -0,0 +1,72 @@
|
||||
from .core.scrapers.models import Property, ListingType
|
||||
import pandas as pd
|
||||
from .exceptions import InvalidListingType
|
||||
|
||||
ordered_properties = [
|
||||
"property_url",
|
||||
"mls",
|
||||
"mls_id",
|
||||
"status",
|
||||
"style",
|
||||
"street",
|
||||
"unit",
|
||||
"city",
|
||||
"state",
|
||||
"zip_code",
|
||||
"beds",
|
||||
"full_baths",
|
||||
"half_baths",
|
||||
"sqft",
|
||||
"year_built",
|
||||
"days_on_mls",
|
||||
"list_price",
|
||||
"list_date",
|
||||
"sold_price",
|
||||
"last_sold_date",
|
||||
"lot_sqft",
|
||||
"price_per_sqft",
|
||||
"latitude",
|
||||
"longitude",
|
||||
"stories",
|
||||
"hoa_fee",
|
||||
"parking_garage",
|
||||
]
|
||||
|
||||
|
||||
def process_result(result: Property) -> pd.DataFrame:
|
||||
prop_data = {prop: None for prop in ordered_properties}
|
||||
prop_data.update(result.__dict__)
|
||||
|
||||
if "address" in prop_data:
|
||||
address_data = prop_data["address"]
|
||||
prop_data["street"] = address_data.street
|
||||
prop_data["unit"] = address_data.unit
|
||||
prop_data["city"] = address_data.city
|
||||
prop_data["state"] = address_data.state
|
||||
prop_data["zip_code"] = address_data.zip
|
||||
|
||||
prop_data["price_per_sqft"] = prop_data["prc_sqft"]
|
||||
|
||||
description = result.description
|
||||
prop_data["style"] = description.style
|
||||
prop_data["beds"] = description.beds
|
||||
prop_data["full_baths"] = description.baths_full
|
||||
prop_data["half_baths"] = description.baths_half
|
||||
prop_data["sqft"] = description.sqft
|
||||
prop_data["lot_sqft"] = description.lot_sqft
|
||||
prop_data["sold_price"] = description.sold_price
|
||||
prop_data["year_built"] = description.year_built
|
||||
prop_data["parking_garage"] = description.garage
|
||||
prop_data["stories"] = description.stories
|
||||
|
||||
properties_df = pd.DataFrame([prop_data])
|
||||
properties_df = properties_df.reindex(columns=ordered_properties)
|
||||
|
||||
return properties_df[ordered_properties]
|
||||
|
||||
|
||||
def validate_input(listing_type: str) -> None:
|
||||
if listing_type.upper() not in ListingType.__members__:
|
||||
raise InvalidListingType(
|
||||
f"Provided listing type, '{listing_type}', does not exist."
|
||||
)
|
||||
441
poetry.lock
generated
Normal file
441
poetry.lock
generated
Normal file
@@ -0,0 +1,441 @@
|
||||
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2023.7.22"
|
||||
description = "Python package for providing Mozilla's CA Bundle."
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"},
|
||||
{file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
version = "3.3.0"
|
||||
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
|
||||
optional = false
|
||||
python-versions = ">=3.7.0"
|
||||
files = [
|
||||
{file = "charset-normalizer-3.3.0.tar.gz", hash = "sha256:63563193aec44bce707e0c5ca64ff69fa72ed7cf34ce6e11d5127555756fd2f6"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:effe5406c9bd748a871dbcaf3ac69167c38d72db8c9baf3ff954c344f31c4cbe"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4162918ef3098851fcd8a628bf9b6a98d10c380725df9e04caf5ca6dd48c847a"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0570d21da019941634a531444364f2482e8db0b3425fcd5ac0c36565a64142c8"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5707a746c6083a3a74b46b3a631d78d129edab06195a92a8ece755aac25a3f3d"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:278c296c6f96fa686d74eb449ea1697f3c03dc28b75f873b65b5201806346a69"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a4b71f4d1765639372a3b32d2638197f5cd5221b19531f9245fcc9ee62d38f56"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5969baeaea61c97efa706b9b107dcba02784b1601c74ac84f2a532ea079403e"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3f93dab657839dfa61025056606600a11d0b696d79386f974e459a3fbc568ec"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:db756e48f9c5c607b5e33dd36b1d5872d0422e960145b08ab0ec7fd420e9d649"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:232ac332403e37e4a03d209a3f92ed9071f7d3dbda70e2a5e9cff1c4ba9f0678"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e5c1502d4ace69a179305abb3f0bb6141cbe4714bc9b31d427329a95acfc8bdd"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:2502dd2a736c879c0f0d3e2161e74d9907231e25d35794584b1ca5284e43f596"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23e8565ab7ff33218530bc817922fae827420f143479b753104ab801145b1d5b"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-win32.whl", hash = "sha256:1872d01ac8c618a8da634e232f24793883d6e456a66593135aeafe3784b0848d"},
|
||||
{file = "charset_normalizer-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:557b21a44ceac6c6b9773bc65aa1b4cc3e248a5ad2f5b914b91579a32e22204d"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d7eff0f27edc5afa9e405f7165f85a6d782d308f3b6b9d96016c010597958e63"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a685067d05e46641d5d1623d7c7fdf15a357546cbb2f71b0ebde91b175ffc3e"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0d3d5b7db9ed8a2b11a774db2bbea7ba1884430a205dbd54a32d61d7c2a190fa"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2935ffc78db9645cb2086c2f8f4cfd23d9b73cc0dc80334bc30aac6f03f68f8c"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fe359b2e3a7729010060fbca442ca225280c16e923b37db0e955ac2a2b72a05"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:380c4bde80bce25c6e4f77b19386f5ec9db230df9f2f2ac1e5ad7af2caa70459"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0d1e3732768fecb052d90d62b220af62ead5748ac51ef61e7b32c266cac9293"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b2919306936ac6efb3aed1fbf81039f7087ddadb3160882a57ee2ff74fd2382"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f8888e31e3a85943743f8fc15e71536bda1c81d5aa36d014a3c0c44481d7db6e"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:82eb849f085624f6a607538ee7b83a6d8126df6d2f7d3b319cb837b289123078"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7b8b8bf1189b3ba9b8de5c8db4d541b406611a71a955bbbd7385bbc45fcb786c"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5adf257bd58c1b8632046bbe43ee38c04e1038e9d37de9c57a94d6bd6ce5da34"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c350354efb159b8767a6244c166f66e67506e06c8924ed74669b2c70bc8735b1"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-win32.whl", hash = "sha256:02af06682e3590ab952599fbadac535ede5d60d78848e555aa58d0c0abbde786"},
|
||||
{file = "charset_normalizer-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:86d1f65ac145e2c9ed71d8ffb1905e9bba3a91ae29ba55b4c46ae6fc31d7c0d4"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:3b447982ad46348c02cb90d230b75ac34e9886273df3a93eec0539308a6296d7"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:abf0d9f45ea5fb95051c8bfe43cb40cda383772f7e5023a83cc481ca2604d74e"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b09719a17a2301178fac4470d54b1680b18a5048b481cb8890e1ef820cb80455"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3d9b48ee6e3967b7901c052b670c7dda6deb812c309439adaffdec55c6d7b78"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:edfe077ab09442d4ef3c52cb1f9dab89bff02f4524afc0acf2d46be17dc479f5"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3debd1150027933210c2fc321527c2299118aa929c2f5a0a80ab6953e3bd1908"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f63face3a527284f7bb8a9d4f78988e3c06823f7bea2bd6f0e0e9298ca0403"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24817cb02cbef7cd499f7c9a2735286b4782bd47a5b3516a0e84c50eab44b98e"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c71f16da1ed8949774ef79f4a0260d28b83b3a50c6576f8f4f0288d109777989"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9cf3126b85822c4e53aa28c7ec9869b924d6fcfb76e77a45c44b83d91afd74f9"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:b3b2316b25644b23b54a6f6401074cebcecd1244c0b8e80111c9a3f1c8e83d65"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:03680bb39035fbcffe828eae9c3f8afc0428c91d38e7d61aa992ef7a59fb120e"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cc152c5dd831641e995764f9f0b6589519f6f5123258ccaca8c6d34572fefa8"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-win32.whl", hash = "sha256:b8f3307af845803fb0b060ab76cf6dd3a13adc15b6b451f54281d25911eb92df"},
|
||||
{file = "charset_normalizer-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:8eaf82f0eccd1505cf39a45a6bd0a8cf1c70dcfc30dba338207a969d91b965c0"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dc45229747b67ffc441b3de2f3ae5e62877a282ea828a5bdb67883c4ee4a8810"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f4a0033ce9a76e391542c182f0d48d084855b5fcba5010f707c8e8c34663d77"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ada214c6fa40f8d800e575de6b91a40d0548139e5dc457d2ebb61470abf50186"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b1121de0e9d6e6ca08289583d7491e7fcb18a439305b34a30b20d8215922d43c"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1063da2c85b95f2d1a430f1c33b55c9c17ffaf5e612e10aeaad641c55a9e2b9d"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70f1d09c0d7748b73290b29219e854b3207aea922f839437870d8cc2168e31cc"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:250c9eb0f4600361dd80d46112213dff2286231d92d3e52af1e5a6083d10cad9"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:750b446b2ffce1739e8578576092179160f6d26bd5e23eb1789c4d64d5af7dc7"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:fc52b79d83a3fe3a360902d3f5d79073a993597d48114c29485e9431092905d8"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:588245972aca710b5b68802c8cad9edaa98589b1b42ad2b53accd6910dad3545"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e39c7eb31e3f5b1f88caff88bcff1b7f8334975b46f6ac6e9fc725d829bc35d4"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-win32.whl", hash = "sha256:abecce40dfebbfa6abf8e324e1860092eeca6f7375c8c4e655a8afb61af58f2c"},
|
||||
{file = "charset_normalizer-3.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:24a91a981f185721542a0b7c92e9054b7ab4fea0508a795846bc5b0abf8118d4"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:67b8cc9574bb518ec76dc8e705d4c39ae78bb96237cb533edac149352c1f39fe"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac71b2977fb90c35d41c9453116e283fac47bb9096ad917b8819ca8b943abecd"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3ae38d325b512f63f8da31f826e6cb6c367336f95e418137286ba362925c877e"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:542da1178c1c6af8873e143910e2269add130a299c9106eef2594e15dae5e482"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30a85aed0b864ac88309b7d94be09f6046c834ef60762a8833b660139cfbad13"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aae32c93e0f64469f74ccc730a7cb21c7610af3a775157e50bbd38f816536b38"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b26ddf78d57f1d143bdf32e820fd8935d36abe8a25eb9ec0b5a71c82eb3895"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f5d10bae5d78e4551b7be7a9b29643a95aded9d0f602aa2ba584f0388e7a557"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:249c6470a2b60935bafd1d1d13cd613f8cd8388d53461c67397ee6a0f5dce741"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c5a74c359b2d47d26cdbbc7845e9662d6b08a1e915eb015d044729e92e7050b7"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:b5bcf60a228acae568e9911f410f9d9e0d43197d030ae5799e20dca8df588287"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:187d18082694a29005ba2944c882344b6748d5be69e3a89bf3cc9d878e548d5a"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:81bf654678e575403736b85ba3a7867e31c2c30a69bc57fe88e3ace52fb17b89"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-win32.whl", hash = "sha256:85a32721ddde63c9df9ebb0d2045b9691d9750cb139c161c80e500d210f5e26e"},
|
||||
{file = "charset_normalizer-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:468d2a840567b13a590e67dd276c570f8de00ed767ecc611994c301d0f8c014f"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e0fc42822278451bc13a2e8626cf2218ba570f27856b536e00cfa53099724828"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:09c77f964f351a7369cc343911e0df63e762e42bac24cd7d18525961c81754f4"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12ebea541c44fdc88ccb794a13fe861cc5e35d64ed689513a5c03d05b53b7c82"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:805dfea4ca10411a5296bcc75638017215a93ffb584c9e344731eef0dcfb026a"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96c2b49eb6a72c0e4991d62406e365d87067ca14c1a729a870d22354e6f68115"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaf7b34c5bc56b38c931a54f7952f1ff0ae77a2e82496583b247f7c969eb1479"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:619d1c96099be5823db34fe89e2582b336b5b074a7f47f819d6b3a57ff7bdb86"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0ac5e7015a5920cfce654c06618ec40c33e12801711da6b4258af59a8eff00a"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:93aa7eef6ee71c629b51ef873991d6911b906d7312c6e8e99790c0f33c576f89"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7966951325782121e67c81299a031f4c115615e68046f79b85856b86ebffc4cd"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:02673e456dc5ab13659f85196c534dc596d4ef260e4d86e856c3b2773ce09843"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:c2af80fb58f0f24b3f3adcb9148e6203fa67dd3f61c4af146ecad033024dde43"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:153e7b6e724761741e0974fc4dcd406d35ba70b92bfe3fedcb497226c93b9da7"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-win32.whl", hash = "sha256:d47ecf253780c90ee181d4d871cd655a789da937454045b17b5798da9393901a"},
|
||||
{file = "charset_normalizer-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:d97d85fa63f315a8bdaba2af9a6a686e0eceab77b3089af45133252618e70884"},
|
||||
{file = "charset_normalizer-3.3.0-py3-none-any.whl", hash = "sha256:e46cd37076971c1040fc8c41273a8b3e2c624ce4f2be3f5dfcb7a430c1d3acc2"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
description = "Cross-platform colored terminal text."
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
|
||||
files = [
|
||||
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
||||
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "et-xmlfile"
|
||||
version = "1.1.0"
|
||||
description = "An implementation of lxml.xmlfile for the standard library"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"},
|
||||
{file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.1.3"
|
||||
description = "Backport of PEP 654 (exception groups)"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"},
|
||||
{file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
test = ["pytest (>=6)"]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.4"
|
||||
description = "Internationalized Domain Names in Applications (IDNA)"
|
||||
optional = false
|
||||
python-versions = ">=3.5"
|
||||
files = [
|
||||
{file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
|
||||
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.0.0"
|
||||
description = "brain-dead simple config-ini parsing"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
|
||||
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "1.26.0"
|
||||
description = "Fundamental package for array computing in Python"
|
||||
optional = false
|
||||
python-versions = "<3.13,>=3.9"
|
||||
files = [
|
||||
{file = "numpy-1.26.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8db2f125746e44dce707dd44d4f4efeea8d7e2b43aace3f8d1f235cfa2733dd"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0621f7daf973d34d18b4e4bafb210bbaf1ef5e0100b5fa750bd9cde84c7ac292"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51be5f8c349fdd1a5568e72713a21f518e7d6707bcf8503b528b88d33b57dc68"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:767254ad364991ccfc4d81b8152912e53e103ec192d1bb4ea6b1f5a7117040be"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:436c8e9a4bdeeee84e3e59614d38c3dbd3235838a877af8c211cfcac8a80b8d3"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-win32.whl", hash = "sha256:c2e698cb0c6dda9372ea98a0344245ee65bdc1c9dd939cceed6bb91256837896"},
|
||||
{file = "numpy-1.26.0-cp310-cp310-win_amd64.whl", hash = "sha256:09aaee96c2cbdea95de76ecb8a586cb687d281c881f5f17bfc0fb7f5890f6b91"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:637c58b468a69869258b8ae26f4a4c6ff8abffd4a8334c830ffb63e0feefe99a"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:306545e234503a24fe9ae95ebf84d25cba1fdc27db971aa2d9f1ab6bba19a9dd"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6adc33561bd1d46f81131d5352348350fc23df4d742bb246cdfca606ea1208"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e062aa24638bb5018b7841977c360d2f5917268d125c833a686b7cbabbec496c"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:546b7dd7e22f3c6861463bebb000646fa730e55df5ee4a0224408b5694cc6148"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-win32.whl", hash = "sha256:c0b45c8b65b79337dee5134d038346d30e109e9e2e9d43464a2970e5c0e93229"},
|
||||
{file = "numpy-1.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:eae430ecf5794cb7ae7fa3808740b015aa80747e5266153128ef055975a72b99"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:166b36197e9debc4e384e9c652ba60c0bacc216d0fc89e78f973a9760b503388"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f042f66d0b4ae6d48e70e28d487376204d3cbf43b84c03bac57e28dac6151581"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5e18e5b14a7560d8acf1c596688f4dfd19b4f2945b245a71e5af4ddb7422feb"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f6bad22a791226d0a5c7c27a80a20e11cfe09ad5ef9084d4d3fc4a299cca505"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4acc65dd65da28060e206c8f27a573455ed724e6179941edb19f97e58161bb69"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-win32.whl", hash = "sha256:bb0d9a1aaf5f1cb7967320e80690a1d7ff69f1d47ebc5a9bea013e3a21faec95"},
|
||||
{file = "numpy-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:ee84ca3c58fe48b8ddafdeb1db87388dce2c3c3f701bf447b05e4cfcc3679112"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4a873a8180479bc829313e8d9798d5234dfacfc2e8a7ac188418189bb8eafbd2"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:914b28d3215e0c721dc75db3ad6d62f51f630cb0c277e6b3bcb39519bed10bd8"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c78a22e95182fb2e7874712433eaa610478a3caf86f28c621708d35fa4fd6e7f"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f737708b366c36b76e953c46ba5827d8c27b7a8c9d0f471810728e5a2fe57c"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b44e6a09afc12952a7d2a58ca0a2429ee0d49a4f89d83a0a11052da696440e49"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-win32.whl", hash = "sha256:5671338034b820c8d58c81ad1dafc0ed5a00771a82fccc71d6438df00302094b"},
|
||||
{file = "numpy-1.26.0-cp39-cp39-win_amd64.whl", hash = "sha256:020cdbee66ed46b671429c7265cf00d8ac91c046901c55684954c3958525dab2"},
|
||||
{file = "numpy-1.26.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0792824ce2f7ea0c82ed2e4fecc29bb86bee0567a080dacaf2e0a01fe7654369"},
|
||||
{file = "numpy-1.26.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d484292eaeb3e84a51432a94f53578689ffdea3f90e10c8b203a99be5af57d8"},
|
||||
{file = "numpy-1.26.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:186ba67fad3c60dbe8a3abff3b67a91351100f2661c8e2a80364ae6279720299"},
|
||||
{file = "numpy-1.26.0.tar.gz", hash = "sha256:f93fc78fe8bf15afe2b8d6b6499f1c73953169fad1e9a8dd086cdff3190e7fdf"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openpyxl"
|
||||
version = "3.1.2"
|
||||
description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"},
|
||||
{file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
et-xmlfile = "*"
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "23.2"
|
||||
description = "Core utilities for Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
|
||||
{file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pandas"
|
||||
version = "2.1.1"
|
||||
description = "Powerful data structures for data analysis, time series, and statistics"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "pandas-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58d997dbee0d4b64f3cb881a24f918b5f25dd64ddf31f467bb9b67ae4c63a1e4"},
|
||||
{file = "pandas-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02304e11582c5d090e5a52aec726f31fe3f42895d6bfc1f28738f9b64b6f0614"},
|
||||
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffa8f0966de2c22de408d0e322db2faed6f6e74265aa0856f3824813cf124363"},
|
||||
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1f84c144dee086fe4f04a472b5cd51e680f061adf75c1ae4fc3a9275560f8f4"},
|
||||
{file = "pandas-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:75ce97667d06d69396d72be074f0556698c7f662029322027c226fd7a26965cb"},
|
||||
{file = "pandas-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:4c3f32fd7c4dccd035f71734df39231ac1a6ff95e8bdab8d891167197b7018d2"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e2959720b70e106bb1d8b6eadd8ecd7c8e99ccdbe03ee03260877184bb2877d"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25e8474a8eb258e391e30c288eecec565bfed3e026f312b0cbd709a63906b6f8"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8bd1685556f3374520466998929bade3076aeae77c3e67ada5ed2b90b4de7f0"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc3657869c7902810f32bd072f0740487f9e030c1a3ab03e0af093db35a9d14e"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:05674536bd477af36aa2effd4ec8f71b92234ce0cc174de34fd21e2ee99adbc2"},
|
||||
{file = "pandas-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:b407381258a667df49d58a1b637be33e514b07f9285feb27769cedb3ab3d0b3a"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c747793c4e9dcece7bb20156179529898abf505fe32cb40c4052107a3c620b49"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3bcad1e6fb34b727b016775bea407311f7721db87e5b409e6542f4546a4951ea"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5ec7740f9ccb90aec64edd71434711f58ee0ea7f5ed4ac48be11cfa9abf7317"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29deb61de5a8a93bdd033df328441a79fcf8dd3c12d5ed0b41a395eef9cd76f0"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f99bebf19b7e03cf80a4e770a3e65eee9dd4e2679039f542d7c1ace7b7b1daa"},
|
||||
{file = "pandas-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:84e7e910096416adec68075dc87b986ff202920fb8704e6d9c8c9897fe7332d6"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366da7b0e540d1b908886d4feb3d951f2f1e572e655c1160f5fde28ad4abb750"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e50e72b667415a816ac27dfcfe686dc5a0b02202e06196b943d54c4f9c7693e"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc1ab6a25da197f03ebe6d8fa17273126120874386b4ac11c1d687df288542dd"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0dbfea0dd3901ad4ce2306575c54348d98499c95be01b8d885a2737fe4d7a98"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0489b0e6aa3d907e909aef92975edae89b1ee1654db5eafb9be633b0124abe97"},
|
||||
{file = "pandas-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:4cdb0fab0400c2cb46dafcf1a0fe084c8bb2480a1fa8d81e19d15e12e6d4ded2"},
|
||||
{file = "pandas-2.1.1.tar.gz", hash = "sha256:fecb198dc389429be557cde50a2d46da8434a17fe37d7d41ff102e3987fd947b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
numpy = [
|
||||
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
|
||||
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
|
||||
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
|
||||
]
|
||||
python-dateutil = ">=2.8.2"
|
||||
pytz = ">=2020.1"
|
||||
tzdata = ">=2022.1"
|
||||
|
||||
[package.extras]
|
||||
all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"]
|
||||
aws = ["s3fs (>=2022.05.0)"]
|
||||
clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"]
|
||||
compression = ["zstandard (>=0.17.0)"]
|
||||
computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"]
|
||||
consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
|
||||
excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"]
|
||||
feather = ["pyarrow (>=7.0.0)"]
|
||||
fss = ["fsspec (>=2022.05.0)"]
|
||||
gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"]
|
||||
hdf5 = ["tables (>=3.7.0)"]
|
||||
html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"]
|
||||
mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"]
|
||||
output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"]
|
||||
parquet = ["pyarrow (>=7.0.0)"]
|
||||
performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"]
|
||||
plot = ["matplotlib (>=3.6.1)"]
|
||||
postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"]
|
||||
spss = ["pyreadstat (>=1.1.5)"]
|
||||
sql-other = ["SQLAlchemy (>=1.4.36)"]
|
||||
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
|
||||
xml = ["lxml (>=4.8.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.3.0"
|
||||
description = "plugin and hook calling mechanisms for python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"},
|
||||
{file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
dev = ["pre-commit", "tox"]
|
||||
testing = ["pytest", "pytest-benchmark"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "7.4.2"
|
||||
description = "pytest: simple powerful testing with Python"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "pytest-7.4.2-py3-none-any.whl", hash = "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"},
|
||||
{file = "pytest-7.4.2.tar.gz", hash = "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
||||
exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
|
||||
iniconfig = "*"
|
||||
packaging = "*"
|
||||
pluggy = ">=0.12,<2.0"
|
||||
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.8.2"
|
||||
description = "Extensions to the standard Python datetime module"
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
|
||||
files = [
|
||||
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
|
||||
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
six = ">=1.5"
|
||||
|
||||
[[package]]
|
||||
name = "pytz"
|
||||
version = "2023.3.post1"
|
||||
description = "World timezone definitions, modern and historical"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"},
|
||||
{file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.31.0"
|
||||
description = "Python HTTP for Humans."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
|
||||
{file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
certifi = ">=2017.4.17"
|
||||
charset-normalizer = ">=2,<4"
|
||||
idna = ">=2.5,<4"
|
||||
urllib3 = ">=1.21.1,<3"
|
||||
|
||||
[package.extras]
|
||||
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
||||
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
||||
|
||||
[[package]]
|
||||
name = "six"
|
||||
version = "1.16.0"
|
||||
description = "Python 2 and 3 compatibility utilities"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||
files = [
|
||||
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
|
||||
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
version = "2.0.1"
|
||||
description = "A lil' TOML parser"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
||||
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tzdata"
|
||||
version = "2023.3"
|
||||
description = "Provider of IANA time zone data"
|
||||
optional = false
|
||||
python-versions = ">=2"
|
||||
files = [
|
||||
{file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"},
|
||||
{file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "2.0.6"
|
||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "urllib3-2.0.6-py3-none-any.whl", hash = "sha256:7a7c7003b000adf9e7ca2a377c9688bbc54ed41b985789ed576570342a375cd2"},
|
||||
{file = "urllib3-2.0.6.tar.gz", hash = "sha256:b19e1a85d206b56d7df1d5e683df4a7725252a964e3993648dd0fb5a1c157564"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
|
||||
secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"]
|
||||
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||
zstd = ["zstandard (>=0.18.0)"]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.10,<3.13"
|
||||
content-hash = "09ad811d74a42363ff4c3ccd012d8f73c89d7d978e5a6445b0f3d2e231922f1b"
|
||||
@@ -1,13 +1,23 @@
|
||||
[tool.poetry]
|
||||
name = "homeharvest"
|
||||
version = "0.1.0"
|
||||
description = "Real estate scraping library for Zillow & Redfin"
|
||||
authors = ["Zachary Hampton <zachary@zacharysproducts.com>"]
|
||||
version = "0.3.6"
|
||||
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
|
||||
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
||||
homepage = "https://github.com/Bunsly/HomeHarvest"
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
[tool.poetry.scripts]
|
||||
homeharvest = "homeharvest.cli:main"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.10,<3.13"
|
||||
requests = "^2.31.0"
|
||||
pandas = "^2.1.1"
|
||||
openpyxl = "^3.1.2"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^7.4.2"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
|
||||
121
tests/test_realtor.py
Normal file
121
tests/test_realtor.py
Normal file
@@ -0,0 +1,121 @@
|
||||
from homeharvest import scrape_property
|
||||
from homeharvest.exceptions import (
|
||||
InvalidListingType,
|
||||
NoResultsFound,
|
||||
)
|
||||
|
||||
|
||||
def test_realtor_pending_or_contingent():
|
||||
pending_or_contingent_result = scrape_property(
|
||||
location="Surprise, AZ", listing_type="pending"
|
||||
)
|
||||
|
||||
regular_result = scrape_property(location="Surprise, AZ", listing_type="for_sale")
|
||||
|
||||
assert all(
|
||||
[
|
||||
result is not None
|
||||
for result in [pending_or_contingent_result, regular_result]
|
||||
]
|
||||
)
|
||||
assert len(pending_or_contingent_result) != len(regular_result)
|
||||
|
||||
|
||||
def test_realtor_pending_comps():
|
||||
pending_comps = scrape_property(
|
||||
location="2530 Al Lipscomb Way",
|
||||
radius=5,
|
||||
past_days=180,
|
||||
listing_type="pending",
|
||||
)
|
||||
|
||||
for_sale_comps = scrape_property(
|
||||
location="2530 Al Lipscomb Way",
|
||||
radius=5,
|
||||
past_days=180,
|
||||
listing_type="for_sale",
|
||||
)
|
||||
|
||||
sold_comps = scrape_property(
|
||||
location="2530 Al Lipscomb Way",
|
||||
radius=5,
|
||||
past_days=180,
|
||||
listing_type="sold",
|
||||
)
|
||||
|
||||
results = [pending_comps, for_sale_comps, sold_comps]
|
||||
assert all([result is not None for result in results])
|
||||
|
||||
#: assert all lengths are different
|
||||
assert len(set([len(result) for result in results])) == len(results)
|
||||
|
||||
|
||||
def test_realtor_comps():
|
||||
result = scrape_property(
|
||||
location="2530 Al Lipscomb Way",
|
||||
radius=0.5,
|
||||
past_days=180,
|
||||
listing_type="sold",
|
||||
)
|
||||
|
||||
assert result is not None and len(result) > 0
|
||||
|
||||
|
||||
def test_realtor_last_x_days_sold():
|
||||
days_result_30 = scrape_property(
|
||||
location="Dallas, TX", listing_type="sold", past_days=30
|
||||
)
|
||||
|
||||
days_result_10 = scrape_property(
|
||||
location="Dallas, TX", listing_type="sold", past_days=10
|
||||
)
|
||||
|
||||
assert all(
|
||||
[result is not None for result in [days_result_30, days_result_10]]
|
||||
) and len(days_result_30) != len(days_result_10)
|
||||
|
||||
|
||||
def test_realtor_single_property():
|
||||
results = [
|
||||
scrape_property(
|
||||
location="15509 N 172nd Dr, Surprise, AZ 85388",
|
||||
listing_type="for_sale",
|
||||
),
|
||||
scrape_property(
|
||||
location="2530 Al Lipscomb Way",
|
||||
listing_type="for_sale",
|
||||
),
|
||||
]
|
||||
|
||||
assert all([result is not None for result in results])
|
||||
|
||||
|
||||
def test_realtor():
|
||||
results = [
|
||||
scrape_property(
|
||||
location="2530 Al Lipscomb Way",
|
||||
listing_type="for_sale",
|
||||
),
|
||||
scrape_property(
|
||||
location="Phoenix, AZ", listing_type="for_rent"
|
||||
), #: does not support "city, state, USA" format
|
||||
scrape_property(
|
||||
location="Dallas, TX", listing_type="sold"
|
||||
), #: does not support "city, state, USA" format
|
||||
scrape_property(location="85281"),
|
||||
]
|
||||
|
||||
assert all([result is not None for result in results])
|
||||
|
||||
bad_results = []
|
||||
try:
|
||||
bad_results += [
|
||||
scrape_property(
|
||||
location="abceefg ju098ot498hh9",
|
||||
listing_type="for_sale",
|
||||
)
|
||||
]
|
||||
except (InvalidListingType, NoResultsFound):
|
||||
assert True
|
||||
|
||||
assert all([result is None for result in bad_results])
|
||||
Reference in New Issue
Block a user