Compare commits

...

165 Commits

Author SHA1 Message Date
Cullen Watson
6076b0f961 enh: add agent (#68) 2024-04-16 15:09:32 -05:00
Cullen Watson
cdc6f2a2a8 docs: readme 2024-04-16 14:59:50 -05:00
Cullen Watson
0bdf56568e enh: add agent name/phone (#66) 2024-04-16 14:55:44 -05:00
Cullen Watson
1f47fc3b7e fix: use enum value (#65) 2024-04-12 01:41:15 -05:00
Zachary Hampton
5c2498c62b - pending date, property type fields (#45)
- alt photos bug fix (#57)
2024-03-13 19:17:17 -07:00
Zachary Hampton
d775540afd - location bug fix 2024-03-06 16:31:06 -07:00
Cullen Watson
5ea9a6f6b6 docs: readme 2024-03-03 11:49:27 -06:00
robertomr100
ab6a0e3b6e Add foreclosure parameter (#55) 2024-03-03 11:45:28 -06:00
Zachary Hampton
03198428de Merge pull request #48 from Bunsly/for_rent_url
fix: rent url
2024-01-09 13:12:30 -07:00
Cullen Watson
70fa071318 fix: rent url 2024-01-08 12:46:31 -06:00
Cullen Watson
f7e74cf535 Merge pull request #44 from Bunsly/fix_postal_search
fix postal search to search just by zip
2023-12-02 00:40:13 -06:00
Cullen Watson
e17b976923 fix postal search to search just by zip 2023-12-02 00:39:28 -06:00
Zachary Hampton
ad13b55ea6 Update README.md 2023-11-30 11:48:48 -07:00
Cullen Watson
19f23c95c4 Merge pull request #43 from Bunsly/add_photos
Add photos
2023-11-24 21:40:34 -06:00
Cullen
4676ec9839 chore: remove test file 2023-11-24 13:42:52 -06:00
Cullen
6dd0b058d3 chore: version 2023-11-24 13:41:46 -06:00
Cullen
a74c1a9950 enh: add photos 2023-11-24 13:40:57 -06:00
Cullen Watson
fa507dbc72 docs: typo 2023-11-20 01:05:10 -06:00
Cullen Watson
5b6a9943cc Merge pull request #42 from Bunsly/street_dirction
fix: add street direction
2023-11-08 16:53:29 -06:00
Cullen Watson
9816defaf3 chore: version 2023-11-08 16:53:05 -06:00
Cullen Watson
f692b438b2 fix: add street direction 2023-11-08 16:52:06 -06:00
Zachary Hampton
30f48f54c8 Update README.md 2023-11-06 22:13:01 -07:00
Cullen Watson
7f86f69610 docs: readme 2023-11-03 18:53:46 -05:00
Cullen Watson
cc64dacdb0 docs: readme - date_from, date_to 2023-11-03 18:52:22 -05:00
Cullen Watson
d3268d8e5a Merge pull request #40 from Bunsly/date_range
Add date_to and date_from params
2023-11-03 18:42:13 -05:00
Cullen Watson
4edad901c5 [enh] date_to and date_from 2023-11-03 18:40:34 -05:00
Zachary Hampton
c597a78191 - None address bug fix 2023-10-18 16:32:43 -07:00
Zachary Hampton
11a7d854f0 - remove pending listings from for_sale 2023-10-18 14:41:41 -07:00
Zachary Hampton
f726548cc6 Update pyproject.toml 2023-10-18 09:35:48 -07:00
Zachary Hampton
fad7d670eb Update README.md 2023-10-18 08:37:42 -07:00
Zachary Hampton
89a6f93c9f Update pyproject.toml 2023-10-18 08:37:26 -07:00
Zachary Hampton
e1090b06e4 Update README.md 2023-10-17 20:22:25 -07:00
Cullen Watson
5036e74b60 Merge branch 'master' of https://github.com/ZacharyHampton/HomeHarvest 2023-10-09 11:30:17 -05:00
Cullen Watson
2cb544bc8d [chore] display clickable URLs in jupyter 2023-10-09 11:28:56 -05:00
Zachary Hampton
68cb365e03 Merge pull request #34 from ZacharyHampton/days_on_mls
[enh] days_on_mls attr
2023-10-09 09:04:59 -07:00
Cullen Watson
23876d5725 [chore] function types 2023-10-09 11:02:51 -05:00
Cullen Watson
b59d55f6b5 [enh] days_on_mls attr 2023-10-09 11:00:36 -05:00
Cullen Watson
3c3adb5f29 [docs] update video 2023-10-05 20:24:23 -05:00
Zachary Hampton
6ede8622cc - pending listing support
- removal of pending_or_contingent param
2023-10-05 11:43:00 -07:00
Cullen Watson
9f50d33bdb [chore] remove unused dependency 2023-10-05 10:11:45 -05:00
Cullen Watson
735ec021f7 [docs] README 2023-10-05 10:03:21 -05:00
Zachary Hampton
00537329cf - version bump 2023-10-04 21:35:21 -07:00
Zachary Hampton
a9225b532f - rename days variable 2023-10-04 21:35:14 -07:00
Zachary Hampton
ba7ad069c9 Merge pull request #32 from ZacharyHampton/key_error
[fix] keyerror on style
2023-10-04 20:35:05 -07:00
Cullen Watson
22bda972b0 [chore] version number 2023-10-04 22:34:52 -05:00
Cullen Watson
6f5bbf79a4 [fix] keyerror on style 2023-10-04 22:33:21 -05:00
Cullen Watson
608cceba34 [docs] reorder 2023-10-04 22:12:16 -05:00
Cullen Watson
3609586995 [docs]: add contingent to example 2023-10-04 22:11:38 -05:00
Cullen Watson
68c7e411e4 [docs] pending / contingent searches 2023-10-04 22:07:51 -05:00
Cullen Watson
5e825601a7 [docs] update example 2023-10-04 21:50:54 -05:00
Cullen Watson
ce3f94d0af [docs] update example 2023-10-04 21:50:16 -05:00
Zachary Hampton
4a1116440d Merge pull request #31 from ZacharyHampton/v0.3
v0.3
2023-10-04 19:26:44 -07:00
Cullen Watson
2d092c595f [docs]: Update README.md 2023-10-04 21:24:24 -05:00
Cullen Watson
4dbb064fe9 [docs]: Update README.md 2023-10-04 21:21:45 -05:00
Cullen Watson
4e78248032 Update README.md 2023-10-04 21:17:49 -05:00
Zachary Hampton
37e20f4469 - remove neighborhoods
- rename data
2023-10-04 18:44:47 -07:00
Zachary Hampton
8a5f0dc2c9 - pending or contingent support 2023-10-04 18:25:01 -07:00
Zachary Hampton
de692faae2 - rename last_x_days
- docstrings for scrape_property
2023-10-04 18:06:06 -07:00
Zachary Hampton
6bb68766fc - realtor tests 2023-10-04 12:04:05 -07:00
Zachary Hampton
446d5488b8 - single address support again 2023-10-04 10:07:32 -07:00
Cullen Watson
68e15ce696 [docs] clarify example 2023-10-04 10:14:11 -05:00
Cullen Watson
c4870677c2 [enh]: make last_x_days generic
add mls_only
make radius generic
2023-10-04 10:11:53 -05:00
Cullen Watson
51bde20c3c [chore]: clean up 2023-10-04 08:58:55 -05:00
Zachary Hampton
f8c0dd766d - realtor support 2023-10-03 23:33:53 -07:00
Zachary Hampton
f06a01678c - cli readme update 2023-10-03 22:31:23 -07:00
Zachary Hampton
d2879734e6 - cli update 2023-10-03 22:25:29 -07:00
Zachary Hampton
bf81ef413f - version bump 2023-10-03 22:22:09 -07:00
Zachary Hampton
29664e4eee - cullen merge 2023-10-03 22:21:16 -07:00
Zachary Hampton
088088ae51 - last x days param 2023-10-03 15:05:17 -07:00
Zachary Hampton
40bbf76db1 - realtor radius 2023-10-02 13:58:47 -07:00
Zachary Hampton
1f1ca8068f - realtor.com default 2023-10-02 10:28:13 -07:00
Zachary Hampton
8388d47f73 - version bump 2023-10-01 09:13:37 -07:00
Zachary Hampton
ba503b0ca3 Merge pull request #27 from ddxv/zillow-ua-header
Zillow Request Header: Match observed behaivor in FireFox of not sending sec-ch-ua headers
2023-10-01 09:12:58 -07:00
james
8962d619e1 Match observed behaivor in FireFox of not sending ua-ch headers in request to prevent recent 403 2023-10-01 11:31:51 +08:00
Zachary Hampton
3b7c17b7b5 - zillow proxy support 2023-09-28 18:40:16 -07:00
Zachary Hampton
59317fd6fc Merge pull request #25 from ZacharyHampton/fix/recent-issues
Fix/recent issues
2023-09-28 18:27:04 -07:00
Zachary Hampton
928b431d1f - bump version 2023-09-28 18:25:53 -07:00
Zachary Hampton
896f862137 - zillow flow update 2023-09-28 18:25:47 -07:00
Zachary Hampton
3174f5076c Merge pull request #23 from ZacharyHampton/fix/recent-issues
Fixes & Changes for recent issues
2023-09-28 18:07:55 -07:00
Zachary Hampton
2abbb913a8 - convert posted_time to datetime
- zillow location bug fix
2023-09-28 18:07:42 -07:00
Cullen Watson
73b6d5b33f [fix] zilow tls client 2023-09-28 19:34:01 -05:00
Zachary Hampton
da39c989d9 - version bump 2023-09-28 15:27:36 -07:00
Zachary Hampton
01c53f9399 - redfin bug fix
- add recent features for issues
2023-09-28 15:19:43 -07:00
Zachary Hampton
9200c17df2 - version bump 2023-09-23 10:55:50 -07:00
Zachary Hampton
9e262bf214 Merge remote-tracking branch 'origin/master' 2023-09-23 10:55:29 -07:00
Zachary Hampton
82f78fb578 - zillow bug fix 2023-09-23 10:55:14 -07:00
Cullen Watson
b0e40df00a Update pyproject.toml 2023-09-22 09:51:24 -05:00
Cullen Watson
2fc40e0dad fix: cookie 2023-09-22 09:47:37 -05:00
Zachary Hampton
254f3a68a1 - redfin bug fix 2023-09-21 18:54:03 -07:00
Zachary Hampton
05713c76b0 - redfin bug fix
- .get
2023-09-21 11:27:12 -07:00
Cullen Watson
9120cc9bfe fix: remove line 2023-09-21 13:10:14 -05:00
Cullen Watson
eee4b19515 Merge branch 'master' of https://github.com/ZacharyHampton/HomeHarvest 2023-09-21 13:06:15 -05:00
Cullen Watson
c25961eded fix: KeyEror : [minBaths] 2023-09-21 13:06:06 -05:00
Zachary Hampton
0884c3d163 Update README.md 2023-09-21 09:55:29 -07:00
Cullen Watson
8f37bfdeb8 chore: version number 2023-09-21 11:19:23 -05:00
Cullen Watson
48c2338276 fix: keyerror 2023-09-21 11:18:37 -05:00
Cullen Watson
f58a1f4a74 docs: tryhomeharvest.com 2023-09-21 10:57:11 -05:00
Zachary Hampton
4cef926d7d Merge pull request #14 from ZacharyHampton/keep_duplicates_flag
Keep duplicates flag
2023-09-20 20:27:08 -07:00
Cullen Watson
e82eeaa59f docs: add keep duplicates flag 2023-09-20 20:25:50 -05:00
Cullen Watson
644f16b25b feat: keep duplicates flag 2023-09-20 20:24:18 -05:00
Cullen Watson
e9ddc6df92 docs: update tutorial vid for release v0.2.7 2023-09-19 22:18:49 -05:00
Cullen Watson
50fb1c391d docs: update property schema 2023-09-19 21:35:37 -05:00
Cullen Watson
4f91f9dadb chore: version number 2023-09-19 21:17:12 -05:00
Zachary Hampton
66e55173b1 Merge pull request #13 from ZacharyHampton/simplify_fields
fix: simplify fields
2023-09-19 19:16:18 -07:00
Cullen Watson
f6054e8746 fix: simplify fields 2023-09-19 21:13:20 -05:00
Cullen Watson
e8d9235ee6 chore: update version number 2023-09-19 16:43:59 -05:00
Cullen Watson
043f091158 fix: keyerror on address 2023-09-19 16:43:17 -05:00
Cullen Watson
eae8108978 docs: change cmd 2023-09-19 16:18:01 -05:00
Zachary Hampton
0a39357a07 Merge pull request #12 from ZacharyHampton/proxy_bug
fix: proxy add to session correctly
2023-09-19 14:07:25 -07:00
Cullen Watson
8f06d46ddb chore: version number 2023-09-19 16:07:06 -05:00
Cullen Watson
0dae14ccfc fix: proxy add to session correctly 2023-09-19 16:05:14 -05:00
Zachary Hampton
9aaabdd5d8 Merge pull request #11 from ZacharyHampton/proxy_support
Proxy support
2023-09-19 13:50:14 -07:00
Cullen Watson
cdf41fe9f2 fix: remove self.proxy 2023-09-19 15:49:50 -05:00
Cullen Watson
1f0feb836d refactor: move proxy to session 2023-09-19 15:48:46 -05:00
Cullen Watson
5f31beda46 chore: version number 2023-09-19 15:44:41 -05:00
Cullen Watson
fd9cdea499 feat: proxy support 2023-09-19 15:43:24 -05:00
Zachary Hampton
93a1cbe17f Merge pull request #10 from ZacharyHampton/cli_homeharvest
add cli
2023-09-19 13:07:27 -07:00
Cullen Watson
49d27943c4 add cli 2023-09-19 15:01:39 -05:00
Zachary Hampton
05fca9b7e6 Update README.md 2023-09-19 11:08:08 -07:00
Zachary Hampton
20ce44fb3a - redfin limiting bug fix 2023-09-19 10:37:10 -07:00
Zachary Hampton
52017c1bb5 Merge pull request #9 from ZacharyHampton/redfin_rental_support
feat(redfin): rental support
2023-09-19 10:28:02 -07:00
Cullen Watson
dba1c03081 feat(redfin): add sold listing_type 2023-09-19 12:27:13 -05:00
Cullen Watson
1fc2d8c549 feat(redfin): rental support 2023-09-19 11:58:20 -05:00
Zachary Hampton
02d112eea0 Merge pull request #8 from ZacharyHampton/fix/zillow-location-validation
- zillow location validation
2023-09-19 09:33:33 -07:00
Zachary Hampton
30e510882b - version bump and excel support 2023-09-19 09:26:52 -07:00
Zachary Hampton
78b56c2cac - zillow location validation 2023-09-19 09:25:08 -07:00
Cullen Watson
087854a688 Merge branch 'master' of https://github.com/ZacharyHampton/HomeHarvest 2023-09-19 00:04:03 -05:00
Cullen Watson
80586467a8 docs:add guide 2023-09-18 23:53:10 -05:00
Cullen Watson
3494b152b8 docs: change install cmd 2023-09-18 23:32:51 -05:00
Cullen Watson
6c6fef80ed chore: change version number 2023-09-18 23:16:54 -05:00
Cullen Watson
62e3321277 fix(zillow): test case 2023-09-18 22:59:49 -05:00
Zachary Hampton
80186ee8c5 Merge remote-tracking branch 'origin/master'
# Conflicts:
#	homeharvest/__init__.py
2023-09-18 20:28:16 -07:00
Zachary Hampton
3ec47c5b6a - invalid test cases
- redfin and realtor bug fixes
- dupe check bug fix
2023-09-18 20:28:03 -07:00
Cullen Watson
42e8ac4de9 fix: drop dups if cols exist 2023-09-18 22:24:14 -05:00
Cullen Watson
e1917009ae docs: add gif 2023-09-18 21:47:55 -05:00
Zachary Hampton
7297f0eb33 Merge pull request #6 from ZacharyHampton/tidy_up_readme
Minor fixes
2023-09-18 19:04:08 -07:00
Cullen Watson
2eec389838 docs: add logo 2023-09-18 21:02:12 -05:00
Cullen Watson
b01162161d chore: merge 2023-09-18 20:09:28 -05:00
Cullen Watson
906ce92685 Merge remote-tracking branch 'origin' into tidy_up_readme 2023-09-18 20:01:59 -05:00
Cullen Watson
cc76e067b2 fix: lat/long KeyError 2023-09-18 20:01:55 -05:00
Zachary Hampton
1f0c351974 Merge pull request #4 from ZacharyHampton/tidy_up_readme
docs: readme
2023-09-18 17:47:13 -07:00
Zachary Hampton
a1684f87db Update pyproject.toml 2023-09-18 17:46:58 -07:00
Zachary Hampton
2ae3ebe28e Merge pull request #5 from ZacharyHampton/ZacharyHampton-patch-1
Update README.md
2023-09-18 17:45:48 -07:00
Zachary Hampton
ae3961514b Update README.md 2023-09-18 17:45:14 -07:00
Cullen Watson
0621b01d9a docs: readme 2023-09-18 19:40:49 -05:00
Cullen Watson
fbbd56d930 docs: remove proxy usage 2023-09-18 19:39:22 -05:00
Cullen Watson
82092faa28 docs: readme 2023-09-18 19:35:38 -05:00
Zachary Hampton
8f90a80b0a - lat lon on realtor & redfin 2023-09-18 16:22:47 -07:00
Zachary Hampton
d5b4d80f96 Merge pull request #3 from ZacharyHampton/all_3_sites
Check dups with city, street_address, unit
2023-09-18 16:00:27 -07:00
Cullen Watson
086bcfd224 fix: check for suite 2023-09-18 17:57:15 -05:00
Cullen Watson
4726764482 refactor: merge master 2023-09-18 17:46:05 -05:00
Cullen Watson
ca260fd2b4 fix: filter dup on street, unit, city 2023-09-18 17:42:16 -05:00
Zachary Hampton
94e5b090da - refactor 2023-09-18 15:22:43 -07:00
Zachary Hampton
d0a6a66b6a Merge pull request #2 from ZacharyHampton/all_3_sites
feat: run all 3 sites with one call
2023-09-18 15:17:50 -07:00
Cullen Watson
8e140a0e45 chore: format 2023-09-18 17:04:54 -05:00
Cullen Watson
588689c230 fix: normalize unit num 2023-09-18 17:04:34 -05:00
Cullen Watson
c7a4bfd5e4 feat: run all 3 sites with one scrape_property() call 2023-09-18 16:18:22 -05:00
Zachary Hampton
fe351ab57c Merge pull request #1 from ZacharyHampton/zillow_backend_ep 2023-09-18 13:52:43 -07:00
Cullen Watson
5d0f519a85 chore: update version number 2023-09-18 15:44:13 -05:00
Cullen Watson
869d7e7c51 refator(realtor): fit to updated models 2023-09-18 15:43:44 -05:00
Cullen Watson
ffd3ce6aed reactor(redfin) 2023-09-18 14:36:18 -05:00
Cullen Watson
471e53118e refactor(redfin): fit to use updated models 2023-09-18 14:07:37 -05:00
Cullen Watson
dc8c15959f fix: use zillow backend ep 2023-09-18 13:38:17 -05:00
Zachary Hampton
10c01f373e Update README.md
try with replit
2023-09-18 10:01:52 -07:00
Zachary Hampton
fd01bfb8b8 Update README.md 2023-09-18 08:45:31 -07:00
21 changed files with 1784 additions and 982 deletions

View File

@@ -30,4 +30,4 @@ jobs:
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.PYPI_API_TOKEN }}
password: ${{ secrets.PYPI_API_TOKEN }}

3
.gitignore vendored
View File

@@ -3,4 +3,5 @@
**/__pycache__/
**/.pytest_cache/
*.pyc
/.ipynb_checkpoints/
/.ipynb_checkpoints/
*.csv

21
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,21 @@
---
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-added-large-files
- id: check-yaml
- repo: https://github.com/adrienverge/yamllint
rev: v1.29.0
hooks:
- id: yamllint
verbose: true # create awareness of linter findings
args: ["-d", "{extends: relaxed, rules: {line-length: {max: 120}}}"]
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
language_version: python
args: [--line-length=120, --quiet]

View File

@@ -1,73 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "cb48903e-5021-49fe-9688-45cd0bc05d0f",
"metadata": {},
"outputs": [],
"source": [
"from homeharvest import scrape_property\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "156488ce-0d5f-43c5-87f4-c33e9c427860",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_columns', None) # Show all columns\n",
"pd.set_option('display.max_rows', None) # Show all rows\n",
"pd.set_option('display.width', None) # Auto-adjust display width to fit console\n",
"pd.set_option('display.max_colwidth', 50) # Limit max column width to 50 characters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c8b9744-8606-4e9b-8add-b90371a249a7",
"metadata": {},
"outputs": [],
"source": [
"scrape_property(\n",
" location=\"dallas\", site_name=\"zillow\", listing_type=\"for_sale\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab7b4c21-da1d-4713-9df4-d7425d8ce21e",
"metadata": {},
"outputs": [],
"source": [
"scrape_property(\n",
" location=\"dallas\", site_name=\"redfin\", listing_type=\"for_sale\"\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

147
README.md
View File

@@ -1,33 +1,148 @@
# HomeHarvest
<img src="https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/d1a2bf8b-09f5-4c57-b33a-0ada8a34f12d" width="400">
**HomeHarvest** aims to be the top Python real estate scraping library.
**HomeHarvest** is a real estate scraping library that extracts and formats data in the style of MLS listings.
_**Under Consideration**: We're looking into the possibility of an Excel plugin to cater to a broader audience._
**Not technical?** Try out the web scraping tool on our site at [tryhomeharvest.com](https://tryhomeharvest.com).
*Looking to build a data-focused software product?* **[Book a call](https://bunsly.com)** *to work with us.*
## HomeHarvest Features
- **Source**: Fetches properties directly from **Realtor.com**.
- **Data Format**: Structures data to resemble MLS listings.
- **Export Flexibility**: Options to save as either CSV or Excel.
[Video Guide for HomeHarvest](https://youtu.be/J1qgNPgmSLI) - _updated for release v0.3.4_
![homeharvest](https://github.com/ZacharyHampton/HomeHarvest/assets/78247585/b3d5d727-e67b-4a9f-85d8-1e65fd18620a)
## Installation
```bash
pip install --upgrade homeharvest
pip install -U homeharvest
```
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
## Example Usage
```
## Usage
### Python
```py
from homeharvest import scrape_property
from datetime import datetime
# Generate filename based on current timestamp
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"HomeHarvest_{current_timestamp}.csv"
properties = scrape_property(
location="85281", site_name="zillow", listing_type="for_rent"
location="San Diego, CA",
listing_type="sold", # or (for_sale, for_rent, pending)
past_days=30, # sold in last 30 days - listed in last 30 days if (for_sale, for_rent)
# date_from="2023-05-01", # alternative to past_days
# date_to="2023-05-28",
# foreclosure=True
# mls_only=True, # only fetch MLS listings
)
print(properties)
print(f"Number of properties: {len(properties)}")
# Export to csv
properties.to_csv(filename, index=False)
print(properties.head())
```
### Site Name Options
## Output
```plaintext
>>> properties.head()
MLS MLS # Status Style ... COEDate LotSFApx PrcSqft Stories
0 SDCA 230018348 SOLD CONDOS ... 2023-10-03 290110 803 2
1 SDCA 230016614 SOLD TOWNHOMES ... 2023-10-03 None 838 3
2 SDCA 230016367 SOLD CONDOS ... 2023-10-03 30056 649 1
3 MRCA NDP2306335 SOLD SINGLE_FAMILY ... 2023-10-03 7519 661 2
4 SDCA 230014532 SOLD CONDOS ... 2023-10-03 None 752 1
[5 rows x 22 columns]
```
- `zillow`
- `redfin`
- `realtor.com`
### Parameters for `scrape_property()`
```
Required
├── location (str): The address in various formats - this could be just a zip code, a full address, or city/state, etc.
└── listing_type (option): Choose the type of listing.
- 'for_rent'
- 'for_sale'
- 'sold'
- 'pending'
### Listing Types
Optional
├── radius (decimal): Radius in miles to find comparable properties based on individual addresses.
│ Example: 5.5 (fetches properties within a 5.5-mile radius if location is set to a specific address; otherwise, ignored)
├── past_days (integer): Number of past days to filter properties. Utilizes 'last_sold_date' for 'sold' listing types, and 'list_date' for others (for_rent, for_sale).
│ Example: 30 (fetches properties listed/sold in the last 30 days)
├── date_from, date_to (string): Start and end dates to filter properties listed or sold, both dates are required.
| (use this to get properties in chunks as there's a 10k result limit)
│ Format for both must be "YYYY-MM-DD".
│ Example: "2023-05-01", "2023-05-15" (fetches properties listed/sold between these dates)
├── mls_only (True/False): If set, fetches only MLS listings (mainly applicable to 'sold' listings)
├── foreclosure (True/False): If set, fetches only foreclosures
└── proxy (string): In format 'http://user:pass@host:port'
```
- `for_rent`
- `for_sale`
- `sold`
### Property Schema
```plaintext
Property
├── Basic Information:
│ ├── property_url
│ ├── mls
│ ├── mls_id
│ └── status
├── Address Details:
│ ├── street
│ ├── unit
│ ├── city
│ ├── state
│ └── zip_code
├── Property Description:
│ ├── style
│ ├── beds
│ ├── full_baths
│ ├── half_baths
│ ├── sqft
│ ├── year_built
│ ├── stories
│ └── lot_sqft
├── Property Listing Details:
│ ├── days_on_mls
│ ├── list_price
│ ├── list_date
│ ├── pending_date
│ ├── sold_price
│ ├── last_sold_date
│ ├── price_per_sqft
│ ├── parking_garage
│ └── hoa_fee
├── Location Details:
│ ├── latitude
│ ├── longitude
├── Agent Info:
│ ├── agent
│ ├── broker
│ └── broker_phone
```
### Exceptions
The following exceptions may be raised when using HomeHarvest:
- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD

View File

@@ -0,0 +1,141 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "cb48903e-5021-49fe-9688-45cd0bc05d0f",
"metadata": {
"is_executing": true
},
"outputs": [],
"source": [
"from homeharvest import scrape_property\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "156488ce-0d5f-43c5-87f4-c33e9c427860",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_columns', None) # Show all columns\n",
"pd.set_option('display.max_rows', None) # Show all rows\n",
"pd.set_option('display.width', None) # Auto-adjust display width to fit console\n",
"pd.set_option('display.max_colwidth', 50) # Limit max column width to 50 characters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c8b9744-8606-4e9b-8add-b90371a249a7",
"metadata": {},
"outputs": [],
"source": [
"# check for sale properties\n",
"scrape_property(\n",
" location=\"dallas\",\n",
" listing_type=\"for_sale\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aaf86093",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"# search a specific address\n",
"scrape_property(\n",
" location=\"2530 Al Lipscomb Way\",\n",
" listing_type=\"for_sale\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab7b4c21-da1d-4713-9df4-d7425d8ce21e",
"metadata": {},
"outputs": [],
"source": [
"# check rentals\n",
"scrape_property(\n",
" location=\"chicago, illinois\",\n",
" listing_type=\"for_rent\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af280cd3",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"# check sold properties\n",
"properties = scrape_property(\n",
" location=\"90210\",\n",
" listing_type=\"sold\",\n",
" past_days=10\n",
")\n",
"display(properties)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "628c1ce2",
"metadata": {
"collapsed": false,
"is_executing": true,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"# display clickable URLs\n",
"from IPython.display import display, HTML\n",
"properties['property_url'] = '<a href=\"' + properties['property_url'] + '\" target=\"_blank\">' + properties['property_url'] + '</a>'\n",
"\n",
"html = properties.to_html(escape=False)\n",
"truncate_width = f'<style>.dataframe td {{ max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }}</style>{html}'\n",
"display(HTML(truncate_width))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,20 @@
from homeharvest import scrape_property
from datetime import datetime
# Generate filename based on current timestamp
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"HomeHarvest_{current_timestamp}.csv"
properties = scrape_property(
location="San Diego, CA",
listing_type="sold", # or (for_sale, for_rent)
past_days=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent)
# pending_or_contingent=True # use on for_sale listings to find pending / contingent listings
# mls_only=True, # only fetch MLS listings
# proxy="http://user:pass@host:port" # use a proxy to change your IP address
)
print(f"Number of properties: {len(properties)}")
# Export to csv
properties.to_csv(filename, index=False)
print(properties.head())

View File

@@ -1,117 +1,54 @@
from .core.scrapers.redfin import RedfinScraper
from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.zillow import ZillowScraper
from .core.scrapers.models import ListingType, Property, Building, SiteName
from .core.scrapers import ScraperInput
from .exceptions import InvalidSite, InvalidListingType
from typing import Union
import warnings
import pandas as pd
_scrapers = {
"redfin": RedfinScraper,
"realtor.com": RealtorScraper,
"zillow": ZillowScraper,
}
def validate_input(site_name: str, listing_type: str) -> None:
if site_name.lower() not in _scrapers:
raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
if listing_type.upper() not in ListingType.__members__:
raise InvalidListingType(
f"Provided listing type, '{listing_type}', does not exist."
)
def get_ordered_properties(result: Union[Building, Property]) -> list[str]:
if isinstance(result, Property):
return [
"listing_type",
"address_one",
"city",
"state",
"zip_code",
"address_two",
"url",
"property_type",
"price",
"beds",
"baths",
"square_feet",
"price_per_square_foot",
"lot_size",
"stories",
"year_built",
"agent_name",
"mls_id",
"description",
]
elif isinstance(result, Building):
return [
"address_one",
"city",
"state",
"zip_code",
"address_two",
"url",
"num_units",
"min_unit_price",
"max_unit_price",
"avg_unit_price",
"listing_type",
]
return []
def process_result(result: Union[Building, Property]) -> pd.DataFrame:
prop_data = result.__dict__
address_data = prop_data["address"]
prop_data["site_name"] = prop_data["site_name"]
prop_data["listing_type"] = prop_data["listing_type"].value
prop_data["property_type"] = prop_data["property_type"].value.lower() if prop_data.get("property_type") else None
prop_data["address_one"] = address_data.address_one
prop_data["city"] = address_data.city
prop_data["state"] = address_data.state
prop_data["zip_code"] = address_data.zip_code
prop_data["address_two"] = address_data.address_two
del prop_data["address"]
properties_df = pd.DataFrame([prop_data])
properties_df = properties_df[get_ordered_properties(result)]
return properties_df
from .core.scrapers import ScraperInput
from .utils import process_result, ordered_properties, validate_input, validate_dates
from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.models import ListingType
def scrape_property(
location: str,
site_name: str,
listing_type: str = "for_sale", #: for_sale, for_rent, sold
listing_type: str = "for_sale",
radius: float = None,
mls_only: bool = False,
past_days: int = None,
proxy: str = None,
date_from: str = None,
date_to: str = None,
foreclosure: bool = None,
) -> pd.DataFrame:
"""
Scrape property from various sites from a given location and listing type.
:returns: pd.DataFrame
:param location: US Location (e.g. 'San Francisco, CA', 'Cook County, IL', '85281', '2530 Al Lipscomb Way')
:param site_name: Site name (e.g. 'realtor.com', 'zillow', 'redfin')
:param listing_type: Listing type (e.g. 'for_sale', 'for_rent', 'sold')
:return: pd.DataFrame containing properties
Scrape properties from Realtor.com based on a given location and listing type.
:param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way")
:param listing_type: Listing Type (for_sale, for_rent, sold)
:param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses.
:param mls_only: If set, fetches only listings with MLS IDs.
:param past_days: Get properties sold or listed (dependent on your listing_type) in the last _ days.
:param date_from, date_to: Get properties sold or listed (dependent on your listing_type) between these dates. format: 2021-01-28
:param proxy: Proxy to use for scraping
"""
validate_input(site_name, listing_type)
validate_input(listing_type)
validate_dates(date_from, date_to)
scraper_input = ScraperInput(
location=location,
listing_type=ListingType[listing_type.upper()],
site_name=site_name.lower(),
proxy=proxy,
radius=radius,
mls_only=mls_only,
last_x_days=past_days,
date_from=date_from,
date_to=date_to,
foreclosure=foreclosure,
)
site = _scrapers[site_name.lower()](scraper_input)
site = RealtorScraper(scraper_input)
results = site.search()
properties_dfs = [process_result(result) for result in results]
if not properties_dfs:
return pd.DataFrame()
return pd.concat(properties_dfs, ignore_index=True)
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
return pd.concat(properties_dfs, ignore_index=True, axis=0)[ordered_properties]

85
homeharvest/cli.py Normal file
View File

@@ -0,0 +1,85 @@
import argparse
import datetime
from homeharvest import scrape_property
def main():
parser = argparse.ArgumentParser(description="Home Harvest Property Scraper")
parser.add_argument("location", type=str, help="Location to scrape (e.g., San Francisco, CA)")
parser.add_argument(
"-l",
"--listing_type",
type=str,
default="for_sale",
choices=["for_sale", "for_rent", "sold", "pending"],
help="Listing type to scrape",
)
parser.add_argument(
"-o",
"--output",
type=str,
default="excel",
choices=["excel", "csv"],
help="Output format",
)
parser.add_argument(
"-f",
"--filename",
type=str,
default=None,
help="Name of the output file (without extension)",
)
parser.add_argument("-p", "--proxy", type=str, default=None, help="Proxy to use for scraping")
parser.add_argument(
"-d",
"--days",
type=int,
default=None,
help="Sold/listed in last _ days filter.",
)
parser.add_argument(
"-r",
"--radius",
type=float,
default=None,
help="Get comparable properties within _ (eg. 0.0) miles. Only applicable for individual addresses.",
)
parser.add_argument(
"-m",
"--mls_only",
action="store_true",
help="If set, fetches only MLS listings.",
)
args = parser.parse_args()
result = scrape_property(
args.location,
args.listing_type,
radius=args.radius,
proxy=args.proxy,
mls_only=args.mls_only,
past_days=args.days,
)
if not args.filename:
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
args.filename = f"HomeHarvest_{timestamp}"
if args.output == "excel":
output_filename = f"{args.filename}.xlsx"
result.to_excel(output_filename, index=False)
print(f"Excel file saved as {output_filename}")
elif args.output == "csv":
output_filename = f"{args.filename}.csv"
result.to_csv(output_filename, index=False)
print(f"CSV file saved as {output_filename}")
if __name__ == "__main__":
main()

View File

@@ -1,5 +1,6 @@
from dataclasses import dataclass
import requests
import uuid
from .models import Property, ListingType, SiteName
@@ -7,31 +8,68 @@ from .models import Property, ListingType, SiteName
class ScraperInput:
location: str
listing_type: ListingType
site_name: str
proxy_url: str | None = None
radius: float | None = None
mls_only: bool | None = None
proxy: str | None = None
last_x_days: int | None = None
date_from: str | None = None
date_to: str | None = None
foreclosure: bool | None = None
class Scraper:
def __init__(self, scraper_input: ScraperInput):
def __init__(
self,
scraper_input: ScraperInput,
session: requests.Session = None,
):
self.location = scraper_input.location
self.listing_type = scraper_input.listing_type
self.session = requests.Session()
if not session:
self.session = requests.Session()
self.session.headers.update(
{
"auth": f"Bearer {self.get_access_token()}",
"apollographql-client-name": "com.move.Realtor-apollo-ios",
}
)
else:
self.session = session
if scraper_input.proxy:
proxy_url = scraper_input.proxy
proxies = {"http": proxy_url, "https": proxy_url}
self.session.proxies.update(proxies)
self.listing_type = scraper_input.listing_type
self.site_name = scraper_input.site_name
self.radius = scraper_input.radius
self.last_x_days = scraper_input.last_x_days
self.mls_only = scraper_input.mls_only
self.date_from = scraper_input.date_from
self.date_to = scraper_input.date_to
self.foreclosure = scraper_input.foreclosure
if scraper_input.proxy_url:
self.session.proxies = {
"http": scraper_input.proxy_url,
"https": scraper_input.proxy_url,
}
def search(self) -> list[Property]:
...
def search(self) -> list[Property]: ...
@staticmethod
def _parse_home(home) -> Property:
...
def _parse_home(home) -> Property: ...
def handle_location(self):
...
def handle_location(self): ...
def get_access_token(self):
url = "https://graph.realtor.com/auth/token"
payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}'
headers = {
"Host": "graph.realtor.com",
"x-client-version": "24.20.4.149916",
"accept": "*/*",
"content-type": "Application/json",
"user-agent": "Realtor.com/24.20.4.149916 CFNetwork/1410.0.3 Darwin/22.6.0",
"accept-language": "en-US,en;q=0.9",
}
response = requests.post(url, headers=headers, data=payload)
data = response.json()
return data["access_token"]

View File

@@ -1,5 +1,6 @@
from dataclasses import dataclass
from enum import Enum
from typing import Optional
class SiteName(Enum):
@@ -7,78 +8,92 @@ class SiteName(Enum):
REDFIN = "redfin"
REALTOR = "realtor.com"
@classmethod
def get_by_value(cls, value):
for item in cls:
if item.value == value:
return item
raise ValueError(f"{value} not found in {cls}")
class ListingType(Enum):
FOR_SALE = "for_sale"
FOR_RENT = "for_rent"
SOLD = "sold"
FOR_SALE = "FOR_SALE"
FOR_RENT = "FOR_RENT"
PENDING = "PENDING"
SOLD = "SOLD"
class PropertyType(Enum):
HOUSE = "HOUSE"
CONDO = "CONDO"
TOWNHOUSE = "TOWNHOUSE"
SINGLE_FAMILY = "SINGLE_FAMILY"
MULTI_FAMILY = "MULTI_FAMILY"
MANUFACTURED = "MANUFACTURED"
APARTMENT = "APARTMENT"
BUILDING = "BUILDING"
COMMERCIAL = "COMMERCIAL"
CONDO_TOWNHOME = "CONDO_TOWNHOME"
CONDO_TOWNHOME_ROWHOME_COOP = "CONDO_TOWNHOME_ROWHOME_COOP"
CONDO = "CONDO"
CONDOS = "CONDOS"
COOP = "COOP"
DUPLEX_TRIPLEX = "DUPLEX_TRIPLEX"
FARM = "FARM"
INVESTMENT = "INVESTMENT"
LAND = "LAND"
MOBILE = "MOBILE"
MULTI_FAMILY = "MULTI_FAMILY"
RENTAL = "RENTAL"
SINGLE_FAMILY = "SINGLE_FAMILY"
TOWNHOMES = "TOWNHOMES"
OTHER = "OTHER"
@classmethod
def from_int_code(cls, code):
mapping = {
1: cls.HOUSE,
2: cls.CONDO,
3: cls.TOWNHOUSE,
4: cls.MULTI_FAMILY,
5: cls.LAND,
6: cls.OTHER,
8: cls.SINGLE_FAMILY,
13: cls.SINGLE_FAMILY,
}
return mapping.get(code, cls.OTHER)
@dataclass
class Address:
address_one: str
city: str
state: str
zip_code: str
address_two: str | None = None
@dataclass()
class Realty:
site_name: str
address: Address
url: str
listing_type: ListingType | None = None
street: str | None = None
unit: str | None = None
city: str | None = None
state: str | None = None
zip: str | None = None
@dataclass
class Property(Realty):
price: int | None = None
class Description:
primary_photo: str | None = None
alt_photos: list[str] | None = None
style: PropertyType | None = None
beds: int | None = None
baths: float | None = None
stories: int | None = None
baths_full: int | None = None
baths_half: int | None = None
sqft: int | None = None
lot_sqft: int | None = None
sold_price: int | None = None
year_built: int | None = None
square_feet: int | None = None
price_per_square_foot: int | None = None
mls_id: str | None = None
agent_name: str | None = None
property_type: PropertyType | None = None
lot_size: int | None = None
description: str | None = None
garage: float | None = None
stories: int | None = None
@dataclass
class Building(Realty):
num_units: int | None = None
min_unit_price: int | None = None
max_unit_price: int | None = None
avg_unit_price: int | None = None
class Agent:
name: str | None = None
phone: str | None = None
@dataclass
class Property:
property_url: str
mls: str | None = None
mls_id: str | None = None
status: str | None = None
address: Address | None = None
list_price: int | None = None
list_date: str | None = None
pending_date: str | None = None
last_sold_date: str | None = None
prc_sqft: int | None = None
hoa_fee: int | None = None
days_on_mls: int | None = None
description: Description | None = None
latitude: float | None = None
longitude: float | None = None
neighborhoods: Optional[str] = None
agents: list[Agent] = None

View File

@@ -1,54 +1,225 @@
import json
from ..models import Property, Address
from .. import Scraper
from typing import Any, Generator
from ....exceptions import NoResultsFound
"""
homeharvest.realtor.__init__
~~~~~~~~~~~~
This module implements the scraper for realtor.com
"""
from datetime import datetime
from typing import Dict, Union, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
from .. import Scraper
from ..models import Property, Address, ListingType, Description, PropertyType, Agent
class RealtorScraper(Scraper):
SEARCH_GQL_URL = "https://www.realtor.com/api/v1/rdc_search_srp?client_id=rdc-search-new-communities&schema=vesta"
PROPERTY_URL = "https://www.realtor.com/realestateandhomes-detail/"
PROPERTY_GQL = "https://graph.realtor.com/graphql"
ADDRESS_AUTOCOMPLETE_URL = "https://parser-external.geo.moveaws.com/suggest"
NUM_PROPERTY_WORKERS = 20
def __init__(self, scraper_input):
super().__init__(scraper_input)
self.search_url = "https://www.realtor.com/api/v1/rdc_search_srp?client_id=rdc-search-new-communities&schema=vesta"
def handle_location(self):
headers = {
"authority": "parser-external.geo.moveaws.com",
"accept": "*/*",
"accept-language": "en-US,en;q=0.9",
"origin": "https://www.realtor.com",
"referer": "https://www.realtor.com/",
"sec-ch-ua": '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "cross-site",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}
params = {
"input": self.location,
"client_id": self.listing_type.value.replace('_', '-'),
"client_id": self.listing_type.value.lower().replace("_", "-"),
"limit": "1",
"area_types": "city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park",
}
response = self.session.get(
"https://parser-external.geo.moveaws.com/suggest",
self.ADDRESS_AUTOCOMPLETE_URL,
params=params,
headers=headers,
)
response_json = response.json()
result = response_json["autocomplete"]
if result is None:
raise NoResultsFound("No results found for location: " + self.location)
if not result:
return None
return result[0]
def handle_listing(self, listing_id: str) -> list[Property]:
query = """query Listing($listing_id: ID!) {
listing(id: $listing_id) {
source {
id
listing_id
}
address {
street_direction
street_number
street_name
street_suffix
unit
city
state_code
postal_code
location {
coordinate {
lat
lon
}
}
}
basic {
sqft
beds
baths_full
baths_half
lot_sqft
sold_price
sold_price
type
price
status
sold_date
list_date
}
details {
year_built
stories
garage
permalink
}
media {
photos {
href
}
}
}
}"""
variables = {"listing_id": listing_id}
payload = {
"query": query,
"variables": variables,
}
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
response_json = response.json()
property_info = response_json["data"]["listing"]
mls = (
property_info["source"].get("id")
if "source" in property_info and isinstance(property_info["source"], dict)
else None
)
able_to_get_lat_long = (
property_info
and property_info.get("address")
and property_info["address"].get("location")
and property_info["address"]["location"].get("coordinate")
)
list_date_str = (
property_info["basic"]["list_date"].split("T")[0] if property_info["basic"].get("list_date") else None
)
last_sold_date_str = (
property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get("sold_date") else None
)
pending_date_str = property_info["pending_date"].split("T")[0] if property_info.get("pending_date") else None
list_date = datetime.strptime(list_date_str, "%Y-%m-%d") if list_date_str else None
last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None
pending_date = datetime.strptime(pending_date_str, "%Y-%m-%d") if pending_date_str else None
today = datetime.now()
days_on_mls = None
status = property_info["basic"]["status"].lower()
if list_date:
if status == "sold" and last_sold_date:
days_on_mls = (last_sold_date - list_date).days
elif status in ("for_sale", "for_rent"):
days_on_mls = (today - list_date).days
if days_on_mls and days_on_mls < 0:
days_on_mls = None
property_id = property_info["details"]["permalink"]
agents = self.get_agents(property_id)
listing = Property(
mls=mls,
mls_id=(
property_info["source"].get("listing_id")
if "source" in property_info and isinstance(property_info["source"], dict)
else None
),
property_url=f"{self.PROPERTY_URL}{property_id}",
status=property_info["basic"]["status"].upper(),
list_price=property_info["basic"]["price"],
list_date=list_date,
prc_sqft=(
property_info["basic"].get("price") / property_info["basic"].get("sqft")
if property_info["basic"].get("price") and property_info["basic"].get("sqft")
else None
),
last_sold_date=last_sold_date,
pending_date=pending_date,
latitude=property_info["address"]["location"]["coordinate"].get("lat") if able_to_get_lat_long else None,
longitude=property_info["address"]["location"]["coordinate"].get("lon") if able_to_get_lat_long else None,
address=self._parse_address(property_info, search_type="handle_listing"),
description=Description(
alt_photos=self.process_alt_photos(property_info.get("media", {}).get("photos", [])),
style=property_info["basic"].get("type", "").upper(),
beds=property_info["basic"].get("beds"),
baths_full=property_info["basic"].get("baths_full"),
baths_half=property_info["basic"].get("baths_half"),
sqft=property_info["basic"].get("sqft"),
lot_sqft=property_info["basic"].get("lot_sqft"),
sold_price=property_info["basic"].get("sold_price"),
year_built=property_info["details"].get("year_built"),
garage=property_info["details"].get("garage"),
stories=property_info["details"].get("stories"),
),
days_on_mls=days_on_mls,
agents=agents,
)
return [listing]
def get_latest_listing_id(self, property_id: str) -> str | None:
query = """query Property($property_id: ID!) {
property(id: $property_id) {
listings {
listing_id
primary
}
}
}
"""
variables = {"property_id": property_id}
payload = {
"query": query,
"variables": variables,
}
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
response_json = response.json()
property_info = response_json["data"]["property"]
if property_info["listings"] is None:
return None
primary_listing = next(
(listing for listing in property_info["listings"] if listing["primary"]),
None,
)
if primary_listing:
return primary_listing["listing_id"]
else:
return property_info["listings"][0]["listing_id"]
def handle_address(self, property_id: str) -> list[Property]:
"""
Handles a specific address & returns one property
"""
query = """query Property($property_id: ID!) {
property(id: $property_id) {
property_id
@@ -60,22 +231,20 @@ class RealtorScraper(Scraper):
stories
}
address {
address_validation_code
city
country
county
line
postal_code
state_code
street_direction
street_name
street_number
street_name
street_suffix
street_post_direction
unit_value
unit
unit_descriptor
zip
city
state_code
postal_code
location {
coordinate {
lat
lon
}
}
}
basic {
baths
@@ -93,176 +262,489 @@ class RealtorScraper(Scraper):
units
year_built
}
primary_photo {
href
}
photos {
href
}
}
}"""
variables = {
'property_id': property_id
}
variables = {"property_id": property_id}
agents = self.get_agents(property_id)
payload = {
'query': query,
'variables': variables,
"query": query,
"variables": variables,
}
response = self.session.post(self.search_url, json=payload)
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
response_json = response.json()
property_info = response_json['data']['property']
property_info = response_json["data"]["property"]
return [Property(
site_name=self.site_name,
address=Address(
address_one=property_info['address']['line'],
city=property_info['address']['city'],
state=property_info['address']['state_code'],
zip_code=property_info['address']['postal_code'],
),
url="https://www.realtor.com/realestateandhomes-detail/" + property_info['details']['permalink'],
beds=property_info['basic']['beds'],
baths=property_info['basic']['baths'],
stories=property_info['details']['stories'],
year_built=property_info['details']['year_built'],
square_feet=property_info['basic']['sqft'],
price_per_square_foot=property_info['basic']['price'] / property_info['basic']['sqft']
if property_info['basic']['sqft'] is not None and
property_info['basic']['price'] is not None
else None,
price=property_info['basic']['price'],
mls_id=property_id,
listing_type=self.listing_type,
lot_size=property_info['public_record']['lot_size'] if property_info['public_record'] is not None else None,
)]
return [
Property(
mls_id=property_id,
property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}",
address=self._parse_address(property_info, search_type="handle_address"),
description=self._parse_description(property_info),
agents=agents,
)
]
def handle_area(self, variables: dict, return_total: bool = False) -> list[Property] | int:
query = """query Home_search(
$city: String,
$county: [String],
$state_code: String,
$postal_code: String
$offset: Int,
) {
home_search(
query: {
city: $city
county: $county
postal_code: $postal_code
state_code: $state_code
status: %s
def general_search(self, variables: dict, search_type: str) -> Dict[str, Union[int, list[Property]]]:
"""
Handles a location area & returns a list of properties
"""
results_query = """{
count
total
results {
pending_date
property_id
list_date
status
last_sold_price
last_sold_date
list_price
price_per_sqft
flags {
is_contingent
is_pending
}
limit: 200
offset: $offset
) {
count
total
results {
property_id
description {
baths
beds
lot_sqft
sqft
text
sold_price
stories
year_built
garage
unit_number
floor_number
}
location {
address {
city
country
line
postal_code
state_code
state
street_direction
street_name
street_number
street_post_direction
street_suffix
unit
description {
type
sqft
beds
baths_full
baths_half
lot_sqft
sold_price
year_built
garage
sold_price
type
name
stories
}
source {
id
listing_id
}
hoa {
fee
}
location {
address {
street_direction
street_number
street_name
street_suffix
unit
city
state_code
postal_code
coordinate {
lon
lat
}
}
list_price
price_per_sqft
source {
id
neighborhoods {
name
}
}
primary_photo {
href
}
photos {
href
}
}
}""" % self.listing_type.value
}
}"""
date_param = ""
if self.listing_type == ListingType.SOLD:
if self.date_from and self.date_to:
date_param = f'sold_date: {{ min: "{self.date_from}", max: "{self.date_to}" }}'
elif self.last_x_days:
date_param = f'sold_date: {{ min: "$today-{self.last_x_days}D" }}'
else:
if self.date_from and self.date_to:
date_param = f'list_date: {{ min: "{self.date_from}", max: "{self.date_to}" }}'
elif self.last_x_days:
date_param = f'list_date: {{ min: "$today-{self.last_x_days}D" }}'
sort_param = (
"sort: [{ field: sold_date, direction: desc }]"
if self.listing_type == ListingType.SOLD
else "sort: [{ field: list_date, direction: desc }]"
)
pending_or_contingent_param = (
"or_filters: { contingent: true, pending: true }" if self.listing_type == ListingType.PENDING else ""
)
listing_type = ListingType.FOR_SALE if self.listing_type == ListingType.PENDING else self.listing_type
is_foreclosure = ""
if variables.get("foreclosure") is True:
is_foreclosure = "foreclosure: true"
elif variables.get("foreclosure") is False:
is_foreclosure = "foreclosure: false"
if search_type == "comps": #: comps search, came from an address
query = """query Property_search(
$coordinates: [Float]!
$radius: String!
$offset: Int!,
) {
home_search(
query: {
%s
nearby: {
coordinates: $coordinates
radius: $radius
}
status: %s
%s
%s
}
%s
limit: 200
offset: $offset
) %s""" % (
is_foreclosure,
listing_type.value.lower(),
date_param,
pending_or_contingent_param,
sort_param,
results_query,
)
elif search_type == "area": #: general search, came from a general location
query = """query Home_search(
$city: String,
$county: [String],
$state_code: String,
$postal_code: String
$offset: Int,
) {
home_search(
query: {
%s
city: $city
county: $county
postal_code: $postal_code
state_code: $state_code
status: %s
%s
%s
}
%s
limit: 200
offset: $offset
) %s""" % (
is_foreclosure,
listing_type.value.lower(),
date_param,
pending_or_contingent_param,
sort_param,
results_query,
)
else: #: general search, came from an address
query = (
"""query Property_search(
$property_id: [ID]!
$offset: Int!,
) {
property_search(
query: {
property_id: $property_id
}
limit: 1
offset: $offset
) %s"""
% results_query
)
payload = {
'query': query,
'variables': variables,
"query": query,
"variables": variables,
}
response = self.session.post(self.search_url, json=payload)
response = self.session.post(self.SEARCH_GQL_URL, json=payload)
response.raise_for_status()
response_json = response.json()
if return_total:
return response_json['data']['home_search']['total']
search_key = "home_search" if "home_search" in query else "property_search"
properties: list[Property] = []
for result in response_json['data']['home_search']['results']:
realty_property = Property(
address=Address(
address_one=result['location']['address']['line'],
city=result['location']['address']['city'],
state=result['location']['address']['state_code'],
zip_code=result['location']['address']['postal_code'],
address_two=result['location']['address']['unit'],
),
site_name=self.site_name,
url="https://www.realtor.com/realestateandhomes-detail/" + result['property_id'],
beds=result['description']['beds'],
baths=result['description']['baths'],
stories=result['description']['stories'],
year_built=result['description']['year_built'],
square_feet=result['description']['sqft'],
price_per_square_foot=result['price_per_sqft'],
price=result['list_price'],
mls_id=result['property_id'],
listing_type=self.listing_type,
lot_size=result['description']['lot_sqft'],
if (
response_json is None
or "data" not in response_json
or response_json["data"] is None
or search_key not in response_json["data"]
or response_json["data"][search_key] is None
or "results" not in response_json["data"][search_key]
):
return {"total": 0, "properties": []}
def process_property(result: dict) -> Property | None:
mls = result["source"].get("id") if "source" in result and isinstance(result["source"], dict) else None
if not mls and self.mls_only:
return
able_to_get_lat_long = (
result
and result.get("location")
and result["location"].get("address")
and result["location"]["address"].get("coordinate")
)
properties.append(realty_property)
is_pending = result["flags"].get("is_pending") or result["flags"].get("is_contingent")
return properties
if is_pending and self.listing_type != ListingType.PENDING:
return
def search(self):
location_info = self.handle_location()
location_type = location_info["area_type"]
property_id = result["property_id"]
agents = self.get_agents(property_id)
if location_type == 'address':
property_id = location_info['mpr_id']
return self.handle_address(property_id)
realty_property = Property(
mls=mls,
mls_id=(
result["source"].get("listing_id")
if "source" in result and isinstance(result["source"], dict)
else None
),
property_url=(
f"{self.PROPERTY_URL}{property_id}"
if self.listing_type != ListingType.FOR_RENT
else f"{self.PROPERTY_URL}M{property_id}?listing_status=rental"
),
status="PENDING" if is_pending else result["status"].upper(),
list_price=result["list_price"],
list_date=result["list_date"].split("T")[0] if result.get("list_date") else None,
prc_sqft=result.get("price_per_sqft"),
last_sold_date=result.get("last_sold_date"),
hoa_fee=result["hoa"]["fee"] if result.get("hoa") and isinstance(result["hoa"], dict) else None,
latitude=result["location"]["address"]["coordinate"].get("lat") if able_to_get_lat_long else None,
longitude=result["location"]["address"]["coordinate"].get("lon") if able_to_get_lat_long else None,
address=self._parse_address(result, search_type="general_search"),
description=self._parse_description(result),
days_on_mls=self.calculate_days_on_mls(result),
agents=agents,
)
return realty_property
offset = 0
search_variables = {
'city': location_info.get('city'),
'county': location_info.get('county'),
'state_code': location_info.get('state_code'),
'postal_code': location_info.get('postal_code'),
'offset': offset,
}
total = self.handle_area(search_variables, return_total=True)
homes = []
with ThreadPoolExecutor(max_workers=10) as executor:
with ThreadPoolExecutor(max_workers=self.NUM_PROPERTY_WORKERS) as executor:
futures = [
executor.submit(
self.handle_area, variables=search_variables | {'offset': i}, return_total=False
) for i in range(0, total, 200)
executor.submit(process_property, result) for result in response_json["data"][search_key]["results"]
]
for future in as_completed(futures):
homes.extend(future.result())
result = future.result()
if result:
properties.append(result)
return {
"total": response_json["data"][search_key]["total"],
"properties": properties,
}
def search(self):
location_info = self.handle_location()
if not location_info:
return []
location_type = location_info["area_type"]
search_variables = {
"offset": 0,
}
search_type = (
"comps"
if self.radius and location_type == "address"
else "address" if location_type == "address" and not self.radius else "area"
)
if location_type == "address":
if not self.radius: #: single address search, non comps
property_id = location_info["mpr_id"]
search_variables |= {"property_id": property_id}
gql_results = self.general_search(search_variables, search_type=search_type)
if gql_results["total"] == 0:
listing_id = self.get_latest_listing_id(property_id)
if listing_id is None:
return self.handle_address(property_id)
else:
return self.handle_listing(listing_id)
else:
return gql_results["properties"]
else: #: general search, comps (radius)
if not location_info.get("centroid"):
return []
coordinates = list(location_info["centroid"].values())
search_variables |= {
"coordinates": coordinates,
"radius": "{}mi".format(self.radius),
}
elif location_type == "postal_code":
search_variables |= {
"postal_code": location_info.get("postal_code"),
}
else: #: general search, location
search_variables |= {
"city": location_info.get("city"),
"county": location_info.get("county"),
"state_code": location_info.get("state_code"),
"postal_code": location_info.get("postal_code"),
}
if self.foreclosure:
search_variables["foreclosure"] = self.foreclosure
result = self.general_search(search_variables, search_type=search_type)
total = result["total"]
homes = result["properties"]
with ThreadPoolExecutor(max_workers=10) as executor:
futures = [
executor.submit(
self.general_search,
variables=search_variables | {"offset": i},
search_type=search_type,
)
for i in range(200, min(total, 10000), 200)
]
for future in as_completed(futures):
homes.extend(future.result()["properties"])
return homes
def get_agents(self, property_id: str) -> list[Agent]:
payload = f'{{"query":"query GetHome($property_id: ID!) {{\\n home(property_id: $property_id) {{\\n __typename\\n\\n consumerAdvertisers: consumer_advertisers {{\\n __typename\\n type\\n advertiserId: advertiser_id\\n name\\n phone\\n type\\n href\\n slogan\\n photo {{\\n __typename\\n href\\n }}\\n showRealtorLogo: show_realtor_logo\\n hours\\n }}\\n\\n\\n }}\\n}}\\n","variables":{{"property_id":"{property_id}"}}}}'
response = self.session.post(self.PROPERTY_GQL, data=payload)
data = response.json()
try:
ads = data["data"]["home"]["consumerAdvertisers"]
except (KeyError, TypeError):
return []
agents = [Agent(name=ad["name"], phone=ad["phone"]) for ad in ads]
return agents
@staticmethod
def _parse_neighborhoods(result: dict) -> Optional[str]:
neighborhoods_list = []
neighborhoods = result["location"].get("neighborhoods", [])
if neighborhoods:
for neighborhood in neighborhoods:
name = neighborhood.get("name")
if name:
neighborhoods_list.append(name)
return ", ".join(neighborhoods_list) if neighborhoods_list else None
@staticmethod
def handle_none_safely(address_part):
if address_part is None:
return ""
return address_part
def _parse_address(self, result: dict, search_type):
if search_type == "general_search":
address = result["location"]["address"]
else:
address = result["address"]
return Address(
street=" ".join(
[
self.handle_none_safely(address.get("street_number")),
self.handle_none_safely(address.get("street_direction")),
self.handle_none_safely(address.get("street_name")),
self.handle_none_safely(address.get("street_suffix")),
]
).strip(),
unit=address["unit"],
city=address["city"],
state=address["state_code"],
zip=address["postal_code"],
)
@staticmethod
def _parse_description(result: dict) -> Description:
description_data = result.get("description", {})
if description_data is None or not isinstance(description_data, dict):
description_data = {}
style = description_data.get("type", "")
if style is not None:
style = style.upper()
primary_photo = ""
if result and "primary_photo" in result:
primary_photo_info = result["primary_photo"]
if primary_photo_info and "href" in primary_photo_info:
primary_photo_href = primary_photo_info["href"]
primary_photo = primary_photo_href.replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75")
return Description(
primary_photo=primary_photo,
alt_photos=RealtorScraper.process_alt_photos(result.get("photos")),
style=PropertyType(style) if style else None,
beds=description_data.get("beds"),
baths_full=description_data.get("baths_full"),
baths_half=description_data.get("baths_half"),
sqft=description_data.get("sqft"),
lot_sqft=description_data.get("lot_sqft"),
sold_price=description_data.get("sold_price"),
year_built=description_data.get("year_built"),
garage=description_data.get("garage"),
stories=description_data.get("stories"),
)
@staticmethod
def calculate_days_on_mls(result: dict) -> Optional[int]:
list_date_str = result.get("list_date")
list_date = datetime.strptime(list_date_str.split("T")[0], "%Y-%m-%d") if list_date_str else None
last_sold_date_str = result.get("last_sold_date")
last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None
today = datetime.now()
if list_date:
if result["status"] == "sold":
if last_sold_date:
days = (last_sold_date - list_date).days
if days >= 0:
return days
elif result["status"] in ("for_sale", "for_rent"):
days = (today - list_date).days
if days >= 0:
return days
@staticmethod
def process_alt_photos(photos_info):
try:
alt_photos = []
if photos_info:
for photo_info in photos_info:
href = photo_info.get("href", "")
alt_photo_href = href.replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75")
alt_photos.append(alt_photo_href)
return alt_photos
except Exception:
pass

View File

@@ -1,158 +0,0 @@
import json
from ..models import Property, Address, PropertyType, Building
from .. import Scraper
from typing import Any
class RedfinScraper(Scraper):
def __init__(self, scraper_input):
super().__init__(scraper_input)
self.listing_type = scraper_input.listing_type
def _handle_location(self):
url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(
self.location
)
response = self.session.get(url)
response_json = json.loads(response.text.replace("{}&&", ""))
def get_region_type(match_type: str):
if match_type == "4":
return "2" #: zip
elif match_type == "2":
return "6" #: city
elif match_type == "1":
return "address" #: address, needs to be handled differently
if response_json["payload"]["exactMatch"] is not None:
target = response_json["payload"]["exactMatch"]
else:
target = response_json["payload"]["sections"][0]["rows"][0]
return target["id"].split("_")[1], get_region_type(target["type"])
def _parse_home(self, home: dict, single_search: bool = False) -> Property:
def get_value(key: str) -> Any | None:
if key in home and "value" in home[key]:
return home[key]["value"]
if not single_search:
address = Address(
address_one=get_value("streetLine"),
city=home["city"],
state=home["state"],
zip_code=home["zip"],
)
else:
address_info = home["streetAddress"]
address = Address(
address_one=address_info["assembledAddress"],
city=home["city"],
state=home["state"],
zip_code=home["zip"],
)
url = "https://www.redfin.com{}".format(home["url"])
property_type = home["propertyType"] if "propertyType" in home else None
lot_size_data = home.get("lotSize")
if not isinstance(lot_size_data, int):
lot_size = (
lot_size_data.get("value", None)
if isinstance(lot_size_data, dict)
else None
)
else:
lot_size = lot_size_data
return Property(
site_name=self.site_name,
listing_type=self.listing_type,
address=address,
url=url,
beds=home["beds"] if "beds" in home else None,
baths=home["baths"] if "baths" in home else None,
stories=home["stories"] if "stories" in home else None,
agent_name=get_value("listingAgent"),
description=home["listingRemarks"] if "listingRemarks" in home else None,
year_built=get_value("yearBuilt")
if not single_search
else home["yearBuilt"],
square_feet=get_value("sqFt"),
lot_size=lot_size,
property_type=PropertyType.from_int_code(home.get("propertyType")),
price_per_square_foot=get_value("pricePerSqFt"),
price=get_value("price"),
mls_id=get_value("mlsId"),
)
def _parse_building(self, building: dict) -> Building:
return Building(
address=Address(
address_one=" ".join(
[
building['address']['streetNumber'],
building['address']['directionalPrefix'],
building['address']['streetName'],
building['address']['streetType'],
]
),
city=building['address']['city'],
state=building['address']['stateOrProvinceCode'],
zip_code=building['address']['postalCode'],
address_two=" ".join(
[
building['address']['unitType'],
building['address']['unitValue'],
]
)
),
site_name=self.site_name,
url="https://www.redfin.com{}".format(building["url"]),
listing_type=self.listing_type,
num_units=building["numUnitsForSale"],
)
def handle_address(self, home_id: str):
"""
EPs:
https://www.redfin.com/stingray/api/home/details/initialInfo?al=1&path=/TX/Austin/70-Rainey-St-78701/unit-1608/home/147337694
https://www.redfin.com/stingray/api/home/details/mainHouseInfoPanelInfo?propertyId=147337694&accessLevel=3
https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId=147337694&accessLevel=3
https://www.redfin.com/stingray/api/home/details/belowTheFold?propertyId=147337694&accessLevel=3
"""
url = "https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId={}&accessLevel=3".format(
home_id
)
response = self.session.get(url)
response_json = json.loads(response.text.replace("{}&&", ""))
parsed_home = self._parse_home(
response_json["payload"]["addressSectionInfo"], single_search=True
)
return [parsed_home]
def search(self):
region_id, region_type = self._handle_location()
if region_type == "address":
home_id = region_id
return self.handle_address(home_id)
url = "https://www.redfin.com/stingray/api/gis?al=1&region_id={}&region_type={}".format(
region_id, region_type
)
response = self.session.get(url)
response_json = json.loads(response.text.replace("{}&&", ""))
homes = [
self._parse_home(home) for home in response_json["payload"]["homes"]
] + [
self._parse_building(building) for building in response_json["payload"]["buildings"].values()
]
return homes

View File

@@ -1,210 +0,0 @@
import re
import json
from ..models import Property, Address, Building, ListingType, PropertyType
from ....exceptions import NoResultsFound, PropertyNotFound
from .. import Scraper
class ZillowScraper(Scraper):
def __init__(self, scraper_input):
super().__init__(scraper_input)
self.listing_type = scraper_input.listing_type
if self.listing_type == ListingType.FOR_SALE:
self.url = f"https://www.zillow.com/homes/for_sale/{self.location}_rb/"
elif self.listing_type == ListingType.FOR_RENT:
self.url = f"https://www.zillow.com/homes/for_rent/{self.location}_rb/"
def search(self):
resp = self.session.get(self.url, headers=self._get_headers())
resp.raise_for_status()
content = resp.text
match = re.search(
r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
content,
re.DOTALL,
)
if not match:
raise NoResultsFound(
"No results were found for Zillow with the given Location."
)
json_str = match.group(1)
data = json.loads(json_str)
if "searchPageState" in data["props"]["pageProps"]:
houses = data["props"]["pageProps"]["searchPageState"]["cat1"][
"searchResults"
]["listResults"]
return [self._parse_home(house) for house in houses]
elif "gdpClientCache" in data["props"]["pageProps"]:
gdp_client_cache = json.loads(data["props"]["pageProps"]["gdpClientCache"])
main_key = list(gdp_client_cache.keys())[0]
property_data = gdp_client_cache[main_key]["property"]
property = self._get_single_property_page(property_data)
return [property]
raise PropertyNotFound("Specific property data not found in the response.")
def _parse_home(self, home: dict):
"""
This method is used when a user enters a generic location & zillow returns more than one property
"""
url = (
f"https://www.zillow.com{home['detailUrl']}"
if "zillow.com" not in home["detailUrl"]
else home["detailUrl"]
)
if "hdpData" in home and "homeInfo" in home["hdpData"]:
price_data = self._extract_price(home)
address = self._extract_address(home)
agent_name = self._extract_agent_name(home)
beds = home["hdpData"]["homeInfo"]["bedrooms"]
baths = home["hdpData"]["homeInfo"]["bathrooms"]
property_type = home["hdpData"]["homeInfo"].get("homeType")
return Property(
site_name=self.site_name,
address=address,
agent_name=agent_name,
url=url,
beds=beds,
baths=baths,
listing_type=self.listing_type,
property_type=PropertyType(property_type),
**price_data,
)
else:
keys = ("addressStreet", "addressCity", "addressState", "addressZipcode")
address_one, city, state, zip_code = (home[key] for key in keys)
address_one, address_two = self._parse_address_two(address_one)
address = Address(address_one, city, state, zip_code, address_two)
building_info = self._extract_building_info(home)
return Building(
site_name=self.site_name, address=address, url=url, **building_info
)
def _get_single_property_page(self, property_data: dict):
"""
This method is used when a user enters the exact location & zillow returns just one property
"""
url = (
f"https://www.zillow.com{property_data['hdpUrl']}"
if "zillow.com" not in property_data["hdpUrl"]
else property_data["hdpUrl"]
)
address_data = property_data["address"]
address_one, address_two = self._parse_address_two(
address_data["streetAddress"]
)
address = Address(
address_one=address_one,
address_two=address_two,
city=address_data["city"],
state=address_data["state"],
zip_code=address_data["zipcode"],
)
property_type = property_data.get("homeType", None)
return Property(
site_name=self.site_name,
address=address,
url=url,
beds=property_data.get("bedrooms", None),
baths=property_data.get("bathrooms", None),
year_built=property_data.get("yearBuilt", None),
price=property_data.get("price", None),
lot_size=property_data.get("lotSize", None),
agent_name=property_data.get("attributionInfo", {}).get("agentName", None),
stories=property_data.get("resoFacts", {}).get("stories", None),
description=property_data.get("description", None),
mls_id=property_data.get("attributionInfo", {}).get("mlsId", None),
price_per_square_foot=property_data.get("resoFacts", {}).get(
"pricePerSquareFoot", None
),
square_feet=property_data.get("livingArea", None),
property_type=PropertyType(property_type),
listing_type=self.listing_type,
)
def _extract_building_info(self, home: dict) -> dict:
num_units = len(home["units"])
prices = [
int(unit["price"].replace("$", "").replace(",", "").split("+")[0])
for unit in home["units"]
]
return {
"listing_type": self.listing_type,
"num_units": len(home["units"]),
"min_unit_price": min(
(
int(unit["price"].replace("$", "").replace(",", "").split("+")[0])
for unit in home["units"]
)
),
"max_unit_price": max(
(
int(unit["price"].replace("$", "").replace(",", "").split("+")[0])
for unit in home["units"]
)
),
"avg_unit_price": sum(prices) // len(prices) if num_units else None,
}
@staticmethod
def _extract_price(home: dict) -> dict:
price = int(home["hdpData"]["homeInfo"]["priceForHDP"])
square_feet = home["hdpData"]["homeInfo"].get("livingArea")
lot_size = home["hdpData"]["homeInfo"].get("lotAreaValue")
price_per_square_foot = price // square_feet if square_feet and price else None
return {
k: v
for k, v in locals().items()
if k in ["price", "square_feet", "lot_size", "price_per_square_foot"]
}
@staticmethod
def _extract_agent_name(home: dict) -> str | None:
broker_str = home.get("brokerName", "")
match = re.search(r"Listing by: (.+)", broker_str)
return match.group(1) if match else None
@staticmethod
def _parse_address_two(address_one: str):
apt_match = re.search(r"(APT\s*.+|#[\s\S]+)$", address_one, re.I)
address_two = apt_match.group().strip() if apt_match else None
address_one = (
address_one.replace(address_two, "").strip() if address_two else address_one
)
return address_one, address_two
@staticmethod
def _extract_address(home: dict) -> Address:
keys = ("streetAddress", "city", "state", "zipcode")
address_one, city, state, zip_code = (
home["hdpData"]["homeInfo"][key] for key in keys
)
address_one, address_two = ZillowScraper._parse_address_two(address_one)
return Address(address_one, city, state, zip_code, address_two=address_two)
@staticmethod
def _get_headers():
return {
"authority": "parser-external.geo.moveaws.com",
"accept": "*/*",
"accept-language": "en-US,en;q=0.9",
"origin": "https://www.zillow.com",
"referer": "https://www.zillow.com/",
"sec-ch-ua": '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "cross-site",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}

View File

@@ -1,14 +1,6 @@
class InvalidSite(Exception):
"""Raised when a provided site is does not exist."""
class InvalidListingType(Exception):
"""Raised when a provided listing type is does not exist."""
class NoResultsFound(Exception):
"""Raised when no results are found for the given location"""
class PropertyNotFound(Exception):
"""Raised when no property is found for the given address"""
class InvalidDate(Exception):
"""Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23"""

101
homeharvest/utils.py Normal file
View File

@@ -0,0 +1,101 @@
import pandas as pd
from datetime import datetime
from .core.scrapers.models import Property, ListingType
from .exceptions import InvalidListingType, InvalidDate
ordered_properties = [
"property_url",
"mls",
"mls_id",
"status",
"style",
"street",
"unit",
"city",
"state",
"zip_code",
"beds",
"full_baths",
"half_baths",
"sqft",
"year_built",
"days_on_mls",
"list_price",
"list_date",
"sold_price",
"last_sold_date",
"lot_sqft",
"price_per_sqft",
"latitude",
"longitude",
"stories",
"hoa_fee",
"parking_garage",
"agent",
"broker",
"broker_phone",
"primary_photo",
"alt_photos",
]
def process_result(result: Property) -> pd.DataFrame:
prop_data = {prop: None for prop in ordered_properties}
prop_data.update(result.__dict__)
if "address" in prop_data:
address_data = prop_data["address"]
prop_data["street"] = address_data.street
prop_data["unit"] = address_data.unit
prop_data["city"] = address_data.city
prop_data["state"] = address_data.state
prop_data["zip_code"] = address_data.zip
if "agents" in prop_data:
agents = prop_data["agents"]
if agents:
prop_data["agent"] = agents[0].name
if len(agents) > 1:
prop_data["broker"] = agents[1].name
prop_data["broker_phone"] = agents[1].phone
prop_data["price_per_sqft"] = prop_data["prc_sqft"]
description = result.description
prop_data["primary_photo"] = description.primary_photo
prop_data["alt_photos"] = ", ".join(description.alt_photos)
prop_data["style"] = description.style.value
prop_data["beds"] = description.beds
prop_data["full_baths"] = description.baths_full
prop_data["half_baths"] = description.baths_half
prop_data["sqft"] = description.sqft
prop_data["lot_sqft"] = description.lot_sqft
prop_data["sold_price"] = description.sold_price
prop_data["year_built"] = description.year_built
prop_data["parking_garage"] = description.garage
prop_data["stories"] = description.stories
properties_df = pd.DataFrame([prop_data])
properties_df = properties_df.reindex(columns=ordered_properties)
return properties_df[ordered_properties]
def validate_input(listing_type: str) -> None:
if listing_type.upper() not in ListingType.__members__:
raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.")
def validate_dates(date_from: str | None, date_to: str | None) -> None:
if (date_from is not None and date_to is None) or (date_from is None and date_to is not None):
raise InvalidDate("Both date_from and date_to must be provided.")
if date_from and date_to:
try:
date_from_obj = datetime.strptime(date_from, "%Y-%m-%d")
date_to_obj = datetime.strptime(date_to, "%Y-%m-%d")
if date_to_obj < date_from_obj:
raise InvalidDate("date_to must be after date_from.")
except ValueError as e:
raise InvalidDate(f"Invalid date format or range")

453
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "certifi"
@@ -11,88 +11,114 @@ files = [
{file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"},
]
[[package]]
name = "cfgv"
version = "3.4.0"
description = "Validate configuration and produce human readable error messages."
optional = false
python-versions = ">=3.8"
files = [
{file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"},
{file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"},
]
[[package]]
name = "charset-normalizer"
version = "3.2.0"
version = "3.3.0"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
optional = false
python-versions = ">=3.7.0"
files = [
{file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"},
{file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"},
{file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"},
{file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"},
{file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"},
{file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"},
{file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"},
{file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"},
{file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"},
{file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"},
{file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"},
{file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"},
{file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"},
{file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"},
{file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"},
{file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"},
{file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"},
{file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"},
{file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"},
{file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"},
{file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"},
{file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"},
{file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"},
{file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"},
{file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"},
{file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"},
{file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"},
{file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"},
{file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"},
{file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"},
{file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"},
{file = "charset-normalizer-3.3.0.tar.gz", hash = "sha256:63563193aec44bce707e0c5ca64ff69fa72ed7cf34ce6e11d5127555756fd2f6"},
{file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:effe5406c9bd748a871dbcaf3ac69167c38d72db8c9baf3ff954c344f31c4cbe"},
{file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4162918ef3098851fcd8a628bf9b6a98d10c380725df9e04caf5ca6dd48c847a"},
{file = "charset_normalizer-3.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0570d21da019941634a531444364f2482e8db0b3425fcd5ac0c36565a64142c8"},
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5707a746c6083a3a74b46b3a631d78d129edab06195a92a8ece755aac25a3f3d"},
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:278c296c6f96fa686d74eb449ea1697f3c03dc28b75f873b65b5201806346a69"},
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a4b71f4d1765639372a3b32d2638197f5cd5221b19531f9245fcc9ee62d38f56"},
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5969baeaea61c97efa706b9b107dcba02784b1601c74ac84f2a532ea079403e"},
{file = "charset_normalizer-3.3.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3f93dab657839dfa61025056606600a11d0b696d79386f974e459a3fbc568ec"},
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:db756e48f9c5c607b5e33dd36b1d5872d0422e960145b08ab0ec7fd420e9d649"},
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:232ac332403e37e4a03d209a3f92ed9071f7d3dbda70e2a5e9cff1c4ba9f0678"},
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e5c1502d4ace69a179305abb3f0bb6141cbe4714bc9b31d427329a95acfc8bdd"},
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:2502dd2a736c879c0f0d3e2161e74d9907231e25d35794584b1ca5284e43f596"},
{file = "charset_normalizer-3.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23e8565ab7ff33218530bc817922fae827420f143479b753104ab801145b1d5b"},
{file = "charset_normalizer-3.3.0-cp310-cp310-win32.whl", hash = "sha256:1872d01ac8c618a8da634e232f24793883d6e456a66593135aeafe3784b0848d"},
{file = "charset_normalizer-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:557b21a44ceac6c6b9773bc65aa1b4cc3e248a5ad2f5b914b91579a32e22204d"},
{file = "charset_normalizer-3.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d7eff0f27edc5afa9e405f7165f85a6d782d308f3b6b9d96016c010597958e63"},
{file = "charset_normalizer-3.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a685067d05e46641d5d1623d7c7fdf15a357546cbb2f71b0ebde91b175ffc3e"},
{file = "charset_normalizer-3.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0d3d5b7db9ed8a2b11a774db2bbea7ba1884430a205dbd54a32d61d7c2a190fa"},
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2935ffc78db9645cb2086c2f8f4cfd23d9b73cc0dc80334bc30aac6f03f68f8c"},
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fe359b2e3a7729010060fbca442ca225280c16e923b37db0e955ac2a2b72a05"},
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:380c4bde80bce25c6e4f77b19386f5ec9db230df9f2f2ac1e5ad7af2caa70459"},
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0d1e3732768fecb052d90d62b220af62ead5748ac51ef61e7b32c266cac9293"},
{file = "charset_normalizer-3.3.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b2919306936ac6efb3aed1fbf81039f7087ddadb3160882a57ee2ff74fd2382"},
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f8888e31e3a85943743f8fc15e71536bda1c81d5aa36d014a3c0c44481d7db6e"},
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:82eb849f085624f6a607538ee7b83a6d8126df6d2f7d3b319cb837b289123078"},
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7b8b8bf1189b3ba9b8de5c8db4d541b406611a71a955bbbd7385bbc45fcb786c"},
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5adf257bd58c1b8632046bbe43ee38c04e1038e9d37de9c57a94d6bd6ce5da34"},
{file = "charset_normalizer-3.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c350354efb159b8767a6244c166f66e67506e06c8924ed74669b2c70bc8735b1"},
{file = "charset_normalizer-3.3.0-cp311-cp311-win32.whl", hash = "sha256:02af06682e3590ab952599fbadac535ede5d60d78848e555aa58d0c0abbde786"},
{file = "charset_normalizer-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:86d1f65ac145e2c9ed71d8ffb1905e9bba3a91ae29ba55b4c46ae6fc31d7c0d4"},
{file = "charset_normalizer-3.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:3b447982ad46348c02cb90d230b75ac34e9886273df3a93eec0539308a6296d7"},
{file = "charset_normalizer-3.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:abf0d9f45ea5fb95051c8bfe43cb40cda383772f7e5023a83cc481ca2604d74e"},
{file = "charset_normalizer-3.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b09719a17a2301178fac4470d54b1680b18a5048b481cb8890e1ef820cb80455"},
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3d9b48ee6e3967b7901c052b670c7dda6deb812c309439adaffdec55c6d7b78"},
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:edfe077ab09442d4ef3c52cb1f9dab89bff02f4524afc0acf2d46be17dc479f5"},
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3debd1150027933210c2fc321527c2299118aa929c2f5a0a80ab6953e3bd1908"},
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f63face3a527284f7bb8a9d4f78988e3c06823f7bea2bd6f0e0e9298ca0403"},
{file = "charset_normalizer-3.3.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24817cb02cbef7cd499f7c9a2735286b4782bd47a5b3516a0e84c50eab44b98e"},
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c71f16da1ed8949774ef79f4a0260d28b83b3a50c6576f8f4f0288d109777989"},
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9cf3126b85822c4e53aa28c7ec9869b924d6fcfb76e77a45c44b83d91afd74f9"},
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:b3b2316b25644b23b54a6f6401074cebcecd1244c0b8e80111c9a3f1c8e83d65"},
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:03680bb39035fbcffe828eae9c3f8afc0428c91d38e7d61aa992ef7a59fb120e"},
{file = "charset_normalizer-3.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cc152c5dd831641e995764f9f0b6589519f6f5123258ccaca8c6d34572fefa8"},
{file = "charset_normalizer-3.3.0-cp312-cp312-win32.whl", hash = "sha256:b8f3307af845803fb0b060ab76cf6dd3a13adc15b6b451f54281d25911eb92df"},
{file = "charset_normalizer-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:8eaf82f0eccd1505cf39a45a6bd0a8cf1c70dcfc30dba338207a969d91b965c0"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dc45229747b67ffc441b3de2f3ae5e62877a282ea828a5bdb67883c4ee4a8810"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f4a0033ce9a76e391542c182f0d48d084855b5fcba5010f707c8e8c34663d77"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ada214c6fa40f8d800e575de6b91a40d0548139e5dc457d2ebb61470abf50186"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b1121de0e9d6e6ca08289583d7491e7fcb18a439305b34a30b20d8215922d43c"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1063da2c85b95f2d1a430f1c33b55c9c17ffaf5e612e10aeaad641c55a9e2b9d"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70f1d09c0d7748b73290b29219e854b3207aea922f839437870d8cc2168e31cc"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:250c9eb0f4600361dd80d46112213dff2286231d92d3e52af1e5a6083d10cad9"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:750b446b2ffce1739e8578576092179160f6d26bd5e23eb1789c4d64d5af7dc7"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:fc52b79d83a3fe3a360902d3f5d79073a993597d48114c29485e9431092905d8"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:588245972aca710b5b68802c8cad9edaa98589b1b42ad2b53accd6910dad3545"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e39c7eb31e3f5b1f88caff88bcff1b7f8334975b46f6ac6e9fc725d829bc35d4"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-win32.whl", hash = "sha256:abecce40dfebbfa6abf8e324e1860092eeca6f7375c8c4e655a8afb61af58f2c"},
{file = "charset_normalizer-3.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:24a91a981f185721542a0b7c92e9054b7ab4fea0508a795846bc5b0abf8118d4"},
{file = "charset_normalizer-3.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:67b8cc9574bb518ec76dc8e705d4c39ae78bb96237cb533edac149352c1f39fe"},
{file = "charset_normalizer-3.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac71b2977fb90c35d41c9453116e283fac47bb9096ad917b8819ca8b943abecd"},
{file = "charset_normalizer-3.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3ae38d325b512f63f8da31f826e6cb6c367336f95e418137286ba362925c877e"},
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:542da1178c1c6af8873e143910e2269add130a299c9106eef2594e15dae5e482"},
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30a85aed0b864ac88309b7d94be09f6046c834ef60762a8833b660139cfbad13"},
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aae32c93e0f64469f74ccc730a7cb21c7610af3a775157e50bbd38f816536b38"},
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b26ddf78d57f1d143bdf32e820fd8935d36abe8a25eb9ec0b5a71c82eb3895"},
{file = "charset_normalizer-3.3.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f5d10bae5d78e4551b7be7a9b29643a95aded9d0f602aa2ba584f0388e7a557"},
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:249c6470a2b60935bafd1d1d13cd613f8cd8388d53461c67397ee6a0f5dce741"},
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c5a74c359b2d47d26cdbbc7845e9662d6b08a1e915eb015d044729e92e7050b7"},
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:b5bcf60a228acae568e9911f410f9d9e0d43197d030ae5799e20dca8df588287"},
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:187d18082694a29005ba2944c882344b6748d5be69e3a89bf3cc9d878e548d5a"},
{file = "charset_normalizer-3.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:81bf654678e575403736b85ba3a7867e31c2c30a69bc57fe88e3ace52fb17b89"},
{file = "charset_normalizer-3.3.0-cp38-cp38-win32.whl", hash = "sha256:85a32721ddde63c9df9ebb0d2045b9691d9750cb139c161c80e500d210f5e26e"},
{file = "charset_normalizer-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:468d2a840567b13a590e67dd276c570f8de00ed767ecc611994c301d0f8c014f"},
{file = "charset_normalizer-3.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e0fc42822278451bc13a2e8626cf2218ba570f27856b536e00cfa53099724828"},
{file = "charset_normalizer-3.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:09c77f964f351a7369cc343911e0df63e762e42bac24cd7d18525961c81754f4"},
{file = "charset_normalizer-3.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12ebea541c44fdc88ccb794a13fe861cc5e35d64ed689513a5c03d05b53b7c82"},
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:805dfea4ca10411a5296bcc75638017215a93ffb584c9e344731eef0dcfb026a"},
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96c2b49eb6a72c0e4991d62406e365d87067ca14c1a729a870d22354e6f68115"},
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaf7b34c5bc56b38c931a54f7952f1ff0ae77a2e82496583b247f7c969eb1479"},
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:619d1c96099be5823db34fe89e2582b336b5b074a7f47f819d6b3a57ff7bdb86"},
{file = "charset_normalizer-3.3.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0ac5e7015a5920cfce654c06618ec40c33e12801711da6b4258af59a8eff00a"},
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:93aa7eef6ee71c629b51ef873991d6911b906d7312c6e8e99790c0f33c576f89"},
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7966951325782121e67c81299a031f4c115615e68046f79b85856b86ebffc4cd"},
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:02673e456dc5ab13659f85196c534dc596d4ef260e4d86e856c3b2773ce09843"},
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:c2af80fb58f0f24b3f3adcb9148e6203fa67dd3f61c4af146ecad033024dde43"},
{file = "charset_normalizer-3.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:153e7b6e724761741e0974fc4dcd406d35ba70b92bfe3fedcb497226c93b9da7"},
{file = "charset_normalizer-3.3.0-cp39-cp39-win32.whl", hash = "sha256:d47ecf253780c90ee181d4d871cd655a789da937454045b17b5798da9393901a"},
{file = "charset_normalizer-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:d97d85fa63f315a8bdaba2af9a6a686e0eceab77b3089af45133252618e70884"},
{file = "charset_normalizer-3.3.0-py3-none-any.whl", hash = "sha256:e46cd37076971c1040fc8c41273a8b3e2c624ce4f2be3f5dfcb7a430c1d3acc2"},
]
[[package]]
@@ -106,6 +132,17 @@ files = [
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
[[package]]
name = "distlib"
version = "0.3.8"
description = "Distribution utilities"
optional = false
python-versions = "*"
files = [
{file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"},
{file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"},
]
[[package]]
name = "exceptiongroup"
version = "1.1.3"
@@ -120,6 +157,36 @@ files = [
[package.extras]
test = ["pytest (>=6)"]
[[package]]
name = "filelock"
version = "3.13.4"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.8"
files = [
{file = "filelock-3.13.4-py3-none-any.whl", hash = "sha256:404e5e9253aa60ad457cae1be07c0f0ca90a63931200a47d9b6a6af84fd7b45f"},
{file = "filelock-3.13.4.tar.gz", hash = "sha256:d13f466618bfde72bd2c18255e269f72542c6e70e7bac83a0232d6b1cc5c8cf4"},
]
[package.extras]
docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"]
testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"]
typing = ["typing-extensions (>=4.8)"]
[[package]]
name = "identify"
version = "2.5.35"
description = "File identification library for Python"
optional = false
python-versions = ">=3.8"
files = [
{file = "identify-2.5.35-py2.py3-none-any.whl", hash = "sha256:c4de0081837b211594f8e877a6b4fad7ca32bbfc1a9307fdd61c28bfe923f13e"},
{file = "identify-2.5.35.tar.gz", hash = "sha256:10a7ca245cfcd756a554a7288159f72ff105ad233c7c4b9c6f0f4d108f5f6791"},
]
[package.extras]
license = ["ukkonen"]
[[package]]
name = "idna"
version = "3.4"
@@ -143,39 +210,19 @@ files = [
]
[[package]]
name = "numpy"
version = "1.25.2"
description = "Fundamental package for array computing in Python"
name = "nodeenv"
version = "1.8.0"
description = "Node.js virtual environment builder"
optional = false
python-versions = ">=3.9"
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
files = [
{file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"},
{file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"},
{file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"},
{file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"},
{file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"},
{file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"},
{file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"},
{file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"},
{file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"},
{file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"},
{file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"},
{file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"},
{file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"},
{file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"},
{file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"},
{file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"},
{file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"},
{file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"},
{file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"},
{file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"},
{file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"},
{file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"},
{file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"},
{file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"},
{file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"},
{file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"},
{file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"},
]
[package.dependencies]
setuptools = "*"
[[package]]
name = "numpy"
version = "1.26.0"
@@ -219,47 +266,54 @@ files = [
[[package]]
name = "packaging"
version = "23.1"
version = "23.2"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.7"
files = [
{file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
{file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
{file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
{file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
]
[[package]]
name = "pandas"
version = "2.1.0"
version = "2.1.1"
description = "Powerful data structures for data analysis, time series, and statistics"
optional = false
python-versions = ">=3.9"
files = [
{file = "pandas-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:40dd20439ff94f1b2ed55b393ecee9cb6f3b08104c2c40b0cb7186a2f0046242"},
{file = "pandas-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4f38e4fedeba580285eaac7ede4f686c6701a9e618d8a857b138a126d067f2f"},
{file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e6a0fe052cf27ceb29be9429428b4918f3740e37ff185658f40d8702f0b3e09"},
{file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d81e1813191070440d4c7a413cb673052b3b4a984ffd86b8dd468c45742d3cc"},
{file = "pandas-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eb20252720b1cc1b7d0b2879ffc7e0542dd568f24d7c4b2347cb035206936421"},
{file = "pandas-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:38f74ef7ebc0ffb43b3d633e23d74882bce7e27bfa09607f3c5d3e03ffd9a4a5"},
{file = "pandas-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cda72cc8c4761c8f1d97b169661f23a86b16fdb240bdc341173aee17e4d6cedd"},
{file = "pandas-2.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d97daeac0db8c993420b10da4f5f5b39b01fc9ca689a17844e07c0a35ac96b4b"},
{file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8c58b1113892e0c8078f006a167cc210a92bdae23322bb4614f2f0b7a4b510f"},
{file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:629124923bcf798965b054a540f9ccdfd60f71361255c81fa1ecd94a904b9dd3"},
{file = "pandas-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:70cf866af3ab346a10debba8ea78077cf3a8cd14bd5e4bed3d41555a3280041c"},
{file = "pandas-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d53c8c1001f6a192ff1de1efe03b31a423d0eee2e9e855e69d004308e046e694"},
{file = "pandas-2.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:86f100b3876b8c6d1a2c66207288ead435dc71041ee4aea789e55ef0e06408cb"},
{file = "pandas-2.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28f330845ad21c11db51e02d8d69acc9035edfd1116926ff7245c7215db57957"},
{file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9a6ccf0963db88f9b12df6720e55f337447aea217f426a22d71f4213a3099a6"},
{file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99e678180bc59b0c9443314297bddce4ad35727a1a2656dbe585fd78710b3b9"},
{file = "pandas-2.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b31da36d376d50a1a492efb18097b9101bdbd8b3fbb3f49006e02d4495d4c644"},
{file = "pandas-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0164b85937707ec7f70b34a6c3a578dbf0f50787f910f21ca3b26a7fd3363437"},
{file = "pandas-2.1.0.tar.gz", hash = "sha256:62c24c7fc59e42b775ce0679cfa7b14a5f9bfb7643cfbe708c960699e05fb918"},
{file = "pandas-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58d997dbee0d4b64f3cb881a24f918b5f25dd64ddf31f467bb9b67ae4c63a1e4"},
{file = "pandas-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02304e11582c5d090e5a52aec726f31fe3f42895d6bfc1f28738f9b64b6f0614"},
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffa8f0966de2c22de408d0e322db2faed6f6e74265aa0856f3824813cf124363"},
{file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1f84c144dee086fe4f04a472b5cd51e680f061adf75c1ae4fc3a9275560f8f4"},
{file = "pandas-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:75ce97667d06d69396d72be074f0556698c7f662029322027c226fd7a26965cb"},
{file = "pandas-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:4c3f32fd7c4dccd035f71734df39231ac1a6ff95e8bdab8d891167197b7018d2"},
{file = "pandas-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e2959720b70e106bb1d8b6eadd8ecd7c8e99ccdbe03ee03260877184bb2877d"},
{file = "pandas-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25e8474a8eb258e391e30c288eecec565bfed3e026f312b0cbd709a63906b6f8"},
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8bd1685556f3374520466998929bade3076aeae77c3e67ada5ed2b90b4de7f0"},
{file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc3657869c7902810f32bd072f0740487f9e030c1a3ab03e0af093db35a9d14e"},
{file = "pandas-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:05674536bd477af36aa2effd4ec8f71b92234ce0cc174de34fd21e2ee99adbc2"},
{file = "pandas-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:b407381258a667df49d58a1b637be33e514b07f9285feb27769cedb3ab3d0b3a"},
{file = "pandas-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c747793c4e9dcece7bb20156179529898abf505fe32cb40c4052107a3c620b49"},
{file = "pandas-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3bcad1e6fb34b727b016775bea407311f7721db87e5b409e6542f4546a4951ea"},
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5ec7740f9ccb90aec64edd71434711f58ee0ea7f5ed4ac48be11cfa9abf7317"},
{file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29deb61de5a8a93bdd033df328441a79fcf8dd3c12d5ed0b41a395eef9cd76f0"},
{file = "pandas-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f99bebf19b7e03cf80a4e770a3e65eee9dd4e2679039f542d7c1ace7b7b1daa"},
{file = "pandas-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:84e7e910096416adec68075dc87b986ff202920fb8704e6d9c8c9897fe7332d6"},
{file = "pandas-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366da7b0e540d1b908886d4feb3d951f2f1e572e655c1160f5fde28ad4abb750"},
{file = "pandas-2.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e50e72b667415a816ac27dfcfe686dc5a0b02202e06196b943d54c4f9c7693e"},
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc1ab6a25da197f03ebe6d8fa17273126120874386b4ac11c1d687df288542dd"},
{file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0dbfea0dd3901ad4ce2306575c54348d98499c95be01b8d885a2737fe4d7a98"},
{file = "pandas-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0489b0e6aa3d907e909aef92975edae89b1ee1654db5eafb9be633b0124abe97"},
{file = "pandas-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:4cdb0fab0400c2cb46dafcf1a0fe084c8bb2480a1fa8d81e19d15e12e6d4ded2"},
{file = "pandas-2.1.1.tar.gz", hash = "sha256:fecb198dc389429be557cde50a2d46da8434a17fe37d7d41ff102e3987fd947b"},
]
[package.dependencies]
numpy = [
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.23.2", markers = "python_version >= \"3.11\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@@ -289,6 +343,21 @@ sql-other = ["SQLAlchemy (>=1.4.36)"]
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
xml = ["lxml (>=4.8.0)"]
[[package]]
name = "platformdirs"
version = "4.2.0"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
optional = false
python-versions = ">=3.8"
files = [
{file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"},
{file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"},
]
[package.extras]
docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"]
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"]
[[package]]
name = "pluggy"
version = "1.3.0"
@@ -304,6 +373,24 @@ files = [
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "pre-commit"
version = "3.7.0"
description = "A framework for managing and maintaining multi-language pre-commit hooks."
optional = false
python-versions = ">=3.9"
files = [
{file = "pre_commit-3.7.0-py2.py3-none-any.whl", hash = "sha256:5eae9e10c2b5ac51577c3452ec0a490455c45a0533f7960f993a0d01e59decab"},
{file = "pre_commit-3.7.0.tar.gz", hash = "sha256:e209d61b8acdcf742404408531f0c37d49d2c734fd7cff2d6076083d191cb060"},
]
[package.dependencies]
cfgv = ">=2.0.0"
identify = ">=1.0.0"
nodeenv = ">=0.11.1"
pyyaml = ">=5.1"
virtualenv = ">=20.10.0"
[[package]]
name = "pytest"
version = "7.4.2"
@@ -351,6 +438,66 @@ files = [
{file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"},
]
[[package]]
name = "pyyaml"
version = "6.0.1"
description = "YAML parser and emitter for Python"
optional = false
python-versions = ">=3.6"
files = [
{file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"},
{file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"},
{file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"},
{file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"},
{file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"},
{file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"},
{file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"},
{file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"},
{file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"},
{file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"},
{file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
]
[[package]]
name = "requests"
version = "2.31.0"
@@ -372,6 +519,22 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "setuptools"
version = "69.5.1"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
optional = false
python-versions = ">=3.8"
files = [
{file = "setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"},
{file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"},
]
[package.extras]
docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
[[package]]
name = "six"
version = "1.16.0"
@@ -407,13 +570,13 @@ files = [
[[package]]
name = "urllib3"
version = "2.0.4"
version = "2.0.6"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.7"
files = [
{file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"},
{file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"},
{file = "urllib3-2.0.6-py3-none-any.whl", hash = "sha256:7a7c7003b000adf9e7ca2a377c9688bbc54ed41b985789ed576570342a375cd2"},
{file = "urllib3-2.0.6.tar.gz", hash = "sha256:b19e1a85d206b56d7df1d5e683df4a7725252a964e3993648dd0fb5a1c157564"},
]
[package.extras]
@@ -422,7 +585,27 @@ secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "virtualenv"
version = "20.25.1"
description = "Virtual Python Environment builder"
optional = false
python-versions = ">=3.7"
files = [
{file = "virtualenv-20.25.1-py3-none-any.whl", hash = "sha256:961c026ac520bac5f69acb8ea063e8a4f071bcc9457b9c1f28f6b085c511583a"},
{file = "virtualenv-20.25.1.tar.gz", hash = "sha256:e08e13ecdca7a0bd53798f356d5831434afa5b07b93f0abdf0797b7a06ffe197"},
]
[package.dependencies]
distlib = ">=0.3.7,<1"
filelock = ">=3.12.2,<4"
platformdirs = ">=3.9.1,<5"
[package.extras]
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "eede625d6d45085e143b0af246cb2ce00cff8579c667be3b63387c8594a5570d"
python-versions = ">=3.10,<3.13"
content-hash = "371781da268d5f61d6e798c023777f337b620e9b07a48c316825d7b998b63f02"

View File

@@ -1,19 +1,23 @@
[tool.poetry]
name = "homeharvest"
version = "0.1.3"
version = "0.3.17"
description = "Real estate scraping library"
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/HomeHarvest"
readme = "README.md"
[tool.poetry.scripts]
homeharvest = "homeharvest.cli:main"
[tool.poetry.dependencies]
python = "^3.10"
python = ">=3.10,<3.13"
requests = "^2.31.0"
pandas = "^2.1.0"
pandas = "^2.1.1"
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.2"
pre-commit = "^3.7.0"
[build-system]
requires = ["poetry-core"]

View File

@@ -1,12 +1,144 @@
from homeharvest import scrape_property
def test_realtor_pending_or_contingent():
pending_or_contingent_result = scrape_property(location="Surprise, AZ", listing_type="pending")
regular_result = scrape_property(location="Surprise, AZ", listing_type="for_sale")
assert all([result is not None for result in [pending_or_contingent_result, regular_result]])
assert len(pending_or_contingent_result) != len(regular_result)
def test_realtor_pending_comps():
pending_comps = scrape_property(
location="2530 Al Lipscomb Way",
radius=5,
past_days=180,
listing_type="pending",
)
for_sale_comps = scrape_property(
location="2530 Al Lipscomb Way",
radius=5,
past_days=180,
listing_type="for_sale",
)
sold_comps = scrape_property(
location="2530 Al Lipscomb Way",
radius=5,
past_days=180,
listing_type="sold",
)
results = [pending_comps, for_sale_comps, sold_comps]
assert all([result is not None for result in results])
#: assert all lengths are different
assert len(set([len(result) for result in results])) == len(results)
def test_realtor_sold_past():
result = scrape_property(
location="San Diego, CA",
past_days=30,
listing_type="sold",
)
assert result is not None and len(result) > 0
def test_realtor_comps():
result = scrape_property(
location="2530 Al Lipscomb Way",
radius=0.5,
past_days=180,
listing_type="sold",
)
assert result is not None and len(result) > 0
def test_realtor_last_x_days_sold():
days_result_30 = scrape_property(location="Dallas, TX", listing_type="sold", past_days=30)
days_result_10 = scrape_property(location="Dallas, TX", listing_type="sold", past_days=10)
assert all([result is not None for result in [days_result_30, days_result_10]]) and len(days_result_30) != len(
days_result_10
)
def test_realtor_date_range_sold():
days_result_30 = scrape_property(
location="Dallas, TX", listing_type="sold", date_from="2023-05-01", date_to="2023-05-28"
)
days_result_60 = scrape_property(
location="Dallas, TX", listing_type="sold", date_from="2023-04-01", date_to="2023-06-10"
)
assert all([result is not None for result in [days_result_30, days_result_60]]) and len(days_result_30) < len(
days_result_60
)
def test_realtor_single_property():
results = [
scrape_property(
location="15509 N 172nd Dr, Surprise, AZ 85388",
listing_type="for_sale",
),
scrape_property(
location="2530 Al Lipscomb Way",
listing_type="for_sale",
),
]
assert all([result is not None for result in results])
def test_realtor():
results = [
scrape_property(location="2530 Al Lipscomb Way", site_name="realtor.com"),
scrape_property(location="Phoenix, AZ", site_name="realtor.com"), #: does not support "city, state, USA" format
scrape_property(location="Dallas, TX", site_name="realtor.com"), #: does not support "city, state, USA" format
scrape_property(location="85281", site_name="realtor.com"),
scrape_property(
location="2530 Al Lipscomb Way",
listing_type="for_sale",
),
scrape_property(location="Phoenix, AZ", listing_type="for_rent"), #: does not support "city, state, USA" format
scrape_property(location="Dallas, TX", listing_type="sold"), #: does not support "city, state, USA" format
scrape_property(location="85281"),
]
assert all([result is not None for result in results])
def test_realtor_city():
results = scrape_property(
location="Atlanta, GA",
listing_type="for_sale",
)
assert results is not None and len(results) > 0
def test_realtor_bad_address():
bad_results = scrape_property(
location="abceefg ju098ot498hh9",
listing_type="for_sale",
)
if len(bad_results) == 0:
assert True
def test_realtor_foreclosed():
foreclosed = scrape_property(location="Dallas, TX", listing_type="for_sale", past_days=100, foreclosure=True)
not_foreclosed = scrape_property(location="Dallas, TX", listing_type="for_sale", past_days=100, foreclosure=False)
assert len(foreclosed) != len(not_foreclosed)
def test_realtor_agent():
scraped = scrape_property(location="Detroit, MI", listing_type="for_sale")
assert scraped["agent"].nunique() > 1

View File

@@ -1,12 +0,0 @@
from homeharvest import scrape_property
def test_redfin():
results = [
scrape_property(location="2530 Al Lipscomb Way", site_name="redfin"),
scrape_property(location="Phoenix, AZ, USA", site_name="redfin"),
scrape_property(location="Dallas, TX, USA", site_name="redfin"),
scrape_property(location="85281", site_name="redfin"),
]
assert all([result is not None for result in results])

View File

@@ -1,12 +0,0 @@
from homeharvest import scrape_property
def test_zillow():
results = [
scrape_property(location="2530 Al Lipscomb Way", site_name="zillow"),
scrape_property(location="Phoenix, AZ, USA", site_name="zillow"),
scrape_property(location="Dallas, TX, USA", site_name="zillow"),
scrape_property(location="85281", site_name="zillow"),
]
assert all([result is not None for result in results])