From 0946abd35ac2513879c5a8028c02d26b77836623 Mon Sep 17 00:00:00 2001
From: Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com>
Date: Fri, 15 Sep 2023 20:58:54 -0700
Subject: [PATCH] - realtor init

---
Review notes (commentary placed after the "---" separator, so it is not part
of the commit message):

* scrape_property(): site_name loses its "redfin" default and moves ahead of
  listing_type. Callers that omitted site_name, or that passed listing_type
  as the second positional argument, need updating (tests/test_redfin.py is
  adjusted in this patch).
* RealtorScraper.search() is still a stub: it reads
  location_info['area_type'], prints 'a', and has no return statement.
* RealtorScraper.handle_location() indexes response_json['autocomplete'][0]
  without inspecting the response first; presumably an empty suggestion list
  would raise IndexError -- confirm against the /suggest endpoint.

 homeharvest/__init__.py                       |  4 +-
 homeharvest/core/scrapers/__init__.py         |  2 +
 homeharvest/core/scrapers/realtor/__init__.py | 42 +++++++++++++++++++
 tests/test_realtor.py                         | 10 +++++
 tests/test_redfin.py                          |  1 +
 5 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 homeharvest/core/scrapers/realtor/__init__.py
 create mode 100644 tests/test_realtor.py

diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py
index 2c642eb..dd8af13 100644
--- a/homeharvest/__init__.py
+++ b/homeharvest/__init__.py
@@ -1,4 +1,5 @@
 from .core.scrapers.redfin import RedfinScraper
+from .core.scrapers.realtor import RealtorScraper
 from .core.scrapers.types import ListingType, Home
 from .core.scrapers import ScraperInput
 from .exceptions import InvalidSite, InvalidListingType
@@ -6,13 +7,14 @@ from .exceptions import InvalidSite, InvalidListingType
 
 _scrapers = {
     "redfin": RedfinScraper,
+    "realtor.com": RealtorScraper
 }
 
 
 def scrape_property(
     location: str,
+    site_name: str,
     listing_type: str = "for_sale",  #: for_sale, for_rent, sold
-    site_name: str = "redfin",
 ) -> list[Home]:  #: eventually, return pandas dataframe
     if site_name.lower() not in _scrapers:
         raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py
index 88198e2..7bc9ad3 100644
--- a/homeharvest/core/scrapers/__init__.py
+++ b/homeharvest/core/scrapers/__init__.py
@@ -25,3 +25,5 @@ class Scraper:
 
     @staticmethod
     def parse_home(home) -> Home: ...
+
+    def handle_location(self): ...
diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py
new file mode 100644
index 0000000..39ce8ad
--- /dev/null
+++ b/homeharvest/core/scrapers/realtor/__init__.py
@@ -0,0 +1,42 @@
+import json
+from ..types import Home, Address
+from .. import Scraper
+from typing import Any
+
+
+class RealtorScraper(Scraper):
+    def __init__(self, scraper_input):
+        super().__init__(scraper_input)
+
+    def handle_location(self):
+        headers = {
+            'authority': 'parser-external.geo.moveaws.com',
+            'accept': '*/*',
+            'accept-language': 'en-US,en;q=0.9',
+            'origin': 'https://www.realtor.com',
+            'referer': 'https://www.realtor.com/',
+            'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"Windows"',
+            'sec-fetch-dest': 'empty',
+            'sec-fetch-mode': 'cors',
+            'sec-fetch-site': 'cross-site',
+            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
+        }
+
+        params = {
+            'input': self.location,
+            'client_id': 'for-sale',
+            'limit': '1',
+            'area_types': 'city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park',
+        }
+
+        response = self.session.get('https://parser-external.geo.moveaws.com/suggest', params=params, headers=headers)
+        response_json = response.json()
+
+        return response_json['autocomplete'][0]
+
+    def search(self):
+        location_info = self.handle_location()
+        location_type = location_info['area_type']
+        print('a')
diff --git a/tests/test_realtor.py b/tests/test_realtor.py
new file mode 100644
index 0000000..f43cd16
--- /dev/null
+++ b/tests/test_realtor.py
@@ -0,0 +1,10 @@
+from homeharvest import scrape_property
+
+
+def test_realtor():
+    result = scrape_property(
+        location="85281",
+        site_name="realtor.com"
+    )
+
+    assert result is not None
diff --git a/tests/test_redfin.py b/tests/test_redfin.py
index 70dfed4..7f8af3b 100644
--- a/tests/test_redfin.py
+++ b/tests/test_redfin.py
@@ -3,6 +3,7 @@ from homeharvest import scrape_property
 
 def test_redfin():
     result = scrape_property(
+        site_name="redfin",
         location="85281"
     )
 