- realtor init

pull/1/head
Zachary Hampton 2023-09-15 20:58:54 -07:00
parent af1f2fa531
commit 0946abd35a
5 changed files with 58 additions and 1 deletion

View File

@ -1,4 +1,5 @@
from .core.scrapers.redfin import RedfinScraper from .core.scrapers.redfin import RedfinScraper
from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.types import ListingType, Home from .core.scrapers.types import ListingType, Home
from .core.scrapers import ScraperInput from .core.scrapers import ScraperInput
from .exceptions import InvalidSite, InvalidListingType from .exceptions import InvalidSite, InvalidListingType
@ -6,13 +7,14 @@ from .exceptions import InvalidSite, InvalidListingType
_scrapers = { _scrapers = {
"redfin": RedfinScraper, "redfin": RedfinScraper,
"realtor.com": RealtorScraper
} }
def scrape_property( def scrape_property(
location: str, location: str,
site_name: str,
listing_type: str = "for_sale", #: for_sale, for_rent, sold listing_type: str = "for_sale", #: for_sale, for_rent, sold
site_name: str = "redfin",
) -> list[Home]: #: eventually, return pandas dataframe ) -> list[Home]: #: eventually, return pandas dataframe
if site_name.lower() not in _scrapers: if site_name.lower() not in _scrapers:
raise InvalidSite(f"Provided site, '{site_name}', does not exist.") raise InvalidSite(f"Provided site, '{site_name}', does not exist.")

View File

@ -25,3 +25,5 @@ class Scraper:
@staticmethod @staticmethod
def parse_home(home) -> Home: ... def parse_home(home) -> Home: ...
def handle_location(self): ...

View File

@ -0,0 +1,42 @@
import json
from ..types import Home, Address
from .. import Scraper
from typing import Any
class RealtorScraper(Scraper):
    """Scraper for realtor.com.

    Only location resolution is implemented so far: ``search`` resolves the
    requested location but does not yet fetch or return any listings.
    """

    def __init__(self, scraper_input):
        super().__init__(scraper_input)

    def handle_location(self):
        """Resolve ``self.location`` via the realtor.com suggest endpoint.

        Sends a single GET request through ``self.session`` (presumably
        provided by the ``Scraper`` base class - confirm there) and asks for
        at most one suggestion.

        Returns:
            The first entry of the ``autocomplete`` list in the JSON response.

        Raises:
            KeyError: If the response JSON has no ``autocomplete`` key.
            IndexError: If the service returned no suggestion for the
                location.
        """
        # Browser-like headers, sent verbatim with the suggest request.
        headers = {
            'authority': 'parser-external.geo.moveaws.com',
            'accept': '*/*',
            'accept-language': 'en-US,en;q=0.9',
            'origin': 'https://www.realtor.com',
            'referer': 'https://www.realtor.com/',
            'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'cross-site',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        }

        params = {
            'input': self.location,
            'client_id': 'for-sale',
            'limit': '1',  # only the first suggestion is ever used
            'area_types': 'city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park',
        }

        response = self.session.get(
            'https://parser-external.geo.moveaws.com/suggest',
            params=params,
            headers=headers,
        )
        suggestions = response.json()['autocomplete']

        # Fail with a message that names the location instead of a bare
        # "list index out of range" when nothing matched.
        if not suggestions:
            raise IndexError(
                f"No realtor.com location match found for '{self.location}'."
            )
        return suggestions[0]

    def search(self):
        """Resolve the requested location.

        Listing retrieval is not implemented yet, so this currently returns
        ``None`` after looking up the location and reading its area type.
        """
        location_info = self.handle_location()
        # TODO: use the area type to choose the listing query once search is
        # implemented; it is read here but not used yet.
        location_type = location_info['area_type']

10
tests/test_realtor.py Normal file
View File

@ -0,0 +1,10 @@
from homeharvest import scrape_property
def test_realtor():
    """Smoke test: scraping realtor.com for location "85281" returns a non-None result."""
    homes = scrape_property(site_name="realtor.com", location="85281")

    assert homes is not None

View File

@ -3,6 +3,7 @@ from homeharvest import scrape_property
def test_redfin(): def test_redfin():
result = scrape_property( result = scrape_property(
site_name="redfin",
location="85281" location="85281"
) )