- realtor init

pull/1/head
Zachary Hampton 2023-09-15 20:58:54 -07:00
parent af1f2fa531
commit 0946abd35a
5 changed files with 58 additions and 1 deletions

View File

@ -1,4 +1,5 @@
from .core.scrapers.redfin import RedfinScraper
from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.types import ListingType, Home
from .core.scrapers import ScraperInput
from .exceptions import InvalidSite, InvalidListingType
@ -6,13 +7,14 @@ from .exceptions import InvalidSite, InvalidListingType
# Registry of supported sites. Keys are the lower-cased site names that
# scrape_property() looks up; values are the scraper classes to use.
_scrapers = {
    "redfin": RedfinScraper,
    "realtor.com": RealtorScraper,
}
def scrape_property(
location: str,
site_name: str,
listing_type: str = "for_sale", #: for_sale, for_rent, sold
site_name: str = "redfin",
) -> list[Home]: #: eventually, return pandas dataframe
if site_name.lower() not in _scrapers:
raise InvalidSite(f"Provided site, '{site_name}', does not exist.")

View File

@ -25,3 +25,5 @@ class Scraper:
@staticmethod
def parse_home(home) -> Home: ...
def handle_location(self): ...

View File

@ -0,0 +1,42 @@
import json
from ..types import Home, Address
from .. import Scraper
from typing import Any
class RealtorScraper(Scraper):
    """Scraper for realtor.com.

    At this stage the class only resolves the requested location through a
    suggestion ("autocomplete") endpoint; listing retrieval is not
    implemented yet.

    NOTE(review): relies on ``self.location`` and ``self.session`` being set
    by the base ``Scraper`` constructor -- confirm against the base class.
    """

    # Suggestion endpoint queried by handle_location().
    _SUGGEST_URL = 'https://parser-external.geo.moveaws.com/suggest'

    # Browser-like headers sent with the suggestion request.
    _SUGGEST_HEADERS = {
        'authority': 'parser-external.geo.moveaws.com',
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'origin': 'https://www.realtor.com',
        'referer': 'https://www.realtor.com/',
        'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'cross-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
    }

    def __init__(self, scraper_input):
        """Initialise the scraper by delegating to the base constructor."""
        super().__init__(scraper_input)

    def handle_location(self):
        """Look up ``self.location`` and return the first suggestion.

        Returns:
            The first element of the ``autocomplete`` list in the JSON
            response.

        Raises:
            KeyError: If the response has no ``autocomplete`` key.
            IndexError: If the ``autocomplete`` list is empty, i.e. the
                location could not be resolved.
        """
        params = {
            'input': self.location,
            'client_id': 'for-sale',
            'limit': '1',
            'area_types': 'city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park',
        }

        response = self.session.get(
            self._SUGGEST_URL,
            params=params,
            headers=self._SUGGEST_HEADERS,
        )
        suggestions = response.json()['autocomplete']

        # Fail with a message that names the location instead of a bare
        # "list index out of range" when nothing matched.
        if not suggestions:
            raise IndexError(
                f"No location suggestions returned for '{self.location}'."
            )

        return suggestions[0]

    def search(self):
        """Resolve the location; listing retrieval is not implemented yet.

        Returns:
            None. No listings are fetched at this stage.
        """
        location_info = self.handle_location()
        # TODO: use the area type to build the listing query. It is read here
        # but not acted on yet; a missing key raises KeyError.
        location_type = location_info['area_type']

10
tests/test_realtor.py Normal file
View File

@ -0,0 +1,10 @@
from homeharvest import scrape_property
def test_realtor():
    """Check that scraping realtor.com for location "85281" yields a result."""
    query = {"location": "85281", "site_name": "realtor.com"}
    listings = scrape_property(**query)
    assert listings is not None

View File

@ -3,6 +3,7 @@ from homeharvest import scrape_property
def test_redfin():
result = scrape_property(
site_name="redfin",
location="85281"
)