- realtor init
parent
af1f2fa531
commit
0946abd35a
|
@ -1,4 +1,5 @@
|
|||
from .core.scrapers.redfin import RedfinScraper
|
||||
from .core.scrapers.realtor import RealtorScraper
|
||||
from .core.scrapers.types import ListingType, Home
|
||||
from .core.scrapers import ScraperInput
|
||||
from .exceptions import InvalidSite, InvalidListingType
|
||||
|
@ -6,13 +7,14 @@ from .exceptions import InvalidSite, InvalidListingType
|
|||
|
||||
# Registry of supported sites: lower-cased site name -> scraper class.
_scrapers = {
    "redfin": RedfinScraper,
    "realtor.com": RealtorScraper,
}
|
||||
|
||||
|
||||
def scrape_property(
|
||||
location: str,
|
||||
site_name: str,
|
||||
listing_type: str = "for_sale", #: for_sale, for_rent, sold
|
||||
site_name: str = "redfin",
|
||||
) -> list[Home]: #: eventually, return pandas dataframe
|
||||
if site_name.lower() not in _scrapers:
|
||||
raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
|
||||
|
|
|
@ -25,3 +25,5 @@ class Scraper:
|
|||
|
||||
@staticmethod
def parse_home(home) -> Home:
    """Build a ``Home`` from ``home``; this is a stub with no implementation."""
    ...
|
||||
|
||||
def handle_location(self):
    """Resolve the scraper's location; this is a stub with no implementation."""
    ...
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
import json
|
||||
from ..types import Home, Address
|
||||
from .. import Scraper
|
||||
from typing import Any
|
||||
|
||||
|
||||
class RealtorScraper(Scraper):
    """Scraper for realtor.com (work in progress).

    Only location resolution is implemented so far; ``search`` resolves the
    location but does not yet fetch or return any listings.
    """

    # Endpoint of the location-suggestion service used by ``handle_location``.
    _SUGGEST_URL = 'https://parser-external.geo.moveaws.com/suggest'

    # Browser-like headers sent with every suggestion request.
    _SUGGEST_HEADERS = {
        'authority': 'parser-external.geo.moveaws.com',
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'origin': 'https://www.realtor.com',
        'referer': 'https://www.realtor.com/',
        'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'cross-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
    }

    # Area types the suggestion service is asked to match against.
    _AREA_TYPES = 'city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park'

    def __init__(self, scraper_input):
        super().__init__(scraper_input)

    def handle_location(self) -> dict[str, Any]:
        """Resolve ``self.location`` to the best-matching suggestion.

        Sends the location text to the suggestion endpoint with ``limit=1``
        and returns the first entry of the response's ``autocomplete`` list.

        Returns:
            The first suggestion object from the service response.

        Raises:
            KeyError: If the response has no ``autocomplete`` field.
            IndexError: If the service returned no suggestion for the location.
            Exception: Whatever ``raise_for_status`` raises on an HTTP error
                (assumes ``self.session`` is a ``requests``-style session).
        """
        params = {
            'input': self.location,
            'client_id': 'for-sale',
            'limit': '1',
            'area_types': self._AREA_TYPES,
        }

        # Pass a copy so the shared class-level header mapping is never mutated.
        response = self.session.get(
            self._SUGGEST_URL,
            params=params,
            headers=dict(self._SUGGEST_HEADERS),
        )
        # Fail on HTTP errors here instead of trying to parse an error page as JSON.
        response.raise_for_status()

        suggestions = response.json()['autocomplete']
        if not suggestions:
            # Same exception type as indexing an empty list, but with context.
            raise IndexError(
                f"No realtor.com location suggestion found for {self.location!r}"
            )
        return suggestions[0]

    def search(self):
        """Resolve the search location; listing retrieval is not implemented yet.

        Currently returns ``None``.
        """
        location_info = self.handle_location()
        # Looked up eagerly so a suggestion without an area type fails here.
        location_type = location_info['area_type']  # noqa: F841 - not used yet
        # TODO: fetch listings for the resolved location and return them.
|
|
@ -0,0 +1,10 @@
|
|||
from homeharvest import scrape_property
|
||||
|
||||
|
||||
def test_realtor():
    """Smoke test: scraping ZIP code 85281 from realtor.com yields a non-None result."""
    homes = scrape_property(site_name="realtor.com", location="85281")

    assert homes is not None
|
|
@ -3,6 +3,7 @@ from homeharvest import scrape_property
|
|||
|
||||
def test_redfin():
|
||||
result = scrape_property(
|
||||
site_name="redfin",
|
||||
location="85281"
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue