- realtor init
parent
af1f2fa531
commit
0946abd35a
|
@ -1,4 +1,5 @@
|
||||||
from .core.scrapers.redfin import RedfinScraper
|
from .core.scrapers.redfin import RedfinScraper
|
||||||
|
from .core.scrapers.realtor import RealtorScraper
|
||||||
from .core.scrapers.types import ListingType, Home
|
from .core.scrapers.types import ListingType, Home
|
||||||
from .core.scrapers import ScraperInput
|
from .core.scrapers import ScraperInput
|
||||||
from .exceptions import InvalidSite, InvalidListingType
|
from .exceptions import InvalidSite, InvalidListingType
|
||||||
|
@ -6,13 +7,14 @@ from .exceptions import InvalidSite, InvalidListingType
|
||||||
|
|
||||||
_scrapers = {
|
_scrapers = {
|
||||||
"redfin": RedfinScraper,
|
"redfin": RedfinScraper,
|
||||||
|
"realtor.com": RealtorScraper
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def scrape_property(
|
def scrape_property(
|
||||||
location: str,
|
location: str,
|
||||||
|
site_name: str,
|
||||||
listing_type: str = "for_sale", #: for_sale, for_rent, sold
|
listing_type: str = "for_sale", #: for_sale, for_rent, sold
|
||||||
site_name: str = "redfin",
|
|
||||||
) -> list[Home]: #: eventually, return pandas dataframe
|
) -> list[Home]: #: eventually, return pandas dataframe
|
||||||
if site_name.lower() not in _scrapers:
|
if site_name.lower() not in _scrapers:
|
||||||
raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
|
raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
|
||||||
|
|
|
@ -25,3 +25,5 @@ class Scraper:
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse_home(home) -> Home: ...
|
def parse_home(home) -> Home: ...
|
||||||
|
|
||||||
|
def handle_location(self): ...
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
import json
|
||||||
|
from ..types import Home, Address
|
||||||
|
from .. import Scraper
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
class RealtorScraper(Scraper):
    """Scraper for realtor.com.

    At this stage the class only resolves the free-text location through
    the realtor.com suggestion endpoint; ``search`` does not retrieve any
    listings yet.
    """

    # Suggestion ("autocomplete") endpoint queried by handle_location().
    _SUGGEST_URL = 'https://parser-external.geo.moveaws.com/suggest'

    # Static browser-like headers sent with every suggestion request.
    _SUGGEST_HEADERS = {
        'authority': 'parser-external.geo.moveaws.com',
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'origin': 'https://www.realtor.com',
        'referer': 'https://www.realtor.com/',
        'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'cross-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
    }

    def __init__(self, scraper_input):
        """Initialise the scraper by delegating to the base ``Scraper``."""
        super().__init__(scraper_input)

    def handle_location(self):
        """Resolve ``self.location`` to the top suggestion from realtor.com.

        NOTE(review): ``self.location`` and ``self.session`` are presumably
        set up by ``Scraper.__init__`` -- confirm against the base class.

        Returns:
            The first entry of the response's ``autocomplete`` list.

        Raises:
            LookupError: if the service returns no suggestion for the
                location. Previously this surfaced as a bare
                ``IndexError``/``KeyError`` (both ``LookupError``
                subclasses) with no indication of the cause.
        """
        params = {
            'input': self.location,
            'client_id': 'for-sale',
            # Only the best match is used, so request a single suggestion.
            'limit': '1',
            'area_types': 'city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park',
        }

        response = self.session.get(
            self._SUGGEST_URL,
            params=params,
            # Pass a copy so the shared class-level dict can never be mutated.
            headers=dict(self._SUGGEST_HEADERS),
        )
        response_json = response.json()

        matches = response_json.get('autocomplete') if isinstance(response_json, dict) else None
        if not matches:
            raise LookupError(
                f"No realtor.com location suggestion found for '{self.location}'."
            )

        return matches[0]

    def search(self):
        """Resolve the location for the search.

        Listing retrieval is not implemented in this block, so the method
        currently returns ``None``.
        """
        location_info = self.handle_location()
        # TODO: branch on the area type once listing retrieval exists. The
        # lookup is kept so a suggestion lacking 'area_type' still fails here.
        location_type = location_info['area_type']
|
|
@ -0,0 +1,10 @@
|
||||||
|
from homeharvest import scrape_property
|
||||||
|
|
||||||
|
|
||||||
|
def test_realtor():
    """Smoke test: scraping realtor.com for ZIP code 85281 yields a result."""
    homes = scrape_property(site_name="realtor.com", location="85281")

    assert homes is not None
|
|
@ -3,6 +3,7 @@ from homeharvest import scrape_property
|
||||||
|
|
||||||
def test_redfin():
|
def test_redfin():
|
||||||
result = scrape_property(
|
result = scrape_property(
|
||||||
|
site_name="redfin",
|
||||||
location="85281"
|
location="85281"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue