# HomeHarvest/homeharvest/__init__.py
# Repository-viewer header (not part of the module): 33 lines, 1002 B, Python;
# navigation links "Raw", "Normal View", "History".

2023-09-15 15:17:37 -07:00
from .core.scrapers.redfin import RedfinScraper
2023-09-15 20:58:54 -07:00
from .core.scrapers.realtor import RealtorScraper
2023-09-15 15:17:37 -07:00
from .core.scrapers.types import ListingType, Home
from .core.scrapers import ScraperInput
from .exceptions import InvalidSite, InvalidListingType
# Registry of supported sites: maps a lower-case site name to the scraper
# class that handles it. scrape_property() lower-cases the caller's
# site_name before looking it up here, so keys must stay lower-case.
_scrapers = {
    "redfin": RedfinScraper,
    "realtor.com": RealtorScraper,
}
def scrape_property(
    location: str,
    site_name: str,
    listing_type: str = "for_sale",  #: for_sale, for_rent, sold
) -> list[Home]:  #: eventually, return pandas dataframe
    """Run a property search for ``location`` on one registered site.

    Args:
        location: Search location; forwarded unchanged to ``ScraperInput``.
        site_name: Key of the scraper in ``_scrapers``. Matched
            case-insensitively (the value is lower-cased before lookup).
        listing_type: Name of a ``ListingType`` member. Matched
            case-insensitively (the value is upper-cased before lookup).

    Returns:
        The result of the selected scraper's ``search()`` call.

    Raises:
        InvalidSite: If ``site_name`` is not a key of ``_scrapers``.
        InvalidListingType: If ``listing_type`` does not name a
            ``ListingType`` member.
    """
    # Normalise once and reuse for both the membership test and the lookup.
    site_key = site_name.lower()
    if site_key not in _scrapers:
        raise InvalidSite(f"Provided site, '{site_name}', does not exist.")

    listing_key = listing_type.upper()
    if listing_key not in ListingType.__members__:
        raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.")

    scraper_input = ScraperInput(
        location=location,
        listing_type=ListingType[listing_key],
    )

    # Instantiate the registered scraper class with the validated input.
    site = _scrapers[site_key](scraper_input)
    return site.search()