feat: add pandas

This commit is contained in:
Cullen Watson
2023-09-17 18:30:37 -05:00
parent b76c659f94
commit 3697b7cf2d
9 changed files with 393 additions and 30 deletions

View File

@@ -1,12 +1,13 @@
from dataclasses import dataclass
import requests
from .models import Property, ListingType
from .models import Property, ListingType, SiteName
@dataclass
class ScraperInput:
    """Bundle of user-supplied parameters handed to a scraper's constructor."""

    # Free-form location string to search for.
    location: str
    # Which kind of listing (for sale, for rent, sold) is being requested.
    listing_type: ListingType
    # Which listing site the request targets.
    site_name: SiteName
    # Optional proxy URL; the scraper only configures session proxies when
    # this is truthy.
    proxy_url: str | None = None
@@ -14,6 +15,8 @@ class Scraper:
def __init__(self, scraper_input: ScraperInput):
self.location = scraper_input.location
self.session = requests.Session()
self.listing_type = scraper_input.listing_type
self.site_name = scraper_input.site_name
if scraper_input.proxy_url:
self.session.proxies = {

View File

@@ -2,12 +2,43 @@ from dataclasses import dataclass
from enum import Enum
class SiteName(Enum):
    """Listing sites a scrape request can target; values are the site identifiers."""

    ZILLOW = "zillow"
    REDFIN = "redfin"
    REALTOR = "realtor.com"
class ListingType(Enum):
    """Kind of listing being searched for or reported on a record."""

    FOR_SALE = "for_sale"
    FOR_RENT = "for_rent"
    SOLD = "sold"
class PropertyType(Enum):
HOUSE = "HOUSE"
CONDO = "CONDO"
TOWNHOUSE = "townhousE"
SINGLE_FAMILY = "SINGLE_FAMILY"
MULTI_FAMILY = "MULTI_FAMILY"
LAND = "LAND"
OTHER = "OTHER"
@classmethod
def from_int_code(cls, code):
mapping = {
1: cls.HOUSE,
2: cls.CONDO,
3: cls.TOWNHOUSE,
4: cls.MULTI_FAMILY,
5: cls.LAND,
6: cls.OTHER,
8: cls.SINGLE_FAMILY,
13: cls.SINGLE_FAMILY,
}
return mapping.get(code, cls.OTHER)
@dataclass
class Address:
address_one: str
@@ -18,35 +49,35 @@ class Address:
address_two: str | None = None
@dataclass
class Property:
@dataclass
class Realty:
    """Base record holding the fields common to every scraped listing.

    Subclassed by the property and building record types.
    """

    # Site the record was scraped from.
    site_name: SiteName
    # Structured address of the listing.
    address: Address
    # Link to the listing page.
    url: str
    # Listing category, when known.
    listing_type: ListingType | None = None
@dataclass
class Property(Realty):
price: int | None = None
beds: int | None = None
baths: float | None = None
stories: int | None = None
agent_name: str | None = None
year_built: int | None = None
square_feet: int | None = None
price_per_square_foot: int | None = None
year_built: int | None = None
price: int | None = None
mls_id: str | None = None
listing_type: ListingType | None = None
agent_name: str | None = None
property_type: PropertyType | None = None
lot_size: int | None = None
description: str | None = None
@dataclass
class Building:
address: Address
url: str
class Building(Realty):
num_units: int | None = None
min_unit_price: int | None = None
max_unit_price: int | None = None
avg_unit_price: int | None = None
listing_type: str | None = None

View File

@@ -1,5 +1,5 @@
import json
from ..models import Property, Address
from ..models import Property, Address, PropertyType
from .. import Scraper
from typing import Any
@@ -7,6 +7,7 @@ from typing import Any
class RedfinScraper(Scraper):
def __init__(self, scraper_input):
    """Initialise the scraper from ``scraper_input``.

    Runs the parent class initialiser first, then stores the requested
    listing type on the instance.
    """
    super().__init__(scraper_input)
    # Kept on the instance so parsed records can be tagged with it.
    self.listing_type = scraper_input.listing_type
def _handle_location(self):
url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(
@@ -31,8 +32,7 @@ class RedfinScraper(Scraper):
return target["id"].split("_")[1], get_region_type(target["type"])
@staticmethod
def _parse_home(home: dict, single_search: bool = False) -> Property:
def _parse_home(self, home: dict, single_search: bool = False) -> Property:
def get_value(key: str) -> Any | None:
if key in home and "value" in home[key]:
return home[key]["value"]
@@ -53,10 +53,12 @@ class RedfinScraper(Scraper):
state=home["state"],
zip_code=home["zip"],
)
url = "https://www.redfin.com{}".format(home["url"])
property_type = home["propertyType"] if "propertyType" in home else None
return Property(
site_name=self.site_name,
listing_type=self.listing_type,
address=address,
url=url,
beds=home["beds"] if "beds" in home else None,
@@ -68,6 +70,8 @@ class RedfinScraper(Scraper):
if not single_search
else home["yearBuilt"],
square_feet=get_value("sqFt"),
lot_size=home.get("lotSize", {}).get("value", None),
property_type=PropertyType.from_int_code(home.get("propertyType")),
price_per_square_foot=get_value("pricePerSqFt"),
price=get_value("price"),
mls_id=get_value("mlsId"),

View File

@@ -1,13 +1,11 @@
import re
import json
from ..models import Property, Address, Building, ListingType
from ..models import Property, Address, Building, ListingType, PropertyType
from ....exceptions import NoResultsFound, PropertyNotFound
from .. import Scraper
class ZillowScraper(Scraper):
listing_type: ListingType.FOR_SALE
def __init__(self, scraper_input):
super().__init__(scraper_input)
self.listing_type = scraper_input.listing_type
@@ -65,15 +63,17 @@ class ZillowScraper(Scraper):
agent_name = self._extract_agent_name(home)
beds = home["hdpData"]["homeInfo"]["bedrooms"]
baths = home["hdpData"]["homeInfo"]["bathrooms"]
listing_type = home["hdpData"]["homeInfo"].get("homeType")
property_type = home["hdpData"]["homeInfo"].get("homeType")
return Property(
site_name=self.site_name,
address=address,
agent_name=agent_name,
url=url,
beds=beds,
baths=baths,
listing_type=listing_type,
listing_type=self.listing_type,
property_type=PropertyType(property_type),
**price_data,
)
else:
@@ -83,10 +83,11 @@ class ZillowScraper(Scraper):
address = Address(address_one, city, state, zip_code, address_two)
building_info = self._extract_building_info(home)
return Building(address=address, url=url, **building_info)
return Building(
site_name=self.site_name, address=address, url=url, **building_info
)
@classmethod
def _get_single_property_page(cls, property_data: dict):
def _get_single_property_page(self, property_data: dict):
"""
This method is used when a user enters the exact location & zillow returns just one property
"""
@@ -104,8 +105,11 @@ class ZillowScraper(Scraper):
state=address_data["state"],
zip_code=address_data["zipcode"],
)
property_type = property_data.get("homeType", None)
print(property_type)
return Property(
site_name=self.site_name,
address=address,
url=url,
beds=property_data.get("bedrooms", None),
@@ -121,7 +125,8 @@ class ZillowScraper(Scraper):
"pricePerSquareFoot", None
),
square_feet=property_data.get("livingArea", None),
listing_type=property_data.get("homeType", None),
property_type=PropertyType(property_type),
listing_type=self.listing_type,
)
def _extract_building_info(self, home: dict) -> dict: