mirror of
https://github.com/Bunsly/HomeHarvest.git
synced 2026-03-04 19:44:29 -08:00
feat: add pandas
This commit is contained in:
@@ -1,12 +1,13 @@
|
||||
from dataclasses import dataclass
|
||||
import requests
|
||||
from .models import Property, ListingType
|
||||
from .models import Property, ListingType, SiteName
|
||||
|
||||
|
||||
@dataclass
class ScraperInput:
    """Bundle of parameters handed to a scraper when it is constructed."""

    # Free-form location text to search for.
    location: str
    # Which kind of listing to fetch.
    listing_type: ListingType
    # Which site the scraper targets.
    site_name: SiteName
    # Optional proxy URL; when falsy no proxy is configured on the session.
    proxy_url: str | None = None
|
||||
|
||||
|
||||
@@ -14,6 +15,8 @@ class Scraper:
|
||||
def __init__(self, scraper_input: ScraperInput):
|
||||
self.location = scraper_input.location
|
||||
self.session = requests.Session()
|
||||
self.listing_type = scraper_input.listing_type
|
||||
self.site_name = scraper_input.site_name
|
||||
|
||||
if scraper_input.proxy_url:
|
||||
self.session.proxies = {
|
||||
|
||||
@@ -2,12 +2,43 @@ from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class SiteName(Enum):
    """Identifiers for the listing sites a scraper can target."""

    ZILLOW = "zillow"
    REDFIN = "redfin"
    # The value carries the ".com" suffix, unlike the other two members.
    REALTOR = "realtor.com"
|
||||
|
||||
|
||||
class ListingType(Enum):
    """Kinds of listing a search can be restricted to."""

    FOR_SALE = "for_sale"
    FOR_RENT = "for_rent"
    SOLD = "sold"
|
||||
|
||||
|
||||
class PropertyType(Enum):
    """Normalized property categories shared by the scrapers.

    Members can be obtained either by value, ``PropertyType("CONDO")``,
    or from a numeric code via :meth:`from_int_code`.
    """

    HOUSE = "HOUSE"
    CONDO = "CONDO"
    # Previously spelled "townhousE", which was inconsistent with every other
    # member and made ``PropertyType("TOWNHOUSE")`` raise ValueError.
    TOWNHOUSE = "TOWNHOUSE"
    SINGLE_FAMILY = "SINGLE_FAMILY"
    MULTI_FAMILY = "MULTI_FAMILY"
    LAND = "LAND"
    OTHER = "OTHER"

    @classmethod
    def _missing_(cls, value):
        """Resolve string values case-insensitively.

        Called by ``Enum`` only when an exact value lookup fails. Keeps the
        old mis-cased spelling ("townhousE") working for existing callers.
        Returning ``None`` lets ``Enum`` raise its usual ``ValueError``.
        """
        if isinstance(value, str):
            wanted = value.upper()
            for member in cls:
                if member.value == wanted:
                    return member
        return None

    @classmethod
    def from_int_code(cls, code):
        """Translate a numeric property-type code into a member.

        The Redfin scraper passes ``home.get("propertyType")`` here, so
        ``code`` may be ``None`` when the key is absent.

        Args:
            code: Integer code (or ``None``) to translate.

        Returns:
            The matching member, or ``PropertyType.OTHER`` for any code that
            is not in the table (including ``None``).
        """
        # NOTE(review): codes 8 and 13 both map to SINGLE_FAMILY and 7 is
        # absent; presumably this mirrors the upstream site's numbering.
        mapping = {
            1: cls.HOUSE,
            2: cls.CONDO,
            3: cls.TOWNHOUSE,
            4: cls.MULTI_FAMILY,
            5: cls.LAND,
            6: cls.OTHER,
            8: cls.SINGLE_FAMILY,
            13: cls.SINGLE_FAMILY,
        }

        return mapping.get(code, cls.OTHER)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Address:
|
||||
address_one: str
|
||||
@@ -18,35 +49,35 @@ class Address:
|
||||
address_two: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Property:
|
||||
@dataclass
class Realty:
    """Fields common to every scraped record; subclasses add their own."""

    # Site the record was scraped from.
    site_name: SiteName
    # Address of the record.
    address: Address
    # Link to the record on the source site.
    url: str
    # Listing category; None when not supplied.
    listing_type: ListingType | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Property(Realty):
|
||||
price: int | None = None
|
||||
beds: int | None = None
|
||||
baths: float | None = None
|
||||
stories: int | None = None
|
||||
agent_name: str | None = None
|
||||
year_built: int | None = None
|
||||
square_feet: int | None = None
|
||||
price_per_square_foot: int | None = None
|
||||
year_built: int | None = None
|
||||
price: int | None = None
|
||||
mls_id: str | None = None
|
||||
|
||||
listing_type: ListingType | None = None
|
||||
agent_name: str | None = None
|
||||
property_type: PropertyType | None = None
|
||||
lot_size: int | None = None
|
||||
description: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Building:
|
||||
address: Address
|
||||
url: str
|
||||
|
||||
class Building(Realty):
|
||||
num_units: int | None = None
|
||||
min_unit_price: int | None = None
|
||||
max_unit_price: int | None = None
|
||||
avg_unit_price: int | None = None
|
||||
|
||||
listing_type: str | None = None
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import json
|
||||
from ..models import Property, Address
|
||||
from ..models import Property, Address, PropertyType
|
||||
from .. import Scraper
|
||||
from typing import Any
|
||||
|
||||
@@ -7,6 +7,7 @@ from typing import Any
|
||||
class RedfinScraper(Scraper):
|
||||
def __init__(self, scraper_input):
|
||||
super().__init__(scraper_input)
|
||||
self.listing_type = scraper_input.listing_type
|
||||
|
||||
def _handle_location(self):
|
||||
url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(
|
||||
@@ -31,8 +32,7 @@ class RedfinScraper(Scraper):
|
||||
|
||||
return target["id"].split("_")[1], get_region_type(target["type"])
|
||||
|
||||
@staticmethod
|
||||
def _parse_home(home: dict, single_search: bool = False) -> Property:
|
||||
def _parse_home(self, home: dict, single_search: bool = False) -> Property:
|
||||
def get_value(key: str) -> Any | None:
|
||||
if key in home and "value" in home[key]:
|
||||
return home[key]["value"]
|
||||
@@ -53,10 +53,12 @@ class RedfinScraper(Scraper):
|
||||
state=home["state"],
|
||||
zip_code=home["zip"],
|
||||
)
|
||||
|
||||
url = "https://www.redfin.com{}".format(home["url"])
|
||||
property_type = home["propertyType"] if "propertyType" in home else None
|
||||
|
||||
return Property(
|
||||
site_name=self.site_name,
|
||||
listing_type=self.listing_type,
|
||||
address=address,
|
||||
url=url,
|
||||
beds=home["beds"] if "beds" in home else None,
|
||||
@@ -68,6 +70,8 @@ class RedfinScraper(Scraper):
|
||||
if not single_search
|
||||
else home["yearBuilt"],
|
||||
square_feet=get_value("sqFt"),
|
||||
lot_size=home.get("lotSize", {}).get("value", None),
|
||||
property_type=PropertyType.from_int_code(home.get("propertyType")),
|
||||
price_per_square_foot=get_value("pricePerSqFt"),
|
||||
price=get_value("price"),
|
||||
mls_id=get_value("mlsId"),
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
import re
|
||||
import json
|
||||
from ..models import Property, Address, Building, ListingType
|
||||
from ..models import Property, Address, Building, ListingType, PropertyType
|
||||
from ....exceptions import NoResultsFound, PropertyNotFound
|
||||
from .. import Scraper
|
||||
|
||||
|
||||
class ZillowScraper(Scraper):
|
||||
listing_type: ListingType.FOR_SALE
|
||||
|
||||
def __init__(self, scraper_input):
|
||||
super().__init__(scraper_input)
|
||||
self.listing_type = scraper_input.listing_type
|
||||
@@ -65,15 +63,17 @@ class ZillowScraper(Scraper):
|
||||
agent_name = self._extract_agent_name(home)
|
||||
beds = home["hdpData"]["homeInfo"]["bedrooms"]
|
||||
baths = home["hdpData"]["homeInfo"]["bathrooms"]
|
||||
listing_type = home["hdpData"]["homeInfo"].get("homeType")
|
||||
property_type = home["hdpData"]["homeInfo"].get("homeType")
|
||||
|
||||
return Property(
|
||||
site_name=self.site_name,
|
||||
address=address,
|
||||
agent_name=agent_name,
|
||||
url=url,
|
||||
beds=beds,
|
||||
baths=baths,
|
||||
listing_type=listing_type,
|
||||
listing_type=self.listing_type,
|
||||
property_type=PropertyType(property_type),
|
||||
**price_data,
|
||||
)
|
||||
else:
|
||||
@@ -83,10 +83,11 @@ class ZillowScraper(Scraper):
|
||||
address = Address(address_one, city, state, zip_code, address_two)
|
||||
|
||||
building_info = self._extract_building_info(home)
|
||||
return Building(address=address, url=url, **building_info)
|
||||
return Building(
|
||||
site_name=self.site_name, address=address, url=url, **building_info
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _get_single_property_page(cls, property_data: dict):
|
||||
def _get_single_property_page(self, property_data: dict):
|
||||
"""
|
||||
This method is used when a user enters the exact location & zillow returns just one property
|
||||
"""
|
||||
@@ -104,8 +105,11 @@ class ZillowScraper(Scraper):
|
||||
state=address_data["state"],
|
||||
zip_code=address_data["zipcode"],
|
||||
)
|
||||
property_type = property_data.get("homeType", None)
|
||||
print(property_type)
|
||||
|
||||
return Property(
|
||||
site_name=self.site_name,
|
||||
address=address,
|
||||
url=url,
|
||||
beds=property_data.get("bedrooms", None),
|
||||
@@ -121,7 +125,8 @@ class ZillowScraper(Scraper):
|
||||
"pricePerSquareFoot", None
|
||||
),
|
||||
square_feet=property_data.get("livingArea", None),
|
||||
listing_type=property_data.get("homeType", None),
|
||||
property_type=PropertyType(property_type),
|
||||
listing_type=self.listing_type,
|
||||
)
|
||||
|
||||
def _extract_building_info(self, home: dict) -> dict:
|
||||
|
||||
Reference in New Issue
Block a user