- redfin bug fix

- add recent features for issues
This commit is contained in:
Zachary Hampton
2023-09-28 15:19:43 -07:00
parent 9200c17df2
commit 01c53f9399
6 changed files with 77 additions and 27 deletions

View File

@@ -1,6 +1,7 @@
from dataclasses import dataclass
from enum import Enum
from typing import Tuple
from datetime import datetime
class SiteName(Enum):
@@ -64,6 +65,13 @@ class Address:
zip_code: str | None = None
@dataclass
class Agent:
name: str
phone: str | None = None
email: str | None = None
@dataclass
class Property:
property_url: str
@@ -81,11 +89,11 @@ class Property:
price_per_sqft: int | None = None
mls_id: str | None = None
agent_name: str | None = None
agent: Agent | None = None
img_src: str | None = None
description: str | None = None
status_text: str | None = None
posted_time: str | None = None
posted_time: datetime | None = None
# building for sale
bldg_name: str | None = None
@@ -107,3 +115,6 @@ class Property:
latitude: float | None = None
longitude: float | None = None
sold_date: datetime | None = None
days_on_market: int | None = None

View File

@@ -8,8 +8,9 @@ import json
from typing import Any
from .. import Scraper
from ....utils import parse_address_two, parse_address_one
from ..models import Property, Address, PropertyType, ListingType, SiteName
from ....exceptions import NoResultsFound
from ..models import Property, Address, PropertyType, ListingType, SiteName, Agent
from ....exceptions import NoResultsFound, SearchTooBroad
from datetime import datetime
class RedfinScraper(Scraper):
@@ -30,6 +31,8 @@ class RedfinScraper(Scraper):
return "6" #: city
elif match_type == "1":
return "address" #: address, needs to be handled differently
elif match_type == "11":
return "state"
if "exactMatch" not in response_json["payload"]:
raise NoResultsFound("No results found for location: {}".format(self.location))
@@ -74,6 +77,8 @@ class RedfinScraper(Scraper):
else:
lot_size = lot_size_data
lat_long = get_value("latLong")
return Property(
site_name=self.site_name,
listing_type=self.listing_type,
@@ -88,15 +93,20 @@ class RedfinScraper(Scraper):
sqft_min=get_value("sqFt"),
sqft_max=get_value("sqFt"),
stories=home["stories"] if "stories" in home else None,
agent_name=get_value("listingAgent"),
agent=Agent( #: listingAgent, some have sellingAgent as well
name=home['listingAgent'].get('name') if 'listingAgent' in home else None,
phone=home['listingAgent'].get('phone') if 'listingAgent' in home else None,
),
description=home["listingRemarks"] if "listingRemarks" in home else None,
year_built=get_value("yearBuilt") if not single_search else home.get("yearBuilt"),
lot_area_value=lot_size,
property_type=PropertyType.from_int_code(home.get("propertyType")),
price_per_sqft=get_value("pricePerSqFt") if type(home.get("pricePerSqFt")) != int else home.get("pricePerSqFt"),
mls_id=get_value("mlsId"),
latitude=home["latLong"]["latitude"] if "latLong" in home and "latitude" in home["latLong"] else None,
longitude=home["latLong"]["longitude"] if "latLong" in home and "longitude" in home["latLong"] else None,
latitude=lat_long.get('latitude') if lat_long else None,
longitude=lat_long.get('longitude') if lat_long else None,
sold_date=datetime.fromtimestamp(home['soldDate'] / 1000) if 'soldDate' in home else None,
days_on_market=get_value("dom")
)
def _handle_rentals(self, region_id, region_type):
@@ -207,6 +217,9 @@ class RedfinScraper(Scraper):
def search(self):
region_id, region_type = self._handle_location()
if region_type == "state":
raise SearchTooBroad("State searches are not supported, please use a more specific location.")
if region_type == "address":
home_id = region_id
return self.handle_address(home_id)

View File

@@ -9,7 +9,7 @@ import json
from .. import Scraper
from ....utils import parse_address_one, parse_address_two
from ....exceptions import GeoCoordsNotFound, NoResultsFound
from ..models import Property, Address, ListingType, PropertyType
from ..models import Property, Address, ListingType, PropertyType, Agent
class ZillowScraper(Scraper):
@@ -165,10 +165,10 @@ class ZillowScraper(Scraper):
home_info["statusType"] if "statusType" in home_info else self.listing_type
),
status_text=result.get("statusText"),
posted_time=result["variableData"]["text"]
posted_time=result["variableData"]["text"] #: TODO: change to datetime
if "variableData" in result
and "text" in result["variableData"]
and result["variableData"]["type"] == "TIME_ON_INFO"
and "text" in result["variableData"]
and result["variableData"]["type"] == "TIME_ON_INFO"
else None,
price_min=home_info.get("price"),
price_max=home_info.get("price"),
@@ -246,7 +246,9 @@ class ZillowScraper(Scraper):
tax_assessed_value=property_data.get("taxAssessedValue"),
lot_area_value=property_data.get("lotAreaValue"),
lot_area_unit=property_data["lotAreaUnits"].lower() if "lotAreaUnits" in property_data else None,
agent_name=property_data.get("attributionInfo", {}).get("agentName"),
agent=Agent(
name=property_data.get("attributionInfo", {}).get("agentName")
),
stories=property_data.get("resoFacts", {}).get("stories"),
mls_id=property_data.get("attributionInfo", {}).get("mlsId"),
beds_min=property_data.get("bedrooms"),
@@ -298,20 +300,21 @@ class ZillowScraper(Scraper):
def _get_headers(self):
headers = {
"authority": "www.zillow.com",
"accept": "*/*",
"accept-language": "en-US,en;q=0.9",
"content-type": "application/json",
"origin": "https://www.zillow.com",
"referer": "https://www.zillow.com",
"sec-ch-ua": '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
'authority': 'www.zillow.com',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'accept-language': 'en-US,en;q=0.9',
'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'none',
'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
}
if self.cookies:
headers['Cookie'] = self.cookies
return headers