Compare commits
2 Commits
32fdc281e3
...
8cfe056f79
Author | SHA1 | Date |
---|---|---|
Zachary Hampton | 8cfe056f79 | |
Zachary Hampton | 1010c743b6 | |
@ -97,6 +97,8 @@ class Entity:
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Agent(Entity):
|
class Agent(Entity):
|
||||||
|
mls_set: str | None = None
|
||||||
|
nrds_id: str | None = None
|
||||||
phones: list[dict] | AgentPhone | None = None
|
phones: list[dict] | AgentPhone | None = None
|
||||||
email: str | None = None
|
email: str | None = None
|
||||||
href: str | None = None
|
href: str | None = None
|
||||||
|
@ -104,6 +106,7 @@ class Agent(Entity):
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Office(Entity):
|
class Office(Entity):
|
||||||
|
mls_set: str | None = None
|
||||||
email: str | None = None
|
email: str | None = None
|
||||||
href: str | None = None
|
href: str | None = None
|
||||||
phones: list[dict] | AgentPhone | None = None
|
phones: list[dict] | AgentPhone | None = None
|
||||||
|
|
|
@ -114,7 +114,9 @@ class RealtorScraper(Scraper):
|
||||||
advertiser_type = advertiser.get("type")
|
advertiser_type = advertiser.get("type")
|
||||||
if advertiser_type == "seller": #: agent
|
if advertiser_type == "seller": #: agent
|
||||||
processed_advertisers.agent = Agent(
|
processed_advertisers.agent = Agent(
|
||||||
uuid=advertiser.get("mls_set"),
|
uuid=_parse_fulfillment_id(advertiser.get("fulfillment_id")),
|
||||||
|
nrds_id=advertiser.get("nrds_id"),
|
||||||
|
mls_set=advertiser.get("mls_set"),
|
||||||
name=advertiser.get("name"),
|
name=advertiser.get("name"),
|
||||||
email=advertiser.get("email"),
|
email=advertiser.get("email"),
|
||||||
phones=advertiser.get("phones"),
|
phones=advertiser.get("phones"),
|
||||||
|
@ -128,7 +130,8 @@ class RealtorScraper(Scraper):
|
||||||
|
|
||||||
if advertiser.get("office"): #: has an office
|
if advertiser.get("office"): #: has an office
|
||||||
processed_advertisers.office = Office(
|
processed_advertisers.office = Office(
|
||||||
uuid=_parse_fulfillment_id(advertiser["office"].get("fulfillment_id")) or advertiser["office"].get("mls_set"),
|
uuid=_parse_fulfillment_id(advertiser["office"].get("fulfillment_id")),
|
||||||
|
mls_set=advertiser["office"].get("mls_set"),
|
||||||
name=advertiser["office"].get("name"),
|
name=advertiser["office"].get("name"),
|
||||||
email=advertiser["office"].get("email"),
|
email=advertiser["office"].get("email"),
|
||||||
phones=advertiser["office"].get("phones"),
|
phones=advertiser["office"].get("phones"),
|
||||||
|
|
|
@ -109,6 +109,7 @@ _SEARCH_HOMES_DATA_BASE = """{
|
||||||
fulfillment_id
|
fulfillment_id
|
||||||
}
|
}
|
||||||
mls_set
|
mls_set
|
||||||
|
nrds_id
|
||||||
rental_corporation {
|
rental_corporation {
|
||||||
fulfillment_id
|
fulfillment_id
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,11 +46,14 @@ ordered_properties = [
|
||||||
"agent_name",
|
"agent_name",
|
||||||
"agent_email",
|
"agent_email",
|
||||||
"agent_phones",
|
"agent_phones",
|
||||||
|
"agent_mls_set",
|
||||||
|
"agent_nrds_id",
|
||||||
"broker_id",
|
"broker_id",
|
||||||
"broker_name",
|
"broker_name",
|
||||||
"builder_id",
|
"builder_id",
|
||||||
"builder_name",
|
"builder_name",
|
||||||
"office_id",
|
"office_id",
|
||||||
|
"office_mls_set",
|
||||||
"office_name",
|
"office_name",
|
||||||
"office_email",
|
"office_email",
|
||||||
"office_phones",
|
"office_phones",
|
||||||
|
@ -81,6 +84,8 @@ def process_result(result: Property) -> pd.DataFrame:
|
||||||
prop_data["agent_name"] = agent_data.name
|
prop_data["agent_name"] = agent_data.name
|
||||||
prop_data["agent_email"] = agent_data.email
|
prop_data["agent_email"] = agent_data.email
|
||||||
prop_data["agent_phones"] = agent_data.phones
|
prop_data["agent_phones"] = agent_data.phones
|
||||||
|
prop_data["agent_mls_set"] = agent_data.mls_set
|
||||||
|
prop_data["agent_nrds_id"] = agent_data.nrds_id
|
||||||
|
|
||||||
if advertiser_data.broker:
|
if advertiser_data.broker:
|
||||||
broker_data = advertiser_data.broker
|
broker_data = advertiser_data.broker
|
||||||
|
@ -98,6 +103,7 @@ def process_result(result: Property) -> pd.DataFrame:
|
||||||
prop_data["office_name"] = office_data.name
|
prop_data["office_name"] = office_data.name
|
||||||
prop_data["office_email"] = office_data.email
|
prop_data["office_email"] = office_data.email
|
||||||
prop_data["office_phones"] = office_data.phones
|
prop_data["office_phones"] = office_data.phones
|
||||||
|
prop_data["office_mls_set"] = office_data.mls_set
|
||||||
|
|
||||||
prop_data["price_per_sqft"] = prop_data["prc_sqft"]
|
prop_data["price_per_sqft"] = prop_data["prc_sqft"]
|
||||||
prop_data["nearby_schools"] = filter(None, prop_data["nearby_schools"]) if prop_data["nearby_schools"] else None
|
prop_data["nearby_schools"] = filter(None, prop_data["nearby_schools"]) if prop_data["nearby_schools"] else None
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "homeharvest"
|
name = "homeharvest"
|
||||||
version = "0.4.0"
|
version = "0.4.2"
|
||||||
description = "Real estate scraping library"
|
description = "Real estate scraping library"
|
||||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||||
homepage = "https://github.com/Bunsly/HomeHarvest"
|
homepage = "https://github.com/Bunsly/HomeHarvest"
|
||||||
|
|
|
@ -243,3 +243,13 @@ def test_apartment_list_price():
|
||||||
assert len(results[results[["list_price", "list_price_min", "list_price_max"]].notnull().any(axis=1)]) / len(
|
assert len(results[results[["list_price", "list_price_min", "list_price_max"]].notnull().any(axis=1)]) / len(
|
||||||
results
|
results
|
||||||
) > 0.5
|
) > 0.5
|
||||||
|
|
||||||
|
|
||||||
|
def test_builder_exists():
|
||||||
|
listing = scrape_property(
|
||||||
|
location="18149 W Poston Dr, Surprise, AZ 85387",
|
||||||
|
extra_property_data=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert listing is not None
|
||||||
|
assert listing["builder_name"].nunique() > 0
|
||||||
|
|
Loading…
Reference in New Issue