commit
59317fd6fc
|
@ -21,11 +21,26 @@ from datetime import datetime, timedelta
|
||||||
class ZillowScraper(Scraper):
|
class ZillowScraper(Scraper):
|
||||||
def __init__(self, scraper_input):
|
def __init__(self, scraper_input):
|
||||||
super().__init__(scraper_input)
|
super().__init__(scraper_input)
|
||||||
self.cookies = None
|
|
||||||
self.session = tls_client.Session(
|
self.session = tls_client.Session(
|
||||||
client_identifier="chrome112", random_tls_extension_order=True
|
client_identifier="chrome112", random_tls_extension_order=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.session.headers.update({
|
||||||
|
'authority': 'www.zillow.com',
|
||||||
|
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
||||||
|
'accept-language': 'en-US,en;q=0.9',
|
||||||
|
'cache-control': 'max-age=0',
|
||||||
|
'sec-ch-ua': '"Chromium";v="117", "Not)A;Brand";v="24", "Google Chrome";v="117"',
|
||||||
|
'sec-ch-ua-mobile': '?0',
|
||||||
|
'sec-ch-ua-platform': '"Windows"',
|
||||||
|
'sec-fetch-dest': 'document',
|
||||||
|
'sec-fetch-mode': 'navigate',
|
||||||
|
'sec-fetch-site': 'same-origin',
|
||||||
|
'sec-fetch-user': '?1',
|
||||||
|
'upgrade-insecure-requests': '1',
|
||||||
|
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
|
||||||
|
})
|
||||||
|
|
||||||
if not self.is_plausible_location(self.location):
|
if not self.is_plausible_location(self.location):
|
||||||
raise NoResultsFound("Invalid location input: {}".format(self.location))
|
raise NoResultsFound("Invalid location input: {}".format(self.location))
|
||||||
|
|
||||||
|
@ -48,7 +63,7 @@ class ZillowScraper(Scraper):
|
||||||
return resp.json()["results"] != []
|
return resp.json()["results"] != []
|
||||||
|
|
||||||
def search(self):
|
def search(self):
|
||||||
resp = self.session.get(self.url, headers=self._get_headers())
|
resp = self.session.get(self.url)
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
raise HTTPError(
|
raise HTTPError(
|
||||||
f"bad response status code: {resp.status_code}"
|
f"bad response status code: {resp.status_code}"
|
||||||
|
@ -146,12 +161,11 @@ class ZillowScraper(Scraper):
|
||||||
"wants": {"cat1": ["mapResults"]},
|
"wants": {"cat1": ["mapResults"]},
|
||||||
"isDebugRequest": False,
|
"isDebugRequest": False,
|
||||||
}
|
}
|
||||||
resp = self.session.put(url, headers=self._get_headers(), json=payload)
|
resp = self.session.put(url, json=payload)
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
raise HTTPError(
|
raise HTTPError(
|
||||||
f"bad response status code: {resp.status_code}"
|
f"bad response status code: {resp.status_code}"
|
||||||
)
|
)
|
||||||
self.cookies = resp.cookies
|
|
||||||
return self._parse_properties(resp.json())
|
return self._parse_properties(resp.json())
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -321,26 +335,3 @@ class ZillowScraper(Scraper):
|
||||||
state=state,
|
state=state,
|
||||||
zip_code=zip_code,
|
zip_code=zip_code,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_headers(self):
|
|
||||||
headers = {
|
|
||||||
'authority': 'www.zillow.com',
|
|
||||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
|
||||||
'accept-language': 'en-US,en;q=0.9',
|
|
||||||
'cache-control': 'max-age=0',
|
|
||||||
'cookie': '<your_cookie_here>',
|
|
||||||
'sec-ch-ua': '"Chromium";v="117", "Not)A;Brand";v="24", "Google Chrome";v="117"',
|
|
||||||
'sec-ch-ua-mobile': '?0',
|
|
||||||
'sec-ch-ua-platform': '"Windows"',
|
|
||||||
'sec-fetch-dest': 'document',
|
|
||||||
'sec-fetch-mode': 'navigate',
|
|
||||||
'sec-fetch-site': 'same-origin',
|
|
||||||
'sec-fetch-user': '?1',
|
|
||||||
'upgrade-insecure-requests': '1',
|
|
||||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.cookies:
|
|
||||||
headers['Cookie'] = self.cookies
|
|
||||||
|
|
||||||
return headers
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "homeharvest"
|
name = "homeharvest"
|
||||||
version = "0.2.16"
|
version = "0.2.17"
|
||||||
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
|
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
|
||||||
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
||||||
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
|
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
|
||||||
|
|
|
@ -11,6 +11,7 @@ def test_zillow():
|
||||||
results = [
|
results = [
|
||||||
scrape_property(location="2530 Al Lipscomb Way", site_name="zillow", listing_type="for_sale"),
|
scrape_property(location="2530 Al Lipscomb Way", site_name="zillow", listing_type="for_sale"),
|
||||||
scrape_property(location="Phoenix, AZ, USA", site_name=["zillow"], listing_type="for_rent"),
|
scrape_property(location="Phoenix, AZ, USA", site_name=["zillow"], listing_type="for_rent"),
|
||||||
|
scrape_property(location="Surprise, AZ", site_name=["zillow"], listing_type="for_sale"),
|
||||||
scrape_property(location="Dallas, TX, USA", site_name="zillow", listing_type="sold"),
|
scrape_property(location="Dallas, TX, USA", site_name="zillow", listing_type="sold"),
|
||||||
scrape_property(location="85281", site_name="zillow"),
|
scrape_property(location="85281", site_name="zillow"),
|
||||||
scrape_property(location="3268 88th st s, Lakewood", site_name="zillow", listing_type="for_rent"),
|
scrape_property(location="3268 88th st s, Lakewood", site_name="zillow", listing_type="for_rent"),
|
||||||
|
|
Loading…
Reference in New Issue