From 3b7c17b7b59469b5eeed994570e52e6527d9d912 Mon Sep 17 00:00:00 2001 From: Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com> Date: Thu, 28 Sep 2023 18:40:16 -0700 Subject: [PATCH] - zillow proxy support --- homeharvest/core/scrapers/__init__.py | 10 ++++++++-- homeharvest/core/scrapers/zillow/__init__.py | 5 +++-- pyproject.toml | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index 95ed3e1..e900dbe 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -1,5 +1,6 @@ from dataclasses import dataclass import requests +import tls_client from .models import Property, ListingType, SiteName @@ -12,15 +13,20 @@ class ScraperInput: class Scraper: - def __init__(self, scraper_input: ScraperInput): + def __init__(self, scraper_input: ScraperInput, session: requests.Session | tls_client.Session = None): self.location = scraper_input.location self.listing_type = scraper_input.listing_type - self.session = requests.Session() + if not session: + self.session = requests.Session() + else: + self.session = session + if scraper_input.proxy: proxy_url = scraper_input.proxy proxies = {"http": proxy_url, "https": proxy_url} self.session.proxies.update(proxies) + self.listing_type = scraper_input.listing_type self.site_name = scraper_input.site_name diff --git a/homeharvest/core/scrapers/zillow/__init__.py b/homeharvest/core/scrapers/zillow/__init__.py index d2d7e81..b77f5f0 100644 --- a/homeharvest/core/scrapers/zillow/__init__.py +++ b/homeharvest/core/scrapers/zillow/__init__.py @@ -20,11 +20,12 @@ from datetime import datetime, timedelta class ZillowScraper(Scraper): def __init__(self, scraper_input): - super().__init__(scraper_input) - self.session = tls_client.Session( + session = tls_client.Session( client_identifier="chrome112", random_tls_extension_order=True ) + super().__init__(scraper_input, session) + self.session.headers.update({ 'authority': 'www.zillow.com', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', diff --git a/pyproject.toml b/pyproject.toml index 4be8834..5faef11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.2.17" +version = "0.2.18" description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin." authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/ZacharyHampton/HomeHarvest"