- zillow proxy support

pull/27/head v0.2.18
Zachary Hampton 2023-09-28 18:40:16 -07:00
parent 59317fd6fc
commit 3b7c17b7b5
3 changed files with 12 additions and 5 deletions

View File

@@ -1,5 +1,6 @@
from dataclasses import dataclass from dataclasses import dataclass
import requests import requests
import tls_client
from .models import Property, ListingType, SiteName from .models import Property, ListingType, SiteName
@@ -12,15 +13,20 @@ class ScraperInput:
class Scraper: class Scraper:
def __init__(self, scraper_input: ScraperInput): def __init__(self, scraper_input: ScraperInput, session: requests.Session | tls_client.Session = None):
self.location = scraper_input.location self.location = scraper_input.location
self.listing_type = scraper_input.listing_type self.listing_type = scraper_input.listing_type
if not session:
self.session = requests.Session() self.session = requests.Session()
else:
self.session = session
if scraper_input.proxy: if scraper_input.proxy:
proxy_url = scraper_input.proxy proxy_url = scraper_input.proxy
proxies = {"http": proxy_url, "https": proxy_url} proxies = {"http": proxy_url, "https": proxy_url}
self.session.proxies.update(proxies) self.session.proxies.update(proxies)
self.listing_type = scraper_input.listing_type self.listing_type = scraper_input.listing_type
self.site_name = scraper_input.site_name self.site_name = scraper_input.site_name

View File

@@ -20,11 +20,12 @@ from datetime import datetime, timedelta
class ZillowScraper(Scraper): class ZillowScraper(Scraper):
def __init__(self, scraper_input): def __init__(self, scraper_input):
super().__init__(scraper_input) session = tls_client.Session(
self.session = tls_client.Session(
client_identifier="chrome112", random_tls_extension_order=True client_identifier="chrome112", random_tls_extension_order=True
) )
super().__init__(scraper_input, session)
self.session.headers.update({ self.session.headers.update({
'authority': 'www.zillow.com', 'authority': 'www.zillow.com',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "homeharvest" name = "homeharvest"
version = "0.2.17" version = "0.2.18"
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin." description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"] authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
homepage = "https://github.com/ZacharyHampton/HomeHarvest" homepage = "https://github.com/ZacharyHampton/HomeHarvest"