Compare commits

...

4 Commits

Author SHA1 Message Date
Zachary Hampton
8388d47f73 - version bump 2023-10-01 09:13:37 -07:00
Zachary Hampton
ba503b0ca3 Merge pull request #27 from ddxv/zillow-ua-header
Zillow Request Header: Match observed behaivor in FireFox of not sending sec-ch-ua headers
2023-10-01 09:12:58 -07:00
james
8962d619e1 Match observed behaivor in FireFox of not sending ua-ch headers in request to prevent recent 403 2023-10-01 11:31:51 +08:00
Zachary Hampton
3b7c17b7b5 - zillow proxy support 2023-09-28 18:40:16 -07:00
3 changed files with 12 additions and 8 deletions

View File

@@ -1,5 +1,6 @@
from dataclasses import dataclass
import requests
import tls_client
from .models import Property, ListingType, SiteName
@@ -12,15 +13,20 @@ class ScraperInput:
class Scraper:
def __init__(self, scraper_input: ScraperInput):
def __init__(self, scraper_input: ScraperInput, session: requests.Session | tls_client.Session = None):
self.location = scraper_input.location
self.listing_type = scraper_input.listing_type
self.session = requests.Session()
if not session:
self.session = requests.Session()
else:
self.session = session
if scraper_input.proxy:
proxy_url = scraper_input.proxy
proxies = {"http": proxy_url, "https": proxy_url}
self.session.proxies.update(proxies)
self.listing_type = scraper_input.listing_type
self.site_name = scraper_input.site_name

View File

@@ -20,19 +20,17 @@ from datetime import datetime, timedelta
class ZillowScraper(Scraper):
def __init__(self, scraper_input):
super().__init__(scraper_input)
self.session = tls_client.Session(
session = tls_client.Session(
client_identifier="chrome112", random_tls_extension_order=True
)
super().__init__(scraper_input, session)
self.session.headers.update({
'authority': 'www.zillow.com',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-language': 'en-US,en;q=0.9',
'cache-control': 'max-age=0',
'sec-ch-ua': '"Chromium";v="117", "Not)A;Brand";v="24", "Google Chrome";v="117"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'same-origin',

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "homeharvest"
version = "0.2.17"
version = "0.2.19"
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
homepage = "https://github.com/ZacharyHampton/HomeHarvest"