parent
59317fd6fc
commit
3b7c17b7b5
|
@ -1,5 +1,6 @@
|
|||
from dataclasses import dataclass
|
||||
import requests
|
||||
import tls_client
|
||||
from .models import Property, ListingType, SiteName
|
||||
|
||||
|
||||
|
@ -12,15 +13,20 @@ class ScraperInput:
|
|||
|
||||
|
||||
class Scraper:
    """Base scraper that holds the search parameters and the HTTP session."""

    # Annotations are quoted so the union is not evaluated when the method is
    # defined; ``None`` is listed explicitly because it is the default.
    def __init__(
        self,
        scraper_input: "ScraperInput",
        session: "requests.Session | tls_client.Session | None" = None,
    ):
        """Store the search parameters and prepare the HTTP session.

        Args:
            scraper_input: Supplies ``location``, ``listing_type``,
                ``site_name`` and an optional ``proxy`` URL.
            session: Pre-built session to reuse. When omitted (``None``), a
                new ``requests.Session`` is created.
        """
        self.location = scraper_input.location
        self.listing_type = scraper_input.listing_type
        self.site_name = scraper_input.site_name

        # Compare against None rather than relying on truthiness, so that a
        # caller-supplied session object is never discarded just because it
        # happens to evaluate as falsy.
        self.session = requests.Session() if session is None else session

        if scraper_input.proxy:
            proxy_url = scraper_input.proxy
            # Route both plain and TLS traffic through the same proxy.
            self.session.proxies.update({"http": proxy_url, "https": proxy_url})
|
||||
|
||||
|
|
|
@ -20,11 +20,12 @@ from datetime import datetime, timedelta
|
|||
|
||||
class ZillowScraper(Scraper):
|
||||
def __init__(self, scraper_input):
|
||||
super().__init__(scraper_input)
|
||||
self.session = tls_client.Session(
|
||||
session = tls_client.Session(
|
||||
client_identifier="chrome112", random_tls_extension_order=True
|
||||
)
|
||||
|
||||
super().__init__(scraper_input, session)
|
||||
|
||||
self.session.headers.update({
|
||||
'authority': 'www.zillow.com',
|
||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "homeharvest"
|
||||
version = "0.2.17"
|
||||
version = "0.2.18"
|
||||
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
|
||||
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
||||
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
|
||||
|
|
Loading…
Reference in New Issue