2023-09-15 15:17:37 -07:00
|
|
|
from dataclasses import dataclass
|
|
|
|
import requests
|
2023-09-28 18:40:16 -07:00
|
|
|
import tls_client
|
2023-09-17 16:30:37 -07:00
|
|
|
from .models import Property, ListingType, SiteName
|
2023-09-15 15:17:37 -07:00
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class ScraperInput:
    """Bundle of parameters describing one scrape request.

    Attributes:
        location: Location to search, as a plain string.
        listing_type: ``ListingType`` value selecting the kind of listing.
        site_name: ``SiteName`` value identifying the target site.
        proxy: Optional proxy URL; ``None`` (the default) means no proxy
            is configured.
    """

    location: str
    listing_type: ListingType
    site_name: SiteName
    proxy: str | None = None
|
2023-09-15 15:17:37 -07:00
|
|
|
|
|
|
|
|
|
|
|
class Scraper:
    """Hold the search parameters and the HTTP session used for a scrape.

    ``search``, ``_parse_home`` and ``handle_location`` are placeholders in
    this class (their bodies are ``...``).
    NOTE(review): presumably site-specific subclasses override them --
    confirm against the subclasses.
    """

    def __init__(
        self,
        scraper_input: ScraperInput,
        session: requests.Session | tls_client.Session | None = None,
    ):
        """Copy the request parameters and prepare the HTTP session.

        Args:
            scraper_input: Source of ``location``, ``listing_type``,
                ``site_name`` and the optional ``proxy`` URL.
            session: Session to reuse. When ``None``, a fresh
                ``requests.Session`` is created.
        """
        self.location = scraper_input.location
        self.listing_type = scraper_input.listing_type
        self.site_name = scraper_input.site_name

        # Compare against None explicitly so that a caller-supplied session
        # is never replaced because of how it evaluates in a boolean context.
        self.session = requests.Session() if session is None else session

        if scraper_input.proxy:
            proxy_url = scraper_input.proxy
            # Route both plain and TLS traffic through the same proxy.
            self.session.proxies.update({"http": proxy_url, "https": proxy_url})

    def search(self) -> list[Property]:
        """Return the properties found for this scraper's parameters.

        Placeholder: the body is ``...``, so calling it here returns ``None``.
        """
        ...

    @staticmethod
    def _parse_home(home) -> Property:
        """Convert one raw ``home`` record into a ``Property``.

        Placeholder: the body is ``...``. The shape of ``home`` is not
        defined in this class.
        """
        ...

    def handle_location(self):
        """Process ``self.location`` before searching.

        Placeholder: the body is ``...``.
        NOTE(review): the intended return value is not specified here.
        """
        ...
|