HomeHarvest/homeharvest/core/scrapers/__init__.py

46 lines
1.2 KiB
Python
Raw Normal View History

2023-09-15 15:17:37 -07:00
from dataclasses import dataclass
import requests
2023-09-28 18:40:16 -07:00
import tls_client
2023-09-17 16:30:37 -07:00
from .models import Property, ListingType, SiteName
2023-09-15 15:17:37 -07:00
@dataclass
class ScraperInput:
    """Bundle of search parameters handed to a ``Scraper``.

    The first three fields are required; the remaining ones are optional
    and default to ``None``.
    """

    # --- required ---------------------------------------------------------
    location: str  # free-form location string; format not validated here
    listing_type: ListingType
    site_name: SiteName

    # --- optional ---------------------------------------------------------
    # NOTE(review): unit of ``radius`` is not shown in this module — confirm.
    radius: float | None = None
    # Proxy URL; when set, ``Scraper`` applies it to both http and https.
    proxy: str | None = None
    # NOTE(review): presumably restricts sold listings to the last N days —
    # verify against the concrete scrapers.
    sold_last_x_days: int | None = None
2023-09-15 15:17:37 -07:00
class Scraper:
    """Base scraper holding search parameters and the HTTP session.

    The constructor copies the search parameters from a ``ScraperInput``
    onto the instance and prepares a session. ``search``, ``_parse_home``
    and ``handle_location`` have placeholder bodies here and return
    ``None``; presumably site-specific subclasses override them — confirm
    against the concrete scrapers.
    """

    def __init__(
        self,
        scraper_input: ScraperInput,
        session: requests.Session | tls_client.Session | None = None,
    ):
        """Store the search parameters and set up the session.

        Args:
            scraper_input: Source of ``location``, ``listing_type``,
                ``site_name``, ``radius``, ``proxy`` and
                ``sold_last_x_days``.
            session: Session to reuse. When ``None``, a new
                ``requests.Session`` is created.

        Note:
            If ``scraper_input.proxy`` is truthy, the proxy URL is written
            into ``session.proxies`` for both ``http`` and ``https``. This
            mutates a caller-supplied session as well.
        """
        self.location = scraper_input.location
        self.listing_type = scraper_input.listing_type
        self.site_name = scraper_input.site_name
        self.radius = scraper_input.radius
        self.sold_last_x_days = scraper_input.sold_last_x_days

        # Compare against None explicitly: a caller-supplied session object
        # that happens to be falsy must not be silently replaced.
        self.session = requests.Session() if session is None else session

        if scraper_input.proxy:
            proxy_url = scraper_input.proxy
            # Route both plain and TLS traffic through the same proxy.
            self.session.proxies.update({"http": proxy_url, "https": proxy_url})

    def search(self) -> list[Property]:
        """Placeholder; returns ``None`` in this base class."""
        ...

    @staticmethod
    def _parse_home(home) -> Property:
        """Placeholder; returns ``None`` in this base class."""
        ...

    def handle_location(self):
        """Placeholder; returns ``None`` in this base class."""
        ...