fix: redfin

redfin
Cullen 2024-04-04 17:05:00 -05:00
parent 01c53f9399
commit d05bc5d79f
3 changed files with 13 additions and 1 deletions

11
example.py Normal file
View File

@ -0,0 +1,11 @@
from homeharvest import scrape_property
import pandas as pd
# Example run: query the "redfin" site for the location string "85281" and
# keep the result both on stdout and on disk.
search_options = {
    "site_name": ["redfin"],
    "location": "85281",
    # Alternative listing types noted by the original author: for_sale / sold
    "listing_type": "for_rent",
}

# scrape_property is annotated here as returning a pandas DataFrame.
properties: pd.DataFrame = scrape_property(**search_options)

# Show the scraped rows, then persist them without the DataFrame index column.
print(properties)
properties.to_csv('properties.csv', index=False)

View File

@ -17,6 +17,7 @@ class Scraper:
self.listing_type = scraper_input.listing_type self.listing_type = scraper_input.listing_type
self.session = requests.Session() self.session = requests.Session()
self.session.headers.update({"user-agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'})
if scraper_input.proxy: if scraper_input.proxy:
proxy_url = scraper_input.proxy proxy_url = scraper_input.proxy
proxies = {"http": proxy_url, "https": proxy_url} proxies = {"http": proxy_url, "https": proxy_url}

View File

@ -21,7 +21,7 @@ class RedfinScraper(Scraper):
def _handle_location(self): def _handle_location(self):
url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(self.location) url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(self.location)
response = self.session.get(url) response = self.session.get(url, headers={"user-agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'})
response_json = json.loads(response.text.replace("{}&&", "")) response_json = json.loads(response.text.replace("{}&&", ""))
def get_region_type(match_type: str): def get_region_type(match_type: str):