fix: redfin
parent
01c53f9399
commit
d05bc5d79f
|
@ -0,0 +1,11 @@
|
||||||
|
from homeharvest import scrape_property
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
properties: pd.DataFrame = scrape_property(
|
||||||
|
site_name=["redfin"],
|
||||||
|
location="85281",
|
||||||
|
listing_type="for_rent" # for_sale / sold
|
||||||
|
)
|
||||||
|
|
||||||
|
print(properties)
|
||||||
|
properties.to_csv('properties.csv', index=False)
|
|
@ -17,6 +17,7 @@ class Scraper:
|
||||||
self.listing_type = scraper_input.listing_type
|
self.listing_type = scraper_input.listing_type
|
||||||
|
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
|
self.session.headers.update({"user-agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'})
|
||||||
if scraper_input.proxy:
|
if scraper_input.proxy:
|
||||||
proxy_url = scraper_input.proxy
|
proxy_url = scraper_input.proxy
|
||||||
proxies = {"http": proxy_url, "https": proxy_url}
|
proxies = {"http": proxy_url, "https": proxy_url}
|
||||||
|
|
|
@ -21,7 +21,7 @@ class RedfinScraper(Scraper):
|
||||||
def _handle_location(self):
|
def _handle_location(self):
|
||||||
url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(self.location)
|
url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format(self.location)
|
||||||
|
|
||||||
response = self.session.get(url)
|
response = self.session.get(url, headers={"user-agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'})
|
||||||
response_json = json.loads(response.text.replace("{}&&", ""))
|
response_json = json.loads(response.text.replace("{}&&", ""))
|
||||||
|
|
||||||
def get_region_type(match_type: str):
|
def get_region_type(match_type: str):
|
||||||
|
|
Loading…
Reference in New Issue