diff --git a/README.md b/README.md index 0b4597c..565228c 100644 --- a/README.md +++ b/README.md @@ -90,9 +90,11 @@ Optional │ ├── foreclosure (True/False): If set, fetches only foreclosures │ -└── proxy (string): In format 'http://user:pass@host:port' +├── proxy (string): In format 'http://user:pass@host:port' │ -└── extra_property_data (bool): Increases requests by O(n). If set, this fetches additional property data (e.g. agent, broker, property evaluations etc.) +├── extra_property_data (True/False): Increases requests by O(n). If set, this fetches additional property data (e.g. agent, broker, property evaluations etc.) +│ +└── exclude_pending (True/False): If set, excludes pending or contingent properties from the results unless listing_type is 'pending' ``` ### Property Schema @@ -142,6 +144,11 @@ Property │ ├── agent │ ├── agent_email │ └── agent_phone + +├── Broker Info: +│ ├── broker +│ ├── broker_email +│ └── broker_website ``` ### Exceptions diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index 2bbacf8..a309591 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -17,11 +17,12 @@ def scrape_property( date_to: str = None, foreclosure: bool = None, extra_property_data: bool = True, + exclude_pending: bool = False, ) -> pd.DataFrame: """ Scrape properties from Realtor.com based on a given location and listing type. :param location: Location to search (e.g. "Dallas, TX", "85281", "2530 Al Lipscomb Way") - :param listing_type: Listing Type (for_sale, for_rent, sold) + :param listing_type: Listing Type (for_sale, for_rent, sold, pending) :param radius: Get properties within _ (e.g. 1.0) miles. Only applicable for individual addresses. :param mls_only: If set, fetches only listings with MLS IDs. :param proxy: Proxy to use for scraping @@ -29,6 +30,7 @@ def scrape_property( :param date_from, date_to: Get properties sold or listed (dependent on your listing_type) between these dates. 
format: 2021-01-28 :param foreclosure: If set, fetches only foreclosure listings. :param extra_property_data: Increases requests by O(n). If set, this fetches additional property data (e.g. agent, broker, property evaluations etc.) + :param exclude_pending: If true, this excludes pending or contingent properties from the results, unless listing_type is 'pending'. """ validate_input(listing_type) validate_dates(date_from, date_to) @@ -44,6 +46,7 @@ def scrape_property( date_to=date_to, foreclosure=foreclosure, extra_property_data=extra_property_data, + exclude_pending=exclude_pending, ) site = RealtorScraper(scraper_input) diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index 8888e78..6f1b9b6 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -21,6 +21,7 @@ class ScraperInput: date_to: str | None = None foreclosure: bool | None = False extra_property_data: bool | None = True + exclude_pending: bool | None = False class Scraper: @@ -62,6 +63,7 @@ class Scraper: self.date_to = scraper_input.date_to self.foreclosure = scraper_input.foreclosure self.extra_property_data = scraper_input.extra_property_data + self.exclude_pending = scraper_input.exclude_pending def search(self) -> list[Property]: ... 
diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index 8fd596f..942722b 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -525,7 +525,7 @@ class RealtorScraper(Scraper): is_pending = result["flags"].get("is_pending") or result["flags"].get("is_contingent") - if is_pending and self.listing_type != ListingType.PENDING: + if is_pending and (self.exclude_pending and self.listing_type != ListingType.PENDING): return property_id = result["property_id"] diff --git a/pyproject.toml b/pyproject.toml index 221c02c..cd5aa16 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.3.28" +version = "0.3.29" description = "Real estate scraping library" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/Bunsly/HomeHarvest" diff --git a/tests/test_realtor.py b/tests/test_realtor.py index c50dae8..e88271b 100644 --- a/tests/test_realtor.py +++ b/tests/test_realtor.py @@ -4,7 +4,7 @@ from homeharvest import scrape_property def test_realtor_pending_or_contingent(): pending_or_contingent_result = scrape_property(location="Surprise, AZ", listing_type="pending") - regular_result = scrape_property(location="Surprise, AZ", listing_type="for_sale") + regular_result = scrape_property(location="Surprise, AZ", listing_type="for_sale", exclude_pending=True) assert all([result is not None for result in [pending_or_contingent_result, regular_result]]) assert len(pending_or_contingent_result) != len(regular_result) @@ -165,3 +165,13 @@ def test_pr_zip_code(): ) assert results is not None and len(results) > 0 + + +def test_exclude_pending(): + results = scrape_property( + location="33567", + listing_type="pending", + exclude_pending=True, + ) + + assert results is not None and len(results) > 0