diff --git a/README.md b/README.md index f167bd1..2e6e2b4 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,24 @@ pip install --force-reinstall homeharvest ``` _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_ - + ## Usage + +### CLI + +```bash +homeharvest "San Francisco, CA" --site_name zillow realtor.com redfin --listing_type for_rent --output excel --filename HomeHarvest +``` + +This will scrape properties from the specified sites for the given location and listing type, and save the results to an Excel file named `HomeHarvest.xlsx`. + +By default: +- If `--site_name` is not provided, it will scrape from all available sites. +- If `--listing_type` is left blank, the default is `for_sale`; other options are `for_rent` or `sold`. +- The `--output` default format is `excel`; options are `csv` or `excel`. +- If `--filename` is left blank, the default is `HomeHarvest_<timestamp>` (timestamp format `YYYYMMDD_HHMMSS`). + +### Python ```py from homeharvest import scrape_property import pandas as pd @@ -105,7 +121,14 @@ Property │ └── bldg_min_area (int) └── Apartment Details (for property type: apartment): - └── apt_min_price (int) + ├── apt_min_beds: int + ├── apt_max_beds: int + ├── apt_min_baths: float + ├── apt_max_baths: float + ├── apt_min_price: int + ├── apt_max_price: int + ├── apt_min_sqft: int + └── apt_max_sqft: int ``` ## Supported Countries for Property Scraping diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index 5d21217..05cbea3 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -68,10 +68,10 @@ def get_ordered_properties(result: Property) -> list[str]: "year_built", "agent_name", "mls_id", - "description", "img_src", "latitude", "longitude", + "description", ] diff --git a/homeharvest/cli.py b/homeharvest/cli.py new file mode 100644 index 0000000..a056dd3 --- /dev/null +++ b/homeharvest/cli.py @@ -0,0 +1,57 @@ +import argparse +import datetime +from homeharvest import scrape_property + + +def main(): + parser = 
argparse.ArgumentParser(description="Home Harvest Property Scraper") + parser.add_argument( + "location", type=str, help="Location to scrape (e.g., San Francisco, CA)" + ) + parser.add_argument( + "--site_name", + type=str, + nargs="*", + default=None, + help="Site name(s) to scrape from (e.g., realtor.com zillow)", + ) + parser.add_argument( + "--listing_type", + type=str, + default="for_sale", + choices=["for_sale", "for_rent", "sold"], + help="Listing type to scrape", + ) + parser.add_argument( + "--output", + type=str, + default="excel", + choices=["excel", "csv"], + help="Output format", + ) + parser.add_argument( + "--filename", + type=str, + default=None, + help="Name of the output file (without extension)", + ) + + args = parser.parse_args() + result = scrape_property(args.location, args.site_name, args.listing_type) + + if not args.filename: + timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + args.filename = f"HomeHarvest_{timestamp}" + + if args.output == "excel": + output_filename = f"{args.filename}.xlsx" + result.to_excel(output_filename, index=False) + print(f"Excel file saved as {output_filename}") + elif args.output == "csv": + output_filename = f"{args.filename}.csv" + result.to_csv(output_filename, index=False) + print(f"CSV file saved as {output_filename}") + + +if __name__ == "__main__": + main() diff --git a/homeharvest/core/scrapers/zillow/__init__.py b/homeharvest/core/scrapers/zillow/__init__.py index 3eb108c..43571f6 100644 --- a/homeharvest/core/scrapers/zillow/__init__.py +++ b/homeharvest/core/scrapers/zillow/__init__.py @@ -188,7 +188,9 @@ class ZillowScraper(Scraper): else None, "img_src": result.get("imgSrc"), "price_per_sqft": int(home_info["price"] // home_info["livingArea"]) - if "livingArea" in home_info and "price" in home_info + if "livingArea" in home_info + and home_info["livingArea"] != 0 + and "price" in home_info else None, } property_obj = Property(**property_data) diff --git a/pyproject.toml b/pyproject.toml 
index 23ea555..b3bc618 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,14 @@ [tool.poetry] name = "homeharvest" -version = "0.2.2" +version = "0.2.3" description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin." authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/ZacharyHampton/HomeHarvest" readme = "README.md" +[tool.poetry.scripts] +homeharvest = "homeharvest.cli:main" + [tool.poetry.dependencies] python = "^3.10" requests = "^2.31.0" @@ -18,4 +21,4 @@ pytest = "^7.4.2" [build-system] requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" +build-backend = "poetry.core.masonry.api" \ No newline at end of file