2023-09-19 13:01:39 -07:00
|
|
|
import argparse
|
|
|
|
import datetime
|
|
|
|
from homeharvest import scrape_property
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Run the Home Harvest command-line scraper.

    Parses the command line, forwards the options to ``scrape_property``,
    and writes the returned table to ``<filename>.xlsx`` or
    ``<filename>.csv`` depending on ``--output``. When ``--filename`` is not
    supplied, a name of the form ``HomeHarvest_<YYYYMMDD_HHMMSS>`` is
    generated from the local clock after the scrape has finished.

    argparse terminates the process (``SystemExit``) on invalid or missing
    arguments before any scraping happens.
    """
    # Each entry is (flags, keyword settings) for one add_argument() call.
    # Registration order is kept because it determines the --help layout.
    option_specs = [
        (
            ("location",),
            {"type": str, "help": "Location to scrape (e.g., San Francisco, CA)"},
        ),
        (
            ("-l", "--listing_type"),
            {
                "type": str,
                "default": "for_sale",
                "choices": ["for_sale", "for_rent", "sold"],
                "help": "Listing type to scrape",
            },
        ),
        (
            ("-o", "--output"),
            {
                "type": str,
                "default": "excel",
                "choices": ["excel", "csv"],
                "help": "Output format",
            },
        ),
        (
            ("-f", "--filename"),
            {
                "type": str,
                "default": None,
                "help": "Name of the output file (without extension)",
            },
        ),
        (
            ("-p", "--proxy"),
            {"type": str, "default": None, "help": "Proxy to use for scraping"},
        ),
        (
            ("-d", "--days"),
            {
                "type": int,
                "default": None,
                "help": "Sold/listed in last _ days filter.",
            },
        ),
        (
            ("-r", "--radius"),
            {
                "type": float,
                "default": None,
                "help": "Get comparable properties within _ (eg. 0.0) miles. Only applicable for individual addresses.",
            },
        ),
        (
            ("-m", "--mls_only"),
            {"action": "store_true", "help": "If set, fetches only MLS listings."},
        ),
    ]

    cli = argparse.ArgumentParser(description="Home Harvest Property Scraper")
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    args = cli.parse_args()

    # NOTE(review): result is presumably a pandas DataFrame (it is only used
    # through to_excel / to_csv below) -- confirm against scrape_property.
    result = scrape_property(
        args.location,
        args.listing_type,
        radius=args.radius,
        proxy=args.proxy,
        mls_only=args.mls_only,
        property_younger_than=args.days,
    )

    # Fall back to a timestamped name when the user gave none (or an empty one).
    base_name = args.filename
    if not base_name:
        stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        base_name = f"HomeHarvest_{stamp}"

    # --output value -> (file extension, writer method on result, label).
    # argparse's ``choices`` restricts args.output to the keys of this table.
    export_formats = {
        "excel": ("xlsx", "to_excel", "Excel"),
        "csv": ("csv", "to_csv", "CSV"),
    }
    extension, writer_name, label = export_formats[args.output]

    output_filename = f"{base_name}.{extension}"
    getattr(result, writer_name)(output_filename, index=False)
    print(f"{label} file saved as {output_filename}")
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|