#!/usr/bin/env python3
"""
Generic ArchitectOS / dev-project cleanup utility.

Interactive TUI-style menu to reclaim disk space safely during development.

Usage:
    python3 tools/aos_cleanup.py [--root /path/to/project]

Defaults:
    - If --root is not provided:
        * If this script is in ROOT/tools/aos_cleanup.py, ROOT = parent of parent.
        * Otherwise, ROOT = current working directory.
"""

import argparse
import os
import shutil
import subprocess
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Iterable, List, Optional, Tuple

def resolve_root(explicit_root: Optional[str]) -> Path:
    """Work out which directory the cleanup tool should operate on.

    Args:
        explicit_root: Value of ``--root``; ``None`` or an empty string
            means "infer it".

    Returns:
        The expanded, resolved ``explicit_root`` when one is given.
        Otherwise the grandparent of this script when the script lives in a
        directory named ``tools``, else the resolved current working
        directory.
    """
    if not explicit_root:
        script_dir = Path(__file__).resolve().parent
        # Layout ROOT/tools/<script>: the project root is one level above tools/.
        if script_dir.name == "tools":
            return script_dir.parent
        return Path.cwd().resolve()

    return Path(explicit_root).expanduser().resolve()

def confirm(prompt: str) -> bool:
    """Ask a yes/no question on stdin and report whether the user agreed.

    Args:
        prompt: Question text; `` [y/N]: `` is appended to it.

    Returns:
        ``True`` only for ``y`` / ``yes`` (case-insensitive, surrounding
        whitespace ignored). Any other answer, and end-of-input, is a "no".
    """
    try:
        ans = input(f"{prompt} [y/N]: ").strip().lower()
    except EOFError:
        # Closed/exhausted stdin must never be read as consent to delete.
        print("")
        return False
    return ans in ("y", "yes")

def safe_rmtree(path: Path) -> None:
    """Recursively delete *path*, reporting the outcome instead of raising.

    Prints one status line: the path did not exist, the tree was removed,
    or removal failed (with the exception text).
    """
    if path.exists():
        try:
            shutil.rmtree(path)
        except Exception as err:
            # Best-effort cleanup: report and carry on with the next target.
            print(f"  ! Failed to remove {path}: {err}")
        else:
            print(f"  - Removed directory tree: {path}")
    else:
        print(f"  - {path} (does not exist)")

def safe_unlink(path: Path) -> None:
    """Delete a single file (or symlink), reporting the outcome instead of raising.

    Prints one status line: the path did not exist, the file was removed,
    or removal failed (with the error text).

    Args:
        path: File or symlink to remove.
    """
    # Path.exists() follows symlinks, so a dangling link would look "missing"
    # and be left behind; is_symlink() inspects the link itself.
    if not (path.exists() or path.is_symlink()):
        print(f"  - {path} (does not exist)")
        return
    try:
        path.unlink()
    except OSError as e:
        # Best-effort cleanup: report and carry on with the next target.
        print(f"  ! Failed to remove {path}: {e}")
    else:
        print(f"  - Removed file: {path}")

def du(path: Path) -> Optional[str]:
    """Return the stripped output line of ``du -sh <path>``.

    Returns:
        The raw text printed by ``du`` (size followed by the path), or
        ``None`` when *path* does not exist or the command could not be run
        successfully.
    """
    if path.exists():
        try:
            completed = subprocess.run(
                ["du", "-sh", str(path)],
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                check=True,
            )
        except Exception:
            # Missing binary, non-zero exit, etc.: the size is simply unknown.
            return None
        return completed.stdout.decode("utf-8", errors="ignore").strip()
    return None

def list_dir_sizes(root: Path) -> List[str]:
    """Collect ``du`` report lines for every direct child of *root*.

    Children are visited in name order; entries for which ``du`` yields
    nothing are left out.
    """
    ordered_children = sorted(root.iterdir(), key=lambda entry: entry.name)
    reports = (du(entry) for entry in ordered_children)
    return [report for report in reports if report]

def iter_dirs(root: Path) -> Iterable[Path]:
    """Yield *root* and every directory ``os.walk`` visits beneath it, as ``Path`` objects."""
    for step in os.walk(root):
        # step is (dirpath, dirnames, filenames); only the directory itself matters here.
        yield Path(step[0])

# Directory names that discover_build_dirs() treats as disposable build/cache output.
BUILD_DIR_NAMES = {
    "target",
    "node_modules",
    "dist",
    "build",
    ".next",
    ".tox",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    "__pycache__",
}

# Candidate virtualenv directory names; is_virtualenv_dir() additionally
# requires an activate script before a directory counts as a virtualenv.
VENV_DIR_NAMES = {
    "venv",
    ".venv",
    "env",
    ".env",
}

def discover_build_dirs(root: Path):
    """Find the top-most build-artifact directories strictly below *root*.

    A directory qualifies when its name is in ``BUILD_DIR_NAMES``. The walk
    does not descend into a match, so nested matches (for example
    ``node_modules/pkg/dist``) are not listed separately: removing the outer
    directory already removes them.

    Args:
        root: Project root to scan. It is never returned itself, even when
            its own name is in ``BUILD_DIR_NAMES``.

    Returns:
        Sorted list of matching ``Path`` objects.
    """
    root = Path(root)
    results = []
    for dirpath, dirnames, _ in os.walk(root):
        current = Path(dirpath)
        # A project that happens to live in e.g. ".../build" must not be
        # offered for deletion as a whole.
        if current == root:
            continue
        if current.name in BUILD_DIR_NAMES:
            results.append(current)
            # Prune in place so os.walk skips everything below this match.
            dirnames[:] = []
    return sorted(results)

def is_virtualenv_dir(path: Path) -> bool:
    """Tell whether *path* looks like a Python virtualenv.

    It must be a directory, carry one of the names in ``VENV_DIR_NAMES``,
    and contain ``bin/activate`` or ``Scripts/activate``.
    """
    if not (path.is_dir() and path.name in VENV_DIR_NAMES):
        return False
    # Two layouts are accepted: bin/activate and Scripts/activate.
    return any((path / scripts / "activate").exists() for scripts in ("bin", "Scripts"))

def discover_venvs(root: Path):
    """Find virtualenv directories strictly below *root*.

    A directory qualifies when ``is_virtualenv_dir`` accepts it. The walk
    does not descend into a match, since everything below it is removed
    together with it.

    Args:
        root: Project root to scan. It is never returned itself.

    Returns:
        Sorted list of matching ``Path`` objects.
    """
    root = Path(root)
    results = []
    for dirpath, dirnames, _ in os.walk(root):
        current = Path(dirpath)
        # Only environments *under* the project are candidates, never the root.
        if current == root:
            continue
        if is_virtualenv_dir(current):
            results.append(current)
            # Prune in place so os.walk skips the environment's contents.
            dirnames[:] = []
    return sorted(results)

def discover_logs_dirs(root: Path):
    """Find every ``logs/commands`` directory reachable from *root*.

    Returns:
        Sorted list of directories named ``commands`` whose parent directory
        is named ``logs``.
    """
    matches = set()
    for candidate in iter_dirs(root):
        if candidate.name != "commands" or candidate.parent.name != "logs":
            continue
        try:
            candidate.relative_to(root)
        except ValueError:
            # Not located under root: ignore it.
            continue
        matches.add(candidate)
    return sorted(matches)

def discover_rag_dbs(root: Path):
    """List the ``*.db`` files sitting directly inside ``<root>/data``.

    Returns:
        Sorted list of matching paths; an empty list when ``<root>/data``
        does not exist.
    """
    db_home = root / "data"
    if db_home.exists():
        # Non-recursive: only files directly inside data/ are considered.
        return sorted(set(db_home.glob("*.db")))
    return []

def action_show_disk_usage(root: Path) -> None:
    """Menu action: print one ``du`` report line per direct child of *root*."""
    print(f"\n=== Disk usage summary for {root} ===\n")
    report = list_dir_sizes(root)
    if report:
        for entry in report:
            print(entry)
        print()
    else:
        print("No subdirectories or 'du' not available.")

def action_clean_build_dirs(root: Path) -> None:
    """Menu action: list build-artifact directories and remove them on confirmation.

    Args:
        root: Project root passed to ``discover_build_dirs``.
    """
    print("\n=== Build artifact directories ===\n")
    build_dirs = discover_build_dirs(root)
    if not build_dirs:
        print("No build artifact directories found.")
        return

    for d in build_dirs:
        usage = du(d)
        # du() returns the whole "SIZE<whitespace>PATH" line; keep only the
        # size so the path is not printed a second time inside the parentheses.
        size = usage.split(None, 1)[0] if usage else "size unknown"
        print(f"  - {d} ({size})")
    print("")

    if not confirm("Remove ALL of these build directories?"):
        print("Aborted.")
        return

    for d in build_dirs:
        safe_rmtree(d)

def action_clean_venvs(root: Path) -> None:
    """Menu action: list virtualenv directories and remove them on confirmation.

    Args:
        root: Project root passed to ``discover_venvs``.
    """
    print("\n=== Python virtualenv directories ===\n")
    venvs = discover_venvs(root)
    if not venvs:
        print("No virtualenv directories found.")
        return

    for v in venvs:
        usage = du(v)
        # du() returns the whole "SIZE<whitespace>PATH" line; keep only the
        # size so the path is not printed a second time inside the parentheses.
        size = usage.split(None, 1)[0] if usage else "size unknown"
        print(f"  - {v} ({size})")
    print("")

    if not confirm("Remove ALL of these virtualenvs?"):
        print("Aborted.")
        return

    for v in venvs:
        safe_rmtree(v)

def action_prune_logs(root: Path) -> None:
    """Menu action: delete ``*.log`` files from the ``logs/commands`` directories.

    The user is asked for an age threshold in days. ``0`` means every log
    file; otherwise only files whose modification time is older than the
    threshold are deleted. Either way a confirmation is required first.

    Args:
        root: Project root passed to ``discover_logs_dirs``.
    """
    logs_dirs = discover_logs_dirs(root)
    if not logs_dirs:
        print("\nNo logs/commands directories found under root.\n")
        return

    print("\n=== Command log directories ===\n")
    for d in logs_dirs:
        usage = du(d)
        # du() returns the whole "SIZE<whitespace>PATH" line; keep only the
        # size so the path is not printed a second time inside the parentheses.
        size = usage.split(None, 1)[0] if usage else "size unknown"
        print(f"  - {d} ({size})")
    print("")

    try:
        days_str = input("Delete logs older than how many days? (e.g. 7, or 0 for ALL): ").strip()
        days = int(days_str)
    except (ValueError, EOFError):
        # Non-numeric answer or closed stdin: nothing is deleted.
        print("Invalid number, aborting.")
        return

    if days < 0:
        print("Days cannot be negative, aborting.")
        return

    if days == 0:
        if not confirm("Delete ALL command logs in these directories?"):
            print("Aborted.")
            return
        cutoff = None
    else:
        cutoff = datetime.now() - timedelta(days=days)
        print(f"Will delete logs older than {days} days (before {cutoff}).")
        if not confirm("Proceed with deletion?"):
            print("Aborted.")
            return

    deleted = 0
    for d in logs_dirs:
        for p in d.glob("*.log"):
            if cutoff is not None:
                try:
                    mtime = datetime.fromtimestamp(p.stat().st_mtime)
                except OSError:
                    # File vanished or cannot be stat'ed: leave it alone.
                    continue
                if mtime >= cutoff:
                    continue
            safe_unlink(p)
            # safe_unlink reports failures instead of raising, so verify the
            # file is really gone before counting it as deleted.
            if not (p.exists() or p.is_symlink()):
                deleted += 1

    print(f"\nDeleted {deleted} log files.\n")

def action_delete_rag_dbs(root: Path) -> None:
    """Menu action: list the ``data/*.db`` files and delete them on confirmation.

    Args:
        root: Project root passed to ``discover_rag_dbs``.
    """
    rag_dbs = discover_rag_dbs(root)
    print("\n=== RAG database files (data/*.db) ===\n")
    if not rag_dbs:
        print("No RAG DB files found under data/.")
        return

    for db in rag_dbs:
        usage = du(db)
        # du() returns the whole "SIZE<whitespace>PATH" line; keep only the
        # size so the path is not printed a second time inside the parentheses.
        size = usage.split(None, 1)[0] if usage else "size unknown"
        print(f"  - {db} ({size})")

    print("")
    if not confirm("Delete ALL of these RAG DB files? (They are rebuildable.)"):
        print("Aborted.")
        return

    for db in rag_dbs:
        safe_unlink(db)

def action_clear_pip_cache() -> None:
    """Menu action: show the size of ``~/.cache/pip`` and remove it on confirmation."""
    pip_cache = Path.home() / ".cache" / "pip"
    print(f"\n=== Pip cache at {pip_cache} ===\n")
    # Check existence first so no du subprocess is spawned for a missing path.
    if not pip_cache.exists():
        print("Pip cache directory does not exist.")
        return
    usage = du(pip_cache)
    # du() returns the whole "SIZE<whitespace>PATH" line; show only the size.
    size = usage.split(None, 1)[0] if usage else "size unknown"
    print(f"Current size: {size}\n")
    if not confirm("Remove the entire pip cache? It will be regenerated as needed."):
        print("Aborted.")
        return
    safe_rmtree(pip_cache)

def action_show_biggest_dirs(root: Path) -> None:
    """Menu action: print the 15 largest directories one and two levels below *root*.

    Sizes come from ``du -sk`` so the unit is 1024-byte blocks; plain
    ``du -s`` uses a block size that varies between platforms and
    environments. Directories that cannot be listed or measured are skipped
    rather than aborting the report.

    Args:
        root: Directory whose children and grandchildren are measured.
    """
    print(f"\n=== Largest directories under {root} (via du -sk) ===\n")
    candidates = []
    try:
        children = [child for child in root.iterdir() if child.is_dir()]
    except OSError:
        children = []
    for child in children:
        candidates.append(child)
        try:
            candidates.extend(sub for sub in child.iterdir() if sub.is_dir())
        except OSError:
            # Unreadable directory: it stays a candidate itself, but its
            # children cannot be enumerated.
            continue

    scored = []
    for c in candidates:
        if not c.exists():
            continue
        try:
            out = subprocess.check_output(
                ["du", "-sk", str(c)],
                stderr=subprocess.DEVNULL,
            )
            kib = int(out.decode("utf-8", errors="ignore").strip().split()[0])
        except Exception:
            # du missing, failed, or printed something unexpected: no size for this entry.
            continue
        scored.append((kib, c))

    if not scored:
        print("No size data available.")
        return

    scored.sort(reverse=True)
    top_n = 15
    for kib, path in scored[:top_n]:
        size_mb = kib / 1024
        if size_mb < 1024:
            size_str = f"{size_mb:.1f} MB"
        else:
            size_str = f"{size_mb/1024:.2f} GB"
        print(f"{size_str:>10}  {path}")
    print("")

def action_all_safe(root: Path) -> None:
    """Menu action: after one up-front confirmation, run every project cleanup in turn.

    The steps run in order: build directories, virtualenvs, command logs,
    RAG databases.
    """
    summary = (
        "\n=== ALL SAFE CLEANUPS ===",
        "This will:",
        "  - Remove build artifact directories (target, node_modules, dist, build, ...)",
        "  - Remove virtualenv directories under the project (venv, .venv, ...)",
        "  - Prune old command logs (older than N days you choose)",
        "  - Delete RAG DB files under data/",
        "It WILL NOT touch your source files or the main project venv unless it matches the heuristics.\n",
    )
    for text in summary:
        print(text)

    if confirm("Proceed with ALL safe cleanups?"):
        steps = (
            action_clean_build_dirs,
            action_clean_venvs,
            action_prune_logs,
            action_delete_rag_dbs,
        )
        for step in steps:
            step(root)
    else:
        print("Aborted.")

def print_menu(root: Path) -> None:
    """Print the main menu: a banner naming *root*, then the nine numbered options."""
    banner = f"""\n====================================================
 ArchitectOS / Dev Project Cleanup
 Root: {root}
====================================================
"""
    choices = """  1) Show disk usage summary for root
  2) Clean build artifact directories
  3) Clean Python virtualenv directories
  4) Prune command logs (logs/commands)
  5) Delete RAG DB files (data/*.db)
  6) Clear pip cache (~/.cache/pip)
  7) Show largest directories under root
  8) Run ALL safe cleanups
  9) Quit
"""
    print(banner + choices)

def main() -> None:
    """Parse ``--root``, validate it, and run the interactive menu loop.

    Exits with status 1 when the resolved root does not exist or is not a
    directory. The loop ends when the user picks option 9, or when input
    ends or the user presses Ctrl-C at any prompt.
    """
    parser = argparse.ArgumentParser(description="Generic dev cleanup utility")
    parser.add_argument(
        "--root",
        help="Project root directory (default: inferred from script location or cwd)",
    )
    args = parser.parse_args()

    root = resolve_root(args.root)

    if not root.exists():
        print(f"ERROR: root path does not exist: {root}", file=sys.stderr)
        sys.exit(1)
    # Every action lists or walks root, so a plain file would only fail later
    # with a traceback.
    if not root.is_dir():
        print(f"ERROR: root path is not a directory: {root}", file=sys.stderr)
        sys.exit(1)

    # Menu key -> zero-argument callable performing that action.
    actions = {
        "1": lambda: action_show_disk_usage(root),
        "2": lambda: action_clean_build_dirs(root),
        "3": lambda: action_clean_venvs(root),
        "4": lambda: action_prune_logs(root),
        "5": lambda: action_delete_rag_dbs(root),
        "6": action_clear_pip_cache,
        "7": lambda: action_show_biggest_dirs(root),
        "8": lambda: action_all_safe(root),
    }

    try:
        while True:
            print_menu(root)
            choice = input("Select an option (1-9): ").strip()

            if choice == "9":
                print("Goodbye.")
                break

            action = actions.get(choice)
            if action is None:
                print("Invalid choice. Please enter a number 1-9.")
                continue
            action()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C at any prompt: leave quietly, no traceback.
        print("\nGoodbye.")

# Start the interactive menu only when run as a script, not when imported.
if __name__ == "__main__":
    main()
