#!/usr/bin/env python3
"""CLI for testing Qdrant hybrid search."""

import argparse
import json
import sys
from pathlib import Path

from dotenv import load_dotenv

from .config import SearchConfig, IndexConfig
from .search import QdrantHybridSearch
from .indexer import QdrantIndexer


def _print_top_hits(label: str, points, shown: int = 3) -> None:
    """Print the hit count for *label*, then the first *shown* hits.

    Each hit line shows its 1-based rank, its score, and the payload's
    ``summary`` (falling back to ``path``, then ``"N/A"``).
    """
    print(f"[DEBUG] {label} results: {len(points)}")
    for rank, point in enumerate(points[:shown], 1):
        # Guard against a missing payload so one odd point cannot abort
        # the whole debug listing.
        payload = point.payload or {}
        summary = payload.get("summary", payload.get("path", "N/A"))
        print(f"[DEBUG]   {rank}. [{point.score:.3f}] {summary}")


def _print_debug_info(search: QdrantHybridSearch, query: str) -> None:
    """Print debug information about query enrichment and sparse vectors.

    Prints, in order: the original and synonym-enriched query, the sparse
    vector built from ``query``, the weighted words of the enriched query,
    and the top hits of direct sparse, dense and matryoshka queries against
    the configured collection.

    Each direct query is a best-effort probe: an exception is reported on
    its own ``[DEBUG]`` line and the remaining probes still run.

    Args:
        search: Search object providing ``client``, ``config``,
            ``embedding_client`` and ``_text_to_sparse``.
        query: The raw user query.
    """
    from .synonyms import SynonymExpander
    from .weights import get_word_weight, STOPWORDS

    enriched = SynonymExpander().enrich_text(query)

    print(f"\n[DEBUG] Original query: {query}")
    print(f"[DEBUG] Enriched query (used for ALL searches): {enriched}")

    sparse = search._text_to_sparse(query)
    print(f"[DEBUG] Sparse vector indices count: {len(sparse.indices)}")
    print(f"[DEBUG] Sparse indices: {sparse.indices}")
    print(f"[DEBUG] Sparse values: {sparse.values}")

    # Show words used for sparse vector with weights. Each word is listed
    # once, in first-seen order; the set makes the duplicate check O(1).
    seen = set()
    word_weights = []
    for word in enriched.lower().split():
        clean = "".join(c for c in word if c.isalnum())
        if len(clean) <= 1 or clean in STOPWORDS or clean in seen:
            continue
        weight = get_word_weight(clean)
        if weight > 0:
            seen.add(clean)
            word_weights.append((clean, weight))
    print(f"[DEBUG] Words with weights: {word_weights}")

    def top_points(vector, using):
        """Run one direct query against the named vector, limited to 5 hits."""
        return search.client.query_points(
            collection_name=search.config.collection_name,
            query=vector,
            using=using,
            limit=5,
            with_payload=True,
        ).points

    # Test sparse search directly
    print("\n[DEBUG] Testing sparse search directly...")
    try:
        _print_top_hits("Sparse search", top_points(sparse, "sparse"))
    except Exception as e:
        print(f"[DEBUG] Sparse search error: {e}")

    # Test dense search directly (with enriched query, same as main search).
    # The embedding is bound up front so the matryoshka section can tell
    # "embedding failed" apart from "matryoshka query failed".
    query_embedding = None
    print("\n[DEBUG] Testing dense search directly...")
    try:
        query_embedding = search.embedding_client.get_embedding(enriched)
        _print_top_hits("Dense search", top_points(query_embedding, "dense"))
    except Exception as e:
        print(f"[DEBUG] Dense search error: {e}")

    # Test matryoshka search: reuse the dense embedding, truncated to each
    # configured dimension.
    print("\n[DEBUG] Testing matryoshka search...")
    if query_embedding is None:
        print("[DEBUG] Matryoshka search skipped: query embedding unavailable")
    else:
        try:
            for dim in sorted(search.config.matryoshka_dims):
                hits = top_points(query_embedding[:dim], f"matryoshka_{dim}")
                _print_top_hits(f"Matryoshka {dim}d", hits)
        except Exception as e:
            print(f"[DEBUG] Matryoshka search error: {e}")

    print()


def _parse_filters(raw_filters) -> dict:
    """Convert repeated ``field=value`` strings into a filter dict.

    Only the first ``=`` splits the entry, so values may contain ``=``.
    Prints an error to stderr and exits with status 1 when an entry has no
    ``=`` or an empty field name.
    """
    filters = {}
    for item in raw_filters or []:
        key, sep, value = item.partition("=")
        if not sep or not key:
            print(
                f"Error: Invalid filter '{item}' (expected field=value)",
                file=sys.stderr,
            )
            sys.exit(1)
        filters[key] = value
    return filters


def cmd_search(args: argparse.Namespace, config: SearchConfig) -> None:
    """Execute search command.

    With a query, runs one search and prints the results, as JSON when
    ``args.json`` is set and as a readable listing otherwise. Without a
    query, starts an interactive prompt that ends on ``q``/``quit``/``exit``,
    Ctrl-C, or end of input (Ctrl-D / exhausted stdin). ``args.json`` is not
    consulted in interactive mode.

    Args:
        args: Parsed CLI arguments; reads ``query``, ``filter``, ``top_k``,
            ``no_rerank``, ``json`` and ``debug``.
        config: Search configuration used to build the search object.
    """
    # Validate filters first so a malformed --filter fails before the
    # search object is constructed.
    filters = _parse_filters(args.filter)
    search = QdrantHybridSearch(config)

    def run(text):
        """Optionally print debug info, then run one search for *text*."""
        if args.debug:
            _print_debug_info(search, text)
        return search.search(
            text,
            top_k=args.top_k,
            filters=filters if filters else None,
            use_reranking=not args.no_rerank,
        )

    query = args.query
    if query:
        results = run(query)

        if args.json:
            print(json.dumps([r.to_dict() for r in results], indent=2))
        else:
            print(f"\n--- Results for '{query}' ({len(results)} found) ---\n")
            for i, r in enumerate(results, 1):
                print(f"{i:2}. [{r.score:.3f}] {r.id}")
                print(f"    {json.dumps(r.payload, indent=4, ensure_ascii=False)}")
                print()
        return

    print("Interactive search mode. Type 'q' to quit.\n")
    if filters:
        print(f"Active filters: {filters}\n")

    while True:
        try:
            query = input("Search> ").strip()
            if query.lower() in ("q", "quit", "exit"):
                break
            if not query:
                continue

            results = run(query)

            print(f"\n--- Results ({len(results)} found) ---\n")
            for i, r in enumerate(results, 1):
                print(f"{i:2}. [{r.score:.3f}] {r.id}")
                # Show the first non-empty descriptive field only.
                for key in ["summary", "title", "name", "path"]:
                    if r.payload.get(key):
                        print(f"    {r.payload[key]}")
                        break
                print()
        except (KeyboardInterrupt, EOFError):
            # input() raises EOFError on Ctrl-D or when piped stdin runs
            # out; treat it like Ctrl-C and leave the prompt cleanly.
            break
    print("\nBye!")


def cmd_index(args: argparse.Namespace, config: SearchConfig) -> None:
    """Execute index command.

    Optionally creates the collection (``args.create``; ``args.reset`` is
    only honored together with it), optionally indexes a ``.jsonl`` or
    ``.json`` file, and always prints the collection info at the end.

    All command-line input is validated before the indexer is touched, so a
    mistyped ``--file`` or malformed ``--payload-indexes`` can no longer
    reset a collection and then abort. Validation errors are printed to
    stderr and exit with status 1.

    Args:
        args: Parsed CLI arguments; reads ``text_fields``,
            ``payload_indexes``, ``id_field``, ``batch_size``, ``create``,
            ``reset``, ``file`` and ``items_key``.
        config: Search configuration passed to the indexer.
    """
    # Parse text fields (empty entries from stray commas are dropped).
    text_fields = ["content"]
    if args.text_fields:
        text_fields = [
            name.strip() for name in args.text_fields.split(",") if name.strip()
        ]
        if not text_fields:
            print("Error: --text-fields contains no field names", file=sys.stderr)
            sys.exit(1)

    # Parse payload indexes: each entry must be exactly "field:type".
    payload_indexes = {}
    if args.payload_indexes:
        for item in args.payload_indexes.split(","):
            item = item.strip()
            if not item:
                continue  # tolerate trailing or doubled commas
            parts = [part.strip() for part in item.split(":")]
            if len(parts) != 2 or not all(parts):
                print(
                    f"Error: Invalid payload index '{item}' (expected field:type)",
                    file=sys.stderr,
                )
                sys.exit(1)
            field, ftype = parts
            payload_indexes[field] = ftype

    # Validate the input file BEFORE any collection create/reset happens.
    path = None
    suffix = ""
    if args.file:
        path = Path(args.file)
        if not path.exists():
            print(f"Error: File not found: {path}", file=sys.stderr)
            sys.exit(1)
        suffix = path.suffix.lower()  # accept .JSON / .JSONL as well
        if suffix not in (".jsonl", ".json"):
            print(f"Error: Unsupported file format: {path.suffix}", file=sys.stderr)
            sys.exit(1)

    index_config = IndexConfig(
        id_field=args.id_field,
        text_fields=text_fields,
        payload_indexes=payload_indexes,
        batch_size=args.batch_size,
    )

    indexer = QdrantIndexer(config, index_config)

    if args.create:
        indexer.create_collection(reset=args.reset)
        if payload_indexes:
            indexer.create_payload_indexes()

    if path is not None:
        if suffix == ".jsonl":
            count = indexer.index_from_jsonl(path)
        else:
            count = indexer.index_from_json(path, items_key=args.items_key)

        print(f"\nIndexed {count} documents")

    info = indexer.get_collection_info()
    print(f"\nCollection info: {json.dumps(info, indent=2)}")


def cmd_info(args: argparse.Namespace, config: SearchConfig) -> None:
    """Show collection info.

    Prints the collection info as indented JSON on stdout. On any failure
    the error is printed to stderr, so stdout stays clean for tools that
    parse the JSON, and the process exits with status 1.

    Args:
        args: Parsed CLI arguments (not read by this command).
        config: Search configuration passed to the indexer.
    """
    try:
        # Construction is inside the guarded region so a failure while
        # building the indexer gets the same clean error and exit status.
        indexer = QdrantIndexer(config)
        info = indexer.get_collection_info()
        print(json.dumps(info, indent=2))
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


def _build_parser() -> argparse.ArgumentParser:
    """Assemble the top-level parser and its search/index/info subcommands."""
    usage_examples = """
Examples:
  # Create collection and index
  %(prog)s index --collection api_ops --create --reset \\
      --file data.jsonl \\
      --id-field operation_id \\
      --text-fields summary,path,description,tags \\
      --payload-indexes resource_type:keyword,effect_type:keyword

  # Interactive search
  %(prog)s search --collection api_ops

  # Search with filter
  %(prog)s search --collection api_ops -q "get documents" \\
      --filter resource_type=document --filter effect_type=read
        """

    root = argparse.ArgumentParser(
        description="Qdrant Hybrid Search CLI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    root.add_argument("--env-file", type=str, default=".env", help="Path to .env file")
    root.add_argument(
        "--qdrant-url",
        type=str,
        default="http://localhost:6333",
        help="Qdrant server URL",
    )
    commands = root.add_subparsers(dest="command", required=True)

    def add_command(name: str, summary: str) -> argparse.ArgumentParser:
        """Register a subcommand; every subcommand requires --collection."""
        sub = commands.add_parser(name, help=summary)
        sub.add_argument(
            "--collection", "-c", type=str, required=True, help="Collection name"
        )
        return sub

    # search
    searching = add_command("search", "Search the index")
    searching.add_argument("query", nargs="?", help="Search query")
    searching.add_argument(
        "-q", "--query", dest="query_opt", help="Search query (alternative)"
    )
    searching.add_argument(
        "--top-k", "-k", type=int, default=10, help="Number of results"
    )
    searching.add_argument(
        "--filter", "-f", action="append", help="Filter (field=value)"
    )
    searching.add_argument(
        "--no-rerank", action="store_true", help="Disable reranking"
    )
    searching.add_argument("--json", action="store_true", help="Output as JSON")
    searching.add_argument(
        "--debug",
        action="store_true",
        help="Show debug info (query expansion, sparse vectors)",
    )

    # index
    indexing = add_command("index", "Index documents")
    indexing.add_argument("--file", type=str, help="JSONL or JSON file to index")
    indexing.add_argument("--create", action="store_true", help="Create collection")
    indexing.add_argument(
        "--reset", action="store_true", help="Reset collection (delete existing)"
    )
    indexing.add_argument(
        "--id-field", type=str, default="id", help="Field name for document ID"
    )
    indexing.add_argument(
        "--text-fields",
        type=str,
        help="Comma-separated fields for text search (BM25). Example: summary,path,tags",
    )
    indexing.add_argument(
        "--payload-indexes",
        type=str,
        help="Comma-separated field:type pairs for filtering. Example: resource_type:keyword,status:keyword",
    )
    indexing.add_argument(
        "--items-key", type=str, help="Key for items array in JSON file"
    )
    indexing.add_argument("--batch-size", type=int, default=50, help="Batch size")

    # info
    add_command("info", "Show collection info")

    return root


def main() -> None:
    """Main CLI entry point.

    Parses the command line, loads the ``.env`` file if it exists, builds a
    ``SearchConfig`` from ``--qdrant-url`` and ``--collection``, and hands
    over to the handler of the selected subcommand.
    """
    args = _build_parser().parse_args()

    # The query may arrive positionally or via -q/--query; the option wins
    # when both are supplied.
    if args.command == "search" and args.query_opt:
        args.query = args.query_opt

    dotenv_file = Path(args.env_file)
    if dotenv_file.exists():
        load_dotenv(dotenv_file)

    config = SearchConfig(
        qdrant_url=args.qdrant_url,
        collection_name=args.collection,
    )

    handlers = {
        "search": cmd_search,
        "index": cmd_index,
        "info": cmd_info,
    }
    handler = handlers.get(args.command)
    if handler is not None:
        handler(args, config)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
