"""Derive missing_precedents 'open' gaps from cited_only stubs (#143, G2).

Two parallel systems described the same concept — "a cited precedent whose text
isn't in the corpus": the ``missing_precedents`` queue (the chair's acquisition
list) and ``case_law`` rows with ``source_kind='cited_only'`` (citation-only
stubs seeded by the X11 / corpus-graph). Overlap was ~0, so the 31 cited_only
stubs never surfaced on /missing-precedents.

This makes ``missing_precedents`` the single source-of-truth FOR THE QUEUE and
``cited_only`` a DERIVED discovery source (like digests feed the radar):
  1. Backfill ``citation_norm`` (designator-aware dedup key) for every existing
     missing_precedent — required before the dedup below can match.
  2. For each cited_only stub, derive an 'open' missing_precedent (deduped on
     citation_norm), with ``discovery_source='cited_only'``,
     ``linked_case_law_id`` = the stub (its canonical identity is known; status
     stays 'open' until the text is uploaded → promote-in-place), and notes
     listing the precedents that cite it.

Idempotent / re-runnable. Dry-run by default; ``--apply`` to write.
Host-only. Run:
    HOME=/home/chaim mcp-server/.venv/bin/python scripts/derive_missing_from_cited_only.py [--apply]
"""

from __future__ import annotations

import asyncio
import os
import sys

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "mcp-server", "src"))

from legal_mcp.services import court_citation, db


async def _backfill_citation_norm(pool, apply: bool) -> int:
    """Populate ``citation_norm`` on missing_precedents rows that lack one.

    Every row whose ``citation_norm`` is NULL or empty is fetched and its
    dedup key is computed from ``citation`` via
    ``court_citation.citation_dedup_key``. Rows for which the key comes back
    empty are left untouched and are not counted.

    Args:
        pool: Connection pool exposing awaitable ``fetch`` and ``execute``.
        apply: When true, write the computed key back to the row; when false,
            only count the rows that would be updated.

    Returns:
        The number of rows updated (or that would be updated in a dry run).
    """
    pending = await pool.fetch(
        "SELECT id, citation FROM missing_precedents "
        "WHERE COALESCE(citation_norm, '') = ''"
    )
    updated = 0
    for row in pending:
        # A NULL citation is treated as an empty string before keying.
        key = court_citation.citation_dedup_key(row["citation"] or "")
        if key:
            if apply:
                await pool.execute(
                    "UPDATE missing_precedents SET citation_norm = $2 WHERE id = $1",
                    row["id"], key,
                )
            updated += 1
    return updated


async def _citing_precedents_note(pool, stub_id) -> str:
    """Build the ``notes`` text for a gap derived from a cited_only stub.

    The note always starts with a fixed provenance prefix. When the citation
    graph has rows citing the stub, the distinct non-empty case numbers of the
    citing precedents (sorted, at most 8 — both enforced by the query) are
    appended as a comma-separated list.

    Args:
        pool: Connection pool exposing an awaitable ``fetch``.
        stub_id: ``case_law.id`` of the cited_only stub.

    Returns:
        The note string, with or without the list of citing case numbers.
    """
    citing_rows = await pool.fetch(
        """SELECT DISTINCT cl.case_number
           FROM precedent_internal_citations p
           JOIN case_law cl ON cl.id = p.source_case_law_id
           WHERE p.cited_case_law_id = $1 AND COALESCE(cl.case_number,'') <> ''
           ORDER BY cl.case_number LIMIT 8""",
        stub_id,
    )
    note = "נגזר מ-cited_only (גרף-הציטוטים)"
    if not citing_rows:
        return note
    joined = ", ".join(row["case_number"] for row in citing_rows)
    return note + '; מצוטט ע"י: ' + joined


async def main(apply: bool) -> int:
    """Backfill dedup keys, then derive 'open' gaps from cited_only stubs.

    Steps:
      1. Backfill ``citation_norm`` on existing missing_precedents rows.
      2. For every ``case_law`` row with ``source_kind='cited_only'`` that has
         a case number and no matching missing_precedent, create one with
         ``discovery_source='cited_only'``, linked to the stub, with a note
         listing the precedents that cite it.

    Progress and a summary are printed to stdout.

    Args:
        apply: When true, write the backfill and create the derived rows.
            When false (dry run), nothing is written; the script only prints
            what it would do.

    Returns:
        Always 0; the caller uses it as the process exit status.
    """
    pool = await db.get_pool()

    backfilled = await _backfill_citation_norm(pool, apply)
    print(f"citation_norm backfill (existing rows){'' if apply else ' [dry]'}: {backfilled}")

    # Dry-run fidelity: in a dry run neither the backfill nor the creates are
    # written, so the lookup below cannot see (a) existing rows that still lack
    # a citation_norm, or (b) stubs counted earlier in this same run. Track the
    # dedup keys of both in memory so "would create" matches what --apply does.
    # NOTE(review): assumes find_missing_precedent_by_citation matches on
    # citation_norm, as the module docstring describes — confirm.
    # This set stays empty when apply is true, so the apply path is unchanged.
    pending_norms: set[str] = set()
    if not apply:
        unkeyed = await pool.fetch(
            "SELECT citation FROM missing_precedents "
            "WHERE COALESCE(citation_norm, '') = ''"
        )
        for row in unkeyed:
            key = court_citation.citation_dedup_key(row["citation"] or "")
            if key:
                pending_norms.add(key)

    stubs = await pool.fetch(
        "SELECT id, case_number, case_name FROM case_law "
        "WHERE source_kind = 'cited_only' ORDER BY case_number"
    )
    print(f"cited_only stubs: {len(stubs)}")

    created = 0
    skipped = 0
    for s in stubs:
        citation = (s["case_number"] or "").strip()
        if not citation:
            # Without a case number there is nothing to key the gap on.
            print(f"  SKIP (no case_number) id={s['id']}")
            continue
        norm = court_citation.citation_dedup_key(citation)
        existing = await db.find_missing_precedent_by_citation(citation)
        if existing or (norm and norm in pending_norms):
            skipped += 1
            continue
        print(f"  + {citation:<22} norm={norm!r} name={(s['case_name'] or '')[:24]!r}")
        if apply:
            note = await _citing_precedents_note(pool, s["id"])
            await db.create_missing_precedent(
                citation=citation,
                case_name=s["case_name"] or None,
                discovery_source="cited_only",
                linked_case_law_id=s["id"],
                notes=note,
            )
        elif norm:
            # Remember the key so a later stub with the same key is reported
            # as deduped rather than as a second "would create".
            pending_norms.add(norm)
        created += 1

    print(f"\n{'created' if apply else 'would create'}: {created}   already-present (deduped): {skipped}")
    if not apply:
        print("(dry-run — pass --apply to write)")
    return 0


if __name__ == "__main__":
    # Dry-run unless the literal flag --apply appears anywhere in argv.
    apply_changes = "--apply" in sys.argv
    exit_status = asyncio.run(main(apply_changes))
    sys.exit(exit_status)