docs(principles): move research into docs/precedent-corpus-redesign/ (README + research-full) (#153)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-20 11:36:38 +00:00
parent dd8064d94c
commit 8d409edc9d
13 changed files with 2399 additions and 2 deletions
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -1655,6 +1655,20 @@ ALTER TABLE halachot ALTER COLUMN embedding DROP NOT NULL;
 CREATE INDEX IF NOT EXISTS idx_halachot_canonical ON halachot(canonical_id);
 CREATE INDEX IF NOT EXISTS idx_halachot_instance_type ON halachot(instance_type);

+-- Importance layer (#153, component 1): principle-level gold/chair-cited flags.
+-- gold_chair  = our chair (דפנה) cited THIS specific principle (tier-1, protective).
+-- gold_digest = a digest's headline_holding matches this principle (tier-1).
+-- chair_cited = another committee chair cited it (tier-2, weight not protection).
+-- gold_match_score = best cosine of the matched signal (audit/tuning, G9).
+-- All set by scripts/compute_principle_gold.py (embedding match, no LLM).
+ALTER TABLE halachot
+    ADD COLUMN IF NOT EXISTS gold_chair BOOLEAN NOT NULL DEFAULT false,
+    ADD COLUMN IF NOT EXISTS gold_digest BOOLEAN NOT NULL DEFAULT false,
+    ADD COLUMN IF NOT EXISTS chair_cited BOOLEAN NOT NULL DEFAULT false,
+    ADD COLUMN IF NOT EXISTS gold_match_score REAL;
+CREATE INDEX IF NOT EXISTS idx_halachot_gold ON halachot(gold_chair, gold_digest)
+    WHERE gold_chair OR gold_digest;
+
 -- halacha_citation_corroboration (X11) gains canonical_id so the signal
 -- aggregates at the principle level rather than the per-instance level.
 -- Backfill: UPDATE halacha_citation_corroboration SET canonical_id =
@@ -6300,6 +6314,109 @@ async def apply_canonical_synthesis(
    return result.split()[-1] != "0"


+# ── Importance layer (#153) — principle-level gold matching ──────────────────
+
+async def gold_chair_citations() -> list[dict]:
+    """Committee-sourced citations for gold matching (#153).
+
+    Each row: the cited precedent, the citing chair's name, and the match_context
+    (text around the citation — the holding the chair invoked). `is_our_chair`
+    distinguishes tier-1 (OUR_CHAIR → gold_chair) from tier-2 (other chair →
+    chair_cited).
+    """
+    pool = await get_pool()
+    rows = await pool.fetch(
+        "SELECT pic.cited_case_law_id, pic.match_context, "
+        "       COALESCE(src.chair_name,'') AS citing_chair, "
+        "       (src.chair_name = $1) AS is_our_chair "
+        "FROM precedent_internal_citations pic "
+        "JOIN case_law src ON src.id = pic.source_case_law_id "
+        "WHERE pic.cited_case_law_id IS NOT NULL "
+        "  AND length(COALESCE(pic.match_context,'')) > 20",
+        config.OUR_CHAIR_NAME,
+    )
+    return [dict(r) for r in rows]
+
+
+async def gold_digest_holdings() -> list[dict]:
+    """Digest→precedent links with the headline holding the digest highlights (#153)."""
+    pool = await get_pool()
+    rows = await pool.fetch(
+        "SELECT linked_case_law_id, "
+        "       COALESCE(NULLIF(headline_holding,''), summary, '') AS holding_text "
+        "FROM digests "
+        "WHERE linked_case_law_id IS NOT NULL "
+        "  AND length(COALESCE(NULLIF(headline_holding,''), summary, '')) > 20",
+    )
+    return [dict(r) for r in rows]
+
+
+async def nearest_original_principle(
+    case_law_id: "UUID", vec: list[float],
+) -> "tuple[str, float] | None":
+    """Nearest live 'original' principle of a precedent to `vec`, with cosine sim.
+
+    Scoped to one precedent (the cited/linked source) so a chair's citation is
+    matched only against principles actually extracted from THAT decision. Skips
+    rejected instances. Returns (halacha_id, sim) or None if the precedent has no
+    embedded live principle.
+    """
+    pool = await get_pool()
+    row = await pool.fetchrow(
+        "SELECT id::text AS id, 1 - (embedding <=> $2) AS sim "
+        "FROM halachot "
+        "WHERE case_law_id = $1 AND instance_type = 'original' "
+        "  AND embedding IS NOT NULL AND review_status <> 'rejected' "
+        "ORDER BY embedding <=> $2 LIMIT 1",
+        case_law_id, vec,
+    )
+    return (row["id"], float(row["sim"])) if row else None
+
+
+async def set_principle_gold(
+    halacha_id: "UUID", *, gold_chair: bool = False, gold_digest: bool = False,
+    chair_cited: bool = False, score: float | None = None,
+) -> None:
+    """OR-merge gold/chair-cited flags onto a principle (a principle may be both
+    chair-cited AND in a digest). Keeps the MAX match score seen (#153)."""
+    pool = await get_pool()
+    await pool.execute(
+        "UPDATE halachot SET "
+        "  gold_chair = gold_chair OR $2, "
+        "  gold_digest = gold_digest OR $3, "
+        "  chair_cited = chair_cited OR $4, "
+        "  gold_match_score = GREATEST(COALESCE(gold_match_score, 0), COALESCE($5, 0)), "
+        "  updated_at = now() "
+        "WHERE id = $1",
+        halacha_id, gold_chair, gold_digest, chair_cited, score,
+    )
+
+
+async def reset_principle_gold() -> int:
+    """Clear all gold/chair-cited flags (idempotent re-run of the matcher). #153."""
+    pool = await get_pool()
+    res = await pool.execute(
+        "UPDATE halachot SET gold_chair=false, gold_digest=false, chair_cited=false, "
+        "gold_match_score=NULL WHERE gold_chair OR gold_digest OR chair_cited",
+    )
+    return int(res.split()[-1]) if res.split()[-1].isdigit() else 0
+
+
+async def gold_coverage_stats() -> dict:
+    """Counts for the gold-matching coverage report (#153)."""
+    pool = await get_pool()
+    row = await pool.fetchrow(
+        "SELECT "
+        " count(*) FILTER (WHERE instance_type='original' AND review_status<>'rejected') AS live_original, "
+        " count(*) FILTER (WHERE gold_chair) AS gold_chair, "
+        " count(*) FILTER (WHERE gold_digest) AS gold_digest, "
+        " count(*) FILTER (WHERE chair_cited) AS chair_cited, "
+        " count(*) FILTER (WHERE gold_chair OR gold_digest) AS protected "
+        "FROM halachot",
+    )
+    return dict(row)
+
+
 async def list_canonical_instances(canonical_id: "UUID") -> list[dict]:
    """List all halachot (instances) sharing a canonical_id — used by the UI accordion."""
    pool = await get_pool()