docs(principles): move research into docs/precedent-corpus-redesign/ (README + research-full) (#153)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-20 11:36:38 +00:00
parent dd8064d94c
commit 8d409edc9d
13 changed files with 2399 additions and 2 deletions
--- a/scripts/compute_principle_gold.py
+++ b/scripts/compute_principle_gold.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""Principle-level gold matching driver (importance layer #153, component 1).
+
+Flags the specific principle a chair relied on (gold_chair / chair_cited) or a
+digest highlights (gold_digest), by embedding-matching match_context /
+headline_holding to the cited precedent's principles. EMBEDDING-ONLY (no LLM).
+All logic in services/principle_gold (reused by the periodic refresh job, G2).
+
+  cd ~/legal-ai/mcp-server
+  HOME=/home/chaim .venv/bin/python ../scripts/compute_principle_gold.py            # dry-run
+  HOME=/home/chaim .venv/bin/python ../scripts/compute_principle_gold.py --apply    # write flags
+  HOME=/home/chaim .venv/bin/python ../scripts/compute_principle_gold.py --threshold 0.75
+"""
+from __future__ import annotations
+
+import argparse
+import asyncio
+import os
+import sys
+
+# Prepend <this file's dir>/../mcp-server/src to sys.path so that the
+# `legal_mcp` package imported just below can be found when this file is
+# executed directly as a script.
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "mcp-server", "src"))
+
+from legal_mcp.services import principle_gold  # noqa: E402
+
+
+async def _run(apply: bool, threshold: float | None) -> int:
+    """Run the principle-gold matcher once and print its summary to stdout.
+
+    Args:
+        apply: Forwarded to ``principle_gold.match_all``; also selects the
+            ``APPLY`` / ``DRY-RUN`` banner and whether the closing dry-run
+            reminder is printed.
+        threshold: Forwarded unchanged to ``principle_gold.match_all``
+            (``None`` when no override was given on the command line).
+
+    Returns:
+        Always ``0``.
+    """
+    if apply:
+        banner = "APPLY"
+    else:
+        banner = "DRY-RUN"
+    # Flushed explicitly so the banner is emitted before the matcher is awaited.
+    print(f"[{banner}] principle-level gold matching (embedding-only)\n", flush=True)
+
+    summary = await principle_gold.match_all(threshold=threshold, apply=apply)
+
+    # Mandatory counters: a missing key raises KeyError.
+    print(f"threshold: {summary['threshold']}")
+    print(f"\nchair citations processed: {summary['chair_citations']}")
+    print(f"   → gold_chair (our chair, tier-1): {summary['gold_chair']}")
+    print(f"   → chair_cited (other chair, tier-2): {summary['chair_cited']}")
+    print(f"   → no match ≥ threshold: {summary['chair_no_match']}")
+    print(f"\ndigests processed: {summary['digests']}")
+    print(f"   → gold_digest (tier-1): {summary['gold_digest']}")
+    print(f"   → no match ≥ threshold: {summary['digest_no_match']}")
+
+    # Optional section: skipped when "samples" is absent or empty.
+    samples = summary.get("samples")
+    if samples:
+        print("\nsample matches (label · sim · halacha):")
+        for hit in samples:
+            # Label is left-aligned in 18 columns; the id is cut to its first 8 characters.
+            print(f"   {hit['label']:<18} sim={hit['sim']} {hit['halacha_id'][:8]}")
+
+    # Optional section: skipped when "coverage" is absent or empty.
+    coverage = summary.get("coverage")
+    if coverage:
+        print(f"\n── coverage (live original principles: {coverage['live_original']}) ──")
+        print(f"   protected (gold_chair OR gold_digest): {coverage['protected']}")
+        print(f"   gold_chair={coverage['gold_chair']}  gold_digest={coverage['gold_digest']}  chair_cited={coverage['chair_cited']}")
+
+    if apply:
+        return 0
+    print("\ndry-run — no flags written. Re-run with --apply to commit.")
+    return 0
+
+
+def main() -> int:
+    """Parse the command line and run the matcher to completion.
+
+    Recognised options are ``--apply`` (boolean switch, off unless given)
+    and ``--threshold`` (a float, ``None`` when omitted).
+
+    Returns:
+        The value returned by ``_run``.
+    """
+    parser = argparse.ArgumentParser(description="Principle-level gold matching (#153)")
+    parser.add_argument(
+        "--apply",
+        action="store_true",
+        help="write gold flags (idempotent)",
+    )
+    parser.add_argument(
+        "--threshold",
+        type=float,
+        default=None,
+        help="override match cosine floor",
+    )
+    args = parser.parse_args()
+    # asyncio.run executes the coroutine and hands back its return value.
+    exit_code = asyncio.run(_run(apply=args.apply, threshold=args.threshold))
+    return exit_code
+
+
+if __name__ == "__main__":
+    # Use main()'s return value as the process exit status.
+    sys.exit(main())