docs(principles): move research into docs/precedent-corpus-redesign/ (README + research-full) (#153)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-20 11:36:38 +00:00
parent dd8064d94c
commit 8d409edc9d
13 changed files with 2399 additions and 2 deletions

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""Principle-level gold matching driver (importance layer #153, component 1).
Flags the specific principle a chair relied on (gold_chair / chair_cited) or a
digest highlights (gold_digest), by embedding-matching match_context /
headline_holding to the cited precedent's principles. EMBEDDING-ONLY (no LLM).
All logic in services/principle_gold (reused by the periodic refresh job, G2).
cd ~/legal-ai/mcp-server
HOME=/home/chaim .venv/bin/python ../scripts/compute_principle_gold.py # dry-run
HOME=/home/chaim .venv/bin/python ../scripts/compute_principle_gold.py --apply # write flags
HOME=/home/chaim .venv/bin/python ../scripts/compute_principle_gold.py --threshold 0.75
"""
from __future__ import annotations
import argparse
import asyncio
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "mcp-server", "src"))
from legal_mcp.services import principle_gold # noqa: E402
async def _run(apply: bool, threshold: float | None) -> int:
    """Run one gold-matching pass and print its summary to stdout.

    Awaits ``principle_gold.match_all`` with the given ``threshold`` and
    ``apply`` values, then prints the counters found in the returned mapping:
    the chair-citation and digest tallies, followed by the optional
    ``samples`` and ``coverage`` sections when those entries are present and
    truthy.

    Args:
        apply: Forwarded to ``match_all``; also selects the APPLY / DRY-RUN
            banner and whether the closing dry-run reminder is printed.
        threshold: Forwarded to ``match_all`` unchanged (``None`` included).

    Returns:
        Always ``0``.
    """
    banner = "APPLY" if apply else "DRY-RUN"
    print(f"[{banner}] principle-level gold matching (embedding-only)\n", flush=True)

    summary = await principle_gold.match_all(threshold=threshold, apply=apply)

    # Fixed counters, emitted one line at a time in report order. Each entry
    # is (text printed before the value, key looked up in the summary).
    counter_lines = (
        ("threshold: ", "threshold"),
        ("\nchair citations processed: ", "chair_citations"),
        (" → gold_chair (our chair, tier-1): ", "gold_chair"),
        (" → chair_cited (other chair, tier-2): ", "chair_cited"),
        (" → no match ≥ threshold: ", "chair_no_match"),
        ("\ndigests processed: ", "digests"),
        (" → gold_digest (tier-1): ", "gold_digest"),
        (" → no match ≥ threshold: ", "digest_no_match"),
    )
    for prefix, key in counter_lines:
        print(f"{prefix}{summary[key]}")

    # Optional section: individual sample matches.
    samples = summary.get("samples")
    if samples:
        print("\nsample matches (label · sim · halacha):")
        for match in samples:
            # Only the first 8 characters of the halacha id are shown.
            print(f" {match['label']:<18} sim={match['sim']} {match['halacha_id'][:8]}")

    # Optional section: coverage totals.
    coverage = summary.get("coverage")
    if coverage:
        print(f"\n── coverage (live original principles: {coverage['live_original']}) ──")
        print(f" protected (gold_chair OR gold_digest): {coverage['protected']}")
        flag_keys = ("gold_chair", "gold_digest", "chair_cited")
        print(" " + " ".join(f"{name}={coverage[name]}" for name in flag_keys))

    if apply:
        return 0
    print("\ndry-run — no flags written. Re-run with --apply to commit.")
    return 0
def main() -> int:
    """Parse the command-line options and run the matching driver.

    Recognised options:
        --apply: boolean switch, off by default.
        --threshold: optional float, ``None`` when omitted.

    Returns:
        The integer returned by ``_run``.
    """
    parser = argparse.ArgumentParser(description="Principle-level gold matching (#153)")
    parser.add_argument("--apply", action="store_true", help="write gold flags (idempotent)")
    parser.add_argument("--threshold", type=float, default=None, help="override match cosine floor")
    options = parser.parse_args()

    # asyncio.run drives the coroutine to completion on a fresh event loop.
    exit_code = asyncio.run(_run(apply=options.apply, threshold=options.threshold))
    return exit_code
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())