docs(principles): move research into docs/precedent-corpus-redesign/ (README + research-full) (#153)
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
61
scripts/compute_principle_gold.py
Normal file
61
scripts/compute_principle_gold.py
Normal file
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Principle-level gold matching driver (importance layer #153, component 1).
|
||||
|
||||
Flags the specific principle a chair relied on (gold_chair / chair_cited) or a
|
||||
digest highlights (gold_digest), by embedding-matching match_context /
|
||||
headline_holding to the cited precedent's principles. EMBEDDING-ONLY (no LLM).
|
||||
All logic in services/principle_gold (reused by the periodic refresh job, G2).
|
||||
|
||||
cd ~/legal-ai/mcp-server
|
||||
HOME=/home/chaim .venv/bin/python ../scripts/compute_principle_gold.py # dry-run
|
||||
HOME=/home/chaim .venv/bin/python ../scripts/compute_principle_gold.py --apply # write flags
|
||||
HOME=/home/chaim .venv/bin/python ../scripts/compute_principle_gold.py --threshold 0.75
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "mcp-server", "src"))
|
||||
|
||||
from legal_mcp.services import principle_gold # noqa: E402
|
||||
|
||||
|
||||
async def _run(apply: bool, threshold: float | None) -> int:
    """Run one matching pass through ``principle_gold.match_all`` and print a report.

    Args:
        apply: Forwarded to ``match_all``. Also selects the banner text
            (``APPLY`` vs ``DRY-RUN``) and, when false, triggers the closing
            reminder that nothing was written.
        threshold: Forwarded unchanged to ``match_all``; ``None`` is passed
            through as-is (presumably the service then uses its own default
            floor — confirm against ``principle_gold``).

    Returns:
        Always ``0``; the value is used as the process exit status.
    """
    if apply:
        banner = "APPLY"
    else:
        banner = "DRY-RUN"
    # Flush the banner immediately so it is visible before the awaited call returns.
    print(f"[{banner}] principle-level gold matching (embedding-only)\n", flush=True)

    summary = await principle_gold.match_all(threshold=threshold, apply=apply)

    # Headline counters: chair citations first, then digests.
    print(f"threshold: {summary['threshold']}")
    print(f"\nchair citations processed: {summary['chair_citations']}")
    print(f" → gold_chair (our chair, tier-1): {summary['gold_chair']}")
    print(f" → chair_cited (other chair, tier-2): {summary['chair_cited']}")
    print(f" → no match ≥ threshold: {summary['chair_no_match']}")
    print(f"\ndigests processed: {summary['digests']}")
    print(f" → gold_digest (tier-1): {summary['gold_digest']}")
    print(f" → no match ≥ threshold: {summary['digest_no_match']}")

    # Optional section: printed only when the summary carries a non-empty "samples" entry.
    sample_rows = summary.get("samples")
    if sample_rows:
        print("\nsample matches (label · sim · halacha):")
        for row in sample_rows:
            # Only the first 8 characters of the halacha id are shown.
            print(f" {row['label']:<18} sim={row['sim']} {row['halacha_id'][:8]}")

    # Optional section: printed only when the summary carries a non-empty "coverage" entry.
    cov = summary.get("coverage")
    if cov:
        print(f"\n── coverage (live original principles: {cov['live_original']}) ──")
        print(f" protected (gold_chair OR gold_digest): {cov['protected']}")
        print(f" gold_chair={cov['gold_chair']} gold_digest={cov['gold_digest']} chair_cited={cov['chair_cited']}")

    if not apply:
        print("\ndry-run — no flags written. Re-run with --apply to commit.")
    return 0
|
||||
|
||||
|
||||
def main() -> int:
    """Parse the command-line flags and run the matching driver.

    Recognised flags:
        --apply      boolean switch, off by default.
        --threshold  optional float; left as ``None`` when not supplied.

    Returns:
        The integer returned by ``_run``, intended as the process exit status.
    """
    parser = argparse.ArgumentParser(description="Principle-level gold matching (#153)")
    parser.add_argument("--apply", action="store_true", help="write gold flags (idempotent)")
    parser.add_argument("--threshold", type=float, default=None, help="override match cosine floor")
    options = parser.parse_args()

    # _run is a coroutine function; asyncio.run drives it to completion on a fresh event loop.
    exit_status = asyncio.run(_run(options.apply, options.threshold))
    return exit_status
|
||||
|
||||
|
||||
# Script entry point: hand main()'s return value to the interpreter as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user