From 4d1924c7e67fff4076632ea0abf98dac00c9944c Mon Sep 17 00:00:00 2001
From: Chaim <chaim@marcus-law.co.il>
Date: Sun, 3 May 2026 19:01:03 +0000
Subject: [PATCH] feat(halachot): auto-approve high-confidence halachot at
 insert
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Halachot extracted by halacha_extractor with confidence >= 0.80 are now
inserted with review_status='approved' instead of 'pending_review' —
they appear in search_precedent_library immediately. Halachot below the
threshold still require manual chair approval.

Threshold tunable via env (HALACHA_AUTO_APPROVE_THRESHOLD), defaults to
0.80. Rationale: 89% of historical extractions (356/400) score 0.80+,
spot-checks confirmed quality, and the manual review backlog was the
single biggest reason rerank-2 was returning passages-only on
ההבחנה-style queries.

After this change + the one-time backfill UPDATE, search now returns
9/10 halachot for "ההבחנה בין השבחה לפיצויים" instead of 0 — and the
top-3 are exact-match rules, not adjacent passages.

Reviewer field records "auto-approved (confidence ≥ X.XX)" with the
threshold value at insert time, for traceability.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 mcp-server/src/legal_mcp/config.py      | 11 +++++++++
 mcp-server/src/legal_mcp/services/db.py | 30 ++++++++++++++++++++-----
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/mcp-server/src/legal_mcp/config.py b/mcp-server/src/legal_mcp/config.py
index 611c5ba..95f8258 100644
--- a/mcp-server/src/legal_mcp/config.py
+++ b/mcp-server/src/legal_mcp/config.py
@@ -58,6 +58,17 @@ VOYAGE_RERANK_ENABLED = (
 # 50 was the depth used in the POC; balances recall vs rerank cost.
 VOYAGE_RERANK_FETCH_K = int(os.environ.get("VOYAGE_RERANK_FETCH_K", "50"))
 
+# Halacha extraction — auto-approve threshold. Halachot with extractor
+# confidence >= this value are inserted with review_status='approved'
+# instead of 'pending_review' (so they immediately appear in
+# search_precedent_library). Set to a value > 1.0 to disable auto-approval.
+# 0.80 baseline: 89% of historical extractions land here, manual spot-check
+# of 10 random samples confirmed quality. Tunable via env if drift is
+# observed (e.g. raise to 0.90 if false-positives appear).
+HALACHA_AUTO_APPROVE_THRESHOLD = float(
+    os.environ.get("HALACHA_AUTO_APPROVE_THRESHOLD", "0.80")
+)
+
 # Google Cloud Vision (OCR for scanned PDFs)
 GOOGLE_CLOUD_VISION_API_KEY = os.environ.get("GOOGLE_CLOUD_VISION_API_KEY", "")
 
diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py
index c827f25..8e8ad1c 100644
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -1954,20 +1954,38 @@ async def delete_halachot(case_law_id: UUID) -> int:
 
 
 async def store_halachot(case_law_id: UUID, halachot: list[dict]) -> int:
-    """Bulk-insert extracted halachot. Always with review_status='pending_review'."""
+    """Bulk-insert extracted halachot.
+
+    Each halacha enters with review_status determined by extractor
+    confidence vs ``config.HALACHA_AUTO_APPROVE_THRESHOLD``:
+      - confidence >= threshold → 'approved' (visible to search immediately)
+      - else → 'pending_review' (chair must approve manually)
+
+    The reviewer is recorded as 'auto-approved (confidence ≥ X.XX)' for traceability.
+    """
     if not halachot:
         return 0
+    threshold = config.HALACHA_AUTO_APPROVE_THRESHOLD
     pool = await get_pool()
     async with pool.acquire() as conn:
         for i, h in enumerate(halachot):
+            confidence = float(h.get("confidence", 0.0))
+            auto_approve = confidence >= threshold
+            review_status = "approved" if auto_approve else "pending_review"
+            reviewer = (
+                f"auto-approved (confidence ≥ {threshold:.2f})"
+                if auto_approve else None
+            )
+            reviewed_at_clause = "now()" if auto_approve else "NULL"
             await conn.execute(
-                """INSERT INTO halachot
+                f"""INSERT INTO halachot
                    (case_law_id, halacha_index, rule_statement, rule_type,
                     reasoning_summary, supporting_quote, page_reference,
                     practice_areas, subject_tags, cites, confidence,
-                    quote_verified, embedding, review_status)
+                    quote_verified, embedding, review_status,
+                    reviewer, reviewed_at)
                    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
-                           $12, $13, 'pending_review')""",
+                           $12, $13, $14, $15, {reviewed_at_clause})""",
                 case_law_id,
                 i,
                 h["rule_statement"],
@@ -1978,9 +1996,11 @@ async def store_halachot(case_law_id: UUID, halachot: list[dict]) -> int:
                 h.get("practice_areas", []),
                 h.get("subject_tags", []),
                 h.get("cites", []),
-                h.get("confidence", 0.0),
+                confidence,
                 h.get("quote_verified", False),
                 h.get("embedding"),
+                review_status,
+                reviewer,
             )
     return len(halachot)