feat(corroboration): halacha matcher + cosine threshold (INV-COR3, X11)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-31 18:57:47 +00:00
parent 09eec6a906
commit dbc176ae66
4 changed files with 31 additions and 0 deletions

View File

@@ -54,6 +54,8 @@ REDIS_URL = os.environ.get("REDIS_URL", "redis://127.0.0.1:6380/0")
# pinned. # pinned.
HALACHA_EXTRACT_MODEL = os.environ.get("HALACHA_EXTRACT_MODEL", "claude-opus-4-8") HALACHA_EXTRACT_MODEL = os.environ.get("HALACHA_EXTRACT_MODEL", "claude-opus-4-8")
HALACHA_EXTRACT_EFFORT = os.environ.get("HALACHA_EXTRACT_EFFORT", "xhigh") HALACHA_EXTRACT_EFFORT = os.environ.get("HALACHA_EXTRACT_EFFORT", "xhigh")
# Inclusive similarity floor that accept_match() uses as its default threshold;
# override via the HALACHA_CORROBORATION_MATCH_FLOOR environment variable.
HALACHA_CORROBORATION_MATCH_FLOOR = float(
    os.environ.get("HALACHA_CORROBORATION_MATCH_FLOOR", "0.50")
)
# NOTE(review): presumably the minimum number of corroborating citations
# required; its consumer is not visible here, so confirm against the caller.
HALACHA_CORROBORATION_MIN_CITES = int(
    os.environ.get("HALACHA_CORROBORATION_MIN_CITES", "2")
)
# Voyage AI # Voyage AI
VOYAGE_API_KEY = os.environ.get("VOYAGE_API_KEY", "") VOYAGE_API_KEY = os.environ.get("VOYAGE_API_KEY", "")

View File

@@ -25,6 +25,14 @@ def _coerce_treatment(raw: dict) -> str:
return t if t in _VALID_TREATMENT else "mentioned" return t if t in _VALID_TREATMENT else "mentioned"
def accept_match(best: tuple[str, float] | None, floor: float = config.HALACHA_CORROBORATION_MATCH_FLOOR) -> str | None:
"""Return the halacha_id iff similarity clears the floor (INV-COR3)."""
if not best:
return None
halacha_id, sim = best
return halacha_id if sim >= floor else None
_TREATMENT_PROMPT = """אתה משפטן בכיר. נתון ציטוט של פסק/החלטה קודמים בתוך החלטה מאוחרת. _TREATMENT_PROMPT = """אתה משפטן בכיר. נתון ציטוט של פסק/החלטה קודמים בתוך החלטה מאוחרת.
סווג כיצד ההחלטה המאוחרת **מטפלת** בתקדים המצוטט, לפי אחת מהקטגוריות: סווג כיצד ההחלטה המאוחרת **מטפלת** בתקדים המצוטט, לפי אחת מהקטגוריות:
- followed — אימצה והחילה את ההלכה. - followed — אימצה והחילה את ההלכה.

View File

@@ -3386,6 +3386,18 @@ async def update_halacha(
return dict(row) if row else None return dict(row) if row else None
async def nearest_halacha_for_vector(case_law_id: UUID, vec: list[float]) -> tuple[str, float] | None:
    """Find the halacha of `case_law_id` closest to a context embedding.

    Only rows of ``halachot`` with a non-NULL ``embedding`` are considered.
    They are ordered by ``embedding <=> $2`` and the first one is taken; the
    reported similarity is ``1 - (embedding <=> $2)`` (cosine).

    Returns:
        ``(halacha_id_as_text, similarity)`` for the best row, or ``None``
        when the query yields no row.
    """
    query = (
        "SELECT id::text AS id, 1 - (embedding <=> $2) AS sim "
        "FROM halachot WHERE case_law_id = $1 AND embedding IS NOT NULL "
        "ORDER BY embedding <=> $2 LIMIT 1"
    )
    pool = await get_pool()
    match = await pool.fetchrow(query, case_law_id, vec)
    if not match:
        return None
    return (match["id"], float(match["sim"]))
async def search_precedent_library_semantic( async def search_precedent_library_semantic(
query_embedding: list[float], query_embedding: list[float],
practice_area: str = "", practice_area: str = "",

View File

@@ -15,3 +15,12 @@ def test_treatment_polarity():
assert cor.is_positive("followed") and cor.is_positive("explained") assert cor.is_positive("followed") and cor.is_positive("explained")
assert cor.is_negative("distinguished") and cor.is_negative("overruled") assert cor.is_negative("distinguished") and cor.is_negative("overruled")
assert not cor.is_positive("mentioned") and not cor.is_negative("mentioned") assert not cor.is_positive("mentioned") and not cor.is_negative("mentioned")
def test_match_accepts_above_threshold():
    """A candidate whose similarity exceeds the floor is accepted by id."""
    candidate = ("h1", 0.62)
    accepted = cor.accept_match(candidate, floor=0.50)
    assert accepted == "h1"
def test_match_rejects_below_threshold():
    """A candidate whose similarity is under the floor is rejected."""
    candidate = ("h1", 0.41)
    rejected = cor.accept_match(candidate, floor=0.50)
    assert rejected is None
def test_match_rejects_empty():
    """With no candidate at all, nothing is accepted."""
    outcome = cor.accept_match(None, floor=0.50)
    assert outcome is None