Fix precedents search + auto-update case parties
block_writer: _build_precedents_context now searches both paragraph_embeddings (other decisions by Dafna) and case_law_embeddings (precedent case law). Previously it searched only document_chunks, which had no cross-case data. It now returns ~2400 chars from 3 other decisions. processor: Step 1.6 auto-updates the case's appellants/respondents from the classifier results when those case fields are empty, which prevents blank party fields. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -501,23 +501,51 @@ async def _build_plans_context(case_id: UUID) -> str:
|
|||||||
|
|
||||||
|
|
||||||
async def _build_precedents_context(case_id: UUID, block_id: str) -> str:
|
async def _build_precedents_context(case_id: UUID, block_id: str) -> str:
|
||||||
"""Search for similar precedent paragraphs."""
|
"""Search for similar precedent paragraphs from other decisions and case law."""
|
||||||
|
parts = []
|
||||||
try:
|
try:
|
||||||
case = await db.get_case(case_id)
|
case = await db.get_case(case_id)
|
||||||
|
case_number = case.get("case_number", "") if case else ""
|
||||||
subject = case.get("subject", "") if case else ""
|
subject = case.get("subject", "") if case else ""
|
||||||
query = f"דיון משפטי בנושא {subject}" if subject else "דיון משפטי ועדת ערר"
|
query = f"דיון משפטי בנושא {subject}" if subject else "דיון משפטי ועדת ערר"
|
||||||
query_emb = await embeddings.embed_query(query)
|
query_emb = await embeddings.embed_query(query)
|
||||||
results = await db.search_similar(query_embedding=query_emb, limit=5)
|
|
||||||
|
# Search 1: paragraph_embeddings (from other decisions by Dafna)
|
||||||
|
para_results = await db.search_similar_paragraphs(
|
||||||
|
query_embedding=query_emb, limit=10, block_type="block-yod",
|
||||||
|
)
|
||||||
# Filter out same case
|
# Filter out same case
|
||||||
results = [r for r in results if str(r.get("case_id")) != str(case_id)]
|
para_results = [r for r in para_results if r.get("case_number", "") != case_number]
|
||||||
if results:
|
for r in para_results[:4]:
|
||||||
parts = []
|
parts.append(
|
||||||
for r in results[:3]:
|
f"[החלטת {r.get('case_number', '?')} — {r.get('case_title', '')}, "
|
||||||
parts.append(f"[{r.get('case_number', '?')}, {r.get('section_type', '')}] {r['content'][:400]}")
|
f"בלוק {r.get('block_type', '')}]\n{r['content'][:500]}"
|
||||||
return "\n\n".join(parts)
|
)
|
||||||
|
|
||||||
|
# Search 2: case_law_embeddings (precedent case law)
|
||||||
|
pool = await db.get_pool()
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
caselaw_rows = await conn.fetch(
|
||||||
|
"""SELECT cl.case_number, cl.case_name, cl.court, cl.summary, cl.key_quote,
|
||||||
|
1 - (cle.embedding <=> $1) AS score
|
||||||
|
FROM case_law_embeddings cle
|
||||||
|
JOIN case_law cl ON cl.id = cle.case_law_id
|
||||||
|
ORDER BY cle.embedding <=> $1
|
||||||
|
LIMIT 5""",
|
||||||
|
query_emb,
|
||||||
|
)
|
||||||
|
for r in caselaw_rows[:3]:
|
||||||
|
text = r["key_quote"] or r["summary"] or ""
|
||||||
|
if text:
|
||||||
|
parts.append(
|
||||||
|
f"[פסיקה: {r['case_number']} {r['case_name']} ({r.get('court', '')})] "
|
||||||
|
f"score={r['score']:.3f}\n{text[:400]}"
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Failed to fetch precedents: %s", e)
|
logger.warning("Failed to fetch precedents: %s", e)
|
||||||
return "(אין תקדימים)"
|
|
||||||
|
return "\n\n".join(parts) if parts else "(אין תקדימים)"
|
||||||
|
|
||||||
|
|
||||||
async def _build_style_context() -> str:
|
async def _build_style_context() -> str:
|
||||||
|
|||||||
@@ -57,6 +57,18 @@ async def process_document(document_id: UUID, case_id: UUID) -> dict:
|
|||||||
len(classification_result["parties"].get("respondents", [])),
|
len(classification_result["parties"].get("respondents", [])),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Step 1.6: Update case parties if empty
|
||||||
|
if case_id and case:
|
||||||
|
parties = classification_result.get("parties", {})
|
||||||
|
updates = {}
|
||||||
|
if not case.get("appellants") and parties.get("appellants"):
|
||||||
|
updates["appellants"] = parties["appellants"]
|
||||||
|
if not case.get("respondents") and parties.get("respondents"):
|
||||||
|
updates["respondents"] = parties["respondents"]
|
||||||
|
if updates:
|
||||||
|
await db.update_case(case_id, **updates)
|
||||||
|
logger.info("Updated case parties: %s", updates)
|
||||||
|
|
||||||
# Step 2: Chunk
|
# Step 2: Chunk
|
||||||
logger.info("Chunking document (%d chars)", len(text))
|
logger.info("Chunking document (%d chars)", len(text))
|
||||||
chunks = chunker.chunk_document(text)
|
chunks = chunker.chunk_document(text)
|
||||||
|
|||||||
Reference in New Issue
Block a user