diff --git a/mcp-server/src/legal_mcp/services/ingest.py b/mcp-server/src/legal_mcp/services/ingest.py
index 35d56f8..7983c87 100644
--- a/mcp-server/src/legal_mcp/services/ingest.py
+++ b/mcp-server/src/legal_mcp/services/ingest.py
@@ -182,6 +182,7 @@ async def ingest_document(
     try:
         stored_chunks = await _chunk_embed_store(case_law_id, raw_text,
                                                  page_offsets, page_count, progress)
+        await db.mark_indexed(case_law_id)
         # Step 9: multimodal — uniform: flag + PDF + page_count, NOT intake type.
         if (config.MULTIMODAL_ENABLED
                 and page_count > 0
@@ -256,3 +257,41 @@ async def _chunk_embed_store(case_law_id, text, page_offsets, page_count, progre
         for c, v in zip(chunks, chunk_vectors)
     ]
     return await db.store_precedent_chunks(case_law_id, chunk_dicts)
+
+
+async def reindex_case_law(
+    case_law_id: "UUID | str",
+    progress: ProgressCb | None = None,
+) -> dict:
+    """Re-chunk + re-embed an existing case_law row from its STORED full_text (GAP-09).
+
+    No re-extract / no re-OCR (uses the stored text — see feedback_no_reocr_retrofit)
+    and no LLM/CLI (only chunker + voyage embeddings), so it is safe to run anywhere.
+    Idempotent: store_precedent_chunks(_hierarchical) is DELETE-then-INSERT.
+
+    Args:
+        case_law_id: Row id, either a UUID or a value whose str() parses as one.
+        progress: Optional awaitable callback, called here as
+            progress("completed", 100, message); falls back to _noop_progress.
+
+    Returns:
+        {"status": "completed", "case_law_id": <str id>, "chunks": <result of
+        _chunk_embed_store>, "reindexed": True}.
+
+    Raises:
+        ValueError: the id is not a valid UUID string, the row is missing, or its
+            stored full_text is empty or whitespace-only.
+    """
+    progress = progress or _noop_progress
+    cid = case_law_id if isinstance(case_law_id, UUID) else UUID(str(case_law_id))
+    row = await db.get_case_law(cid)
+    if not row:
+        raise ValueError(f"case_law not found: {cid}")
+    text = (row.get("full_text") or "").strip()
+    if not text:
+        raise ValueError("case_law has no stored full_text to re-index")
+    # Only the stored text is re-used, so page_offsets=None and page_count=0 are passed.
+    stored = await _chunk_embed_store(cid, text, None, 0, progress)
+    await db.mark_indexed(cid)
+    await progress("completed", 100, f"הוטמע מחדש: {stored} chunks")
+    return {"status": "completed", "case_law_id": str(cid), "chunks": stored, "reindexed": True}