feat(reindex): reindex_case_law from stored text + mark_indexed on ingest (GAP-09, FU-3)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -182,6 +182,7 @@ async def ingest_document(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
stored_chunks = await _chunk_embed_store(case_law_id, raw_text, page_offsets, page_count, progress)
|
stored_chunks = await _chunk_embed_store(case_law_id, raw_text, page_offsets, page_count, progress)
|
||||||
|
await db.mark_indexed(case_law_id)
|
||||||
|
|
||||||
# Step 9: multimodal — uniform: flag + PDF + page_count, NOT intake type.
|
# Step 9: multimodal — uniform: flag + PDF + page_count, NOT intake type.
|
||||||
if (config.MULTIMODAL_ENABLED and page_count > 0
|
if (config.MULTIMODAL_ENABLED and page_count > 0
|
||||||
@@ -256,3 +257,27 @@ async def _chunk_embed_store(case_law_id, text, page_offsets, page_count, progre
|
|||||||
for c, v in zip(chunks, chunk_vectors)
|
for c, v in zip(chunks, chunk_vectors)
|
||||||
]
|
]
|
||||||
return await db.store_precedent_chunks(case_law_id, chunk_dicts)
|
return await db.store_precedent_chunks(case_law_id, chunk_dicts)
|
||||||
|
|
||||||
|
|
||||||
|
async def reindex_case_law(
    case_law_id: "UUID | str",
    progress: ProgressCb | None = None,
) -> dict:
    """Rebuild the chunks of an existing case_law row from its stored text (GAP-09).

    The row's saved ``full_text`` is fed straight back into
    ``_chunk_embed_store``; nothing is re-extracted or re-OCR'd here
    (see feedback_no_reocr_retrofit). Per the original author's note, the
    downstream path involves only the chunker and voyage embeddings (no
    LLM/CLI), and ``store_precedent_chunks(_hierarchical)`` is
    DELETE-then-INSERT, which is what makes a repeated run idempotent.

    Args:
        case_law_id: Row identifier, either a ``UUID`` or anything whose
            ``str()`` form parses as one.
        progress: Optional awaitable callback invoked as
            ``progress(stage, percent, message)``; ``_noop_progress`` is
            used when none is supplied.

    Returns:
        A dict with keys ``status``, ``case_law_id``, ``chunks`` and
        ``reindexed``.

    Raises:
        ValueError: If the row does not exist, if it has no non-blank
            ``full_text``, or if ``case_law_id`` is not a valid UUID string.
    """
    if not progress:
        progress = _noop_progress

    # Normalise the identifier so every downstream call receives a UUID.
    if isinstance(case_law_id, UUID):
        cid = case_law_id
    else:
        cid = UUID(str(case_law_id))

    record = await db.get_case_law(cid)
    if not record:
        raise ValueError(f"case_law not found: {cid}")

    saved_text = record.get("full_text")
    body = (saved_text or "").strip()
    if not body:
        raise ValueError("case_law has no stored full_text to re-index")

    # No page information is kept alongside the stored text, so the
    # page_offsets / page_count arguments are passed as None / 0.
    stored = await _chunk_embed_store(cid, body, None, 0, progress)
    await db.mark_indexed(cid)
    await progress("completed", 100, f"הוטמע מחדש: {stored} chunks")

    return {
        "status": "completed",
        "case_law_id": str(cid),
        "chunks": stored,
        "reindexed": True,
    }
|
||||||
|
|||||||
Reference in New Issue
Block a user