From ffa089e1dfda91c2f44f93015527189fce1eabdb Mon Sep 17 00:00:00 2001 From: Chaim Date: Sat, 11 Apr 2026 12:28:52 +0000 Subject: [PATCH] Fix compare sections query: match by number segment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document titles are '[קורפוס] ARAR-23-1188 - ...' but decision_number is '1188/23' — previous LIKE %1188/23% wouldn't match. Now extracts the leading 3-4 digit number from decision_number and matches it against the title, keeping the full decision_number as a fallback pattern. Co-Authored-By: Claude Opus 4.6 (1M context) --- web/app.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/web/app.py b/web/app.py index 9d2b885..8f50532 100644 --- a/web/app.py +++ b/web/app.py @@ -802,16 +802,24 @@ async def training_compare(a: str, b: str): "FROM style_patterns WHERE frequency > 0" ) - # Section breakdown via document_chunks + # Section breakdown via document_chunks. + # decision_number format is "NNNN/YY" but document titles are like + # "[קורפוס] ARAR-YY-NNNN - ..." so we match on the number segment (or the full decision_number). async def section_stats(corpus_row): nm = corpus_row["decision_number"] if not nm: return [] + # Extract the leading 3-4 digit segment (e.g., "1188" from "1188/23") + num_match = re.match(r"(\d{3,4})", nm) + num = num_match.group(1) if num_match else nm rows2 = await conn.fetch( "SELECT dc.section_type, sum(length(dc.content))::int as chars " "FROM document_chunks dc JOIN documents d ON dc.document_id=d.id " - "WHERE d.title LIKE $1 AND dc.section_type IS NOT NULL " + "WHERE d.title LIKE '[קורפוס]%' " + " AND (d.title LIKE $1 OR d.title LIKE $2) " + " AND dc.section_type IS NOT NULL " "GROUP BY dc.section_type ORDER BY chars DESC", + f"%{num}%", f"%{nm}%", ) return [{"type": r["section_type"], "chars": r["chars"]} for r in rows2]