Fix compare sections query: match by number segment

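Document titles look like '[קורפוס] ARAR-23-1188 - ...', but decision_number is stored as '1188/23', so the previous filter (title LIKE '%1188/23%') never matched. The query now extracts the leading numeric segment of decision_number (e.g. '1188') and matches titles that start with '[קורפוס]' and contain either that segment or the full decision_number.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>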
2026-04-11 12:28:52 +00:00
parent 5cb0be473c
commit ffa089e1df
1 changed files with 10 additions and 2 deletions
--- a/web/app.py
+++ b/web/app.py
@@ -802,16 +802,24 @@ async def training_compare(a: str, b: str):
            "FROM style_patterns WHERE frequency > 0"
        )

-        # Section breakdown via document_chunks
+        # Section breakdown via document_chunks.
+        # decision_number format is "NNNN/YY" but document titles are like
+        # "[קורפוס] ARAR-YY-NNNN - ..." so we match on the number segment only.
        async def section_stats(corpus_row):
            nm = corpus_row["decision_number"]
            if not nm:
                return []
+            # Take the leading run of 3-4 digits (e.g., "1188" from "1188/23"); otherwise keep nm as-is
+            num_match = re.match(r"(\d{3,4})", nm)
+            num = num_match.group(1) if num_match else nm
            rows2 = await conn.fetch(
                "SELECT dc.section_type, sum(length(dc.content))::int as chars "
                "FROM document_chunks dc JOIN documents d ON dc.document_id=d.id "
-                "WHERE d.title LIKE $1 AND dc.section_type IS NOT NULL "
+                "WHERE d.title LIKE '[קורפוס]%' "
+                "  AND (d.title LIKE $1 OR d.title LIKE $2) "
+                "  AND dc.section_type IS NOT NULL "
                "GROUP BY dc.section_type ORDER BY chars DESC",
+                f"%{num}%",
                f"%{nm}%",
            )
            return [{"type": r["section_type"], "chars": r["chars"]} for r in rows2]