From ffa089e1dfda91c2f44f93015527189fce1eabdb Mon Sep 17 00:00:00 2001
From: Chaim <chaim@marcus-law.co.il>
Date: Sat, 11 Apr 2026 12:28:52 +0000
Subject: [PATCH] Fix compare sections query: match by number segment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

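Document titles are '[קורפוס] ARAR-23-1188 - ...' but decision_number
is '1188/23', so the previous LIKE %1188/23% did not match. Now extracts
the leading 3-4 digit segment and matches it against the title; the
full decision_number is kept as a fallback pattern, and only titles
starting with '[קורפוס]' are considered.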

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 web/app.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/web/app.py b/web/app.py
index 9d2b885..8f50532 100644
--- a/web/app.py
+++ b/web/app.py
@@ -802,16 +802,24 @@ async def training_compare(a: str, b: str):
             "FROM style_patterns WHERE frequency > 0"
         )
 
-        # Section breakdown via document_chunks
+        # Section breakdown via document_chunks. decision_number is "NNNN/YY"
+        # but titles look like "[קורפוס] ARAR-YY-NNNN - ...", so match on the
+        # NNNN segment, keeping the raw decision_number as a fallback pattern.
         async def section_stats(corpus_row):
             nm = corpus_row["decision_number"]
             if not nm:
                 return []
+            # Take the leading 3-4 digits ("1188" from "1188/23"), else the raw value
+            num_match = re.match(r"(\d{3,4})", nm)
+            num = num_match.group(1) if num_match else nm
             rows2 = await conn.fetch(
                 "SELECT dc.section_type, sum(length(dc.content))::int as chars "
                 "FROM document_chunks dc JOIN documents d ON dc.document_id=d.id "
-                "WHERE d.title LIKE $1 AND dc.section_type IS NOT NULL "
+                "WHERE d.title LIKE '[קורפוס]%' "
+                "  AND (d.title LIKE $1 OR d.title LIKE $2) "
+                "  AND dc.section_type IS NOT NULL "
                 "GROUP BY dc.section_type ORDER BY chars DESC",
+                f"%{num}%",
                 f"%{nm}%",
             )
             return [{"type": r["section_type"], "chars": r["chars"]} for r in rows2]