Fix compare sections query: match by number segment
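Document titles look like '[קורפוס] ARAR-23-1188 - ...', whereas decision_number is stored as '1188/23', so the previous pattern LIKE '%1188/23%' never matched any title. The query now extracts the leading numeric segment of decision_number (e.g. '1188' from '1188/23') and matches titles that start with '[קורפוס]' and contain either that segment or the full decision_number. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>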
This commit is contained in:
12
web/app.py
12
web/app.py
@@ -802,16 +802,24 @@ async def training_compare(a: str, b: str):
|
||||
"FROM style_patterns WHERE frequency > 0"
|
||||
)
|
||||
|
||||
# Section breakdown via document_chunks
|
||||
# Section breakdown via document_chunks.
|
||||
# decision_number format is "NNNN/YY" but document titles are like
|
||||
# "[קורפוס] ARAR-YY-NNNN - ..." so we match on the number segment only.
|
||||
async def section_stats(corpus_row):
|
||||
nm = corpus_row["decision_number"]
|
||||
if not nm:
|
||||
return []
|
||||
# Extract the first numeric segment (e.g., "1188" from "1188/23")
|
||||
num_match = re.match(r"(\d{3,4})", nm)
|
||||
num = num_match.group(1) if num_match else nm
|
||||
rows2 = await conn.fetch(
|
||||
"SELECT dc.section_type, sum(length(dc.content))::int as chars "
|
||||
"FROM document_chunks dc JOIN documents d ON dc.document_id=d.id "
|
||||
"WHERE d.title LIKE $1 AND dc.section_type IS NOT NULL "
|
||||
"WHERE d.title LIKE '[קורפוס]%' "
|
||||
" AND (d.title LIKE $1 OR d.title LIKE $2) "
|
||||
" AND dc.section_type IS NOT NULL "
|
||||
"GROUP BY dc.section_type ORDER BY chars DESC",
|
||||
f"%{num}%",
|
||||
f"%{nm}%",
|
||||
)
|
||||
return [{"type": r["section_type"], "chars": r["chars"]} for r in rows2]
|
||||
|
||||
Reference in New Issue
Block a user