Fix compare sections query: match by number segment
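Document titles look like '[קורפוס] ARAR-23-1188 - ...', but decision_number is stored as '1188/23', so the previous filter `d.title LIKE '%1188/23%'` never matched. The query now extracts the first numeric segment of decision_number (e.g. '1188' from '1188/23') and matches it against the title, restricted to titles starting with '[קורפוס]'; the full decision_number is still tried as a second LIKE pattern.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>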
This commit is contained in:
12
web/app.py
12
web/app.py
@@ -802,16 +802,24 @@ async def training_compare(a: str, b: str):
|
|||||||
"FROM style_patterns WHERE frequency > 0"
|
"FROM style_patterns WHERE frequency > 0"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Section breakdown via document_chunks
|
# Section breakdown via document_chunks.
|
||||||
|
# decision_number format is "NNNN/YY" but document titles are like
|
||||||
|
# "[קורפוס] ARAR-YY-NNNN - ..." so we match on the number segment only.
|
||||||
async def section_stats(corpus_row):
|
async def section_stats(corpus_row):
|
||||||
nm = corpus_row["decision_number"]
|
nm = corpus_row["decision_number"]
|
||||||
if not nm:
|
if not nm:
|
||||||
return []
|
return []
|
||||||
|
# Extract the first numeric segment (e.g., "1188" from "1188/23")
|
||||||
|
num_match = re.match(r"(\d{3,4})", nm)
|
||||||
|
num = num_match.group(1) if num_match else nm
|
||||||
rows2 = await conn.fetch(
|
rows2 = await conn.fetch(
|
||||||
"SELECT dc.section_type, sum(length(dc.content))::int as chars "
|
"SELECT dc.section_type, sum(length(dc.content))::int as chars "
|
||||||
"FROM document_chunks dc JOIN documents d ON dc.document_id=d.id "
|
"FROM document_chunks dc JOIN documents d ON dc.document_id=d.id "
|
||||||
"WHERE d.title LIKE $1 AND dc.section_type IS NOT NULL "
|
"WHERE d.title LIKE '[קורפוס]%' "
|
||||||
|
" AND (d.title LIKE $1 OR d.title LIKE $2) "
|
||||||
|
" AND dc.section_type IS NOT NULL "
|
||||||
"GROUP BY dc.section_type ORDER BY chars DESC",
|
"GROUP BY dc.section_type ORDER BY chars DESC",
|
||||||
|
f"%{num}%",
|
||||||
f"%{nm}%",
|
f"%{nm}%",
|
||||||
)
|
)
|
||||||
return [{"type": r["section_type"], "chars": r["chars"]} for r in rows2]
|
return [{"type": r["section_type"], "chars": r["chars"]} for r in rows2]
|
||||||
|
|||||||
Reference in New Issue
Block a user