feat(training): Style Studio — upload, rich corpus, lessons, curator portrait, chat
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 2m7s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 2m7s
Six-phase upgrade of /training from a read-only dashboard into a full Style Studio for managing Daphna's style corpus. - Upload Sheet on /training: file → proofread preview → commit (no more CLI-only `upload-training` skill). - Rich corpus metadata: GET /api/training/corpus returns summary, outcome, key_principles, page_count, parties (regex), legal_citation, lessons_count. PATCH endpoint for chair edits. CorpusDetailDrawer with 4 tabs (details/content/lessons/patterns) replaces the bare table row. - LLM metadata enrichment: style_metadata_extractor + MCP tools (style_corpus_enrich, style_corpus_pending_enrichment) fill summary/outcome/key_principles via claude_session (free, host-side). - Per-decision lessons: new decision_lessons table + 4 REST endpoints + LessonsTab in drawer; hermes-curator now auto-posts findings as decision_lessons(source=curator). - Curator Portrait tab: prompt rendered with link to Gitea, recent curator findings, style_analyzer training prompts, propose-change form that writes proposals to data/curator-proposals/ for manual chair review (no auto-mutation of the agent file). - Style chat tab: SSE-streamed conversations with the style agent. New host-side pm2 service (legal-chat-service, port 8770) wraps claude CLI with stream-json + --resume continuation; FastAPI proxies via host.docker.internal. Zero API cost — uses chaim's claude.ai subscription. chat_conversations + chat_messages persist history. Architecture: keeps the existing rule that claude_session only runs on the host (not the container). The new legal-chat-service is the canonical bridge between the container and the local CLI for the chat feature; everything else (upload, metadata, lessons) stays within the container's existing capabilities. Audit script (scripts/audit_training_corpus.py) included for verifying which corpus rows still need enrichment. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -194,6 +194,55 @@ ALTER TABLE style_corpus ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT ''
|
||||
-- הרחבת style_patterns עם appeal_subtype לניתוח סגנון נפרד לכל סוג ערר
|
||||
ALTER TABLE style_patterns ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT '';
|
||||
|
||||
-- decision_lessons: per-decision learnings the chair / curator / style_analyzer
|
||||
-- attaches to a corpus row. The generic legal-decision-lessons.md file stays
|
||||
-- as the source of truth for cross-corpus patterns; this table stores the
|
||||
-- granular "what we learned from THIS decision" notes that drive the writer's
|
||||
-- future drafts and let the curator look up prior observations on the same row.
|
||||
CREATE TABLE IF NOT EXISTS decision_lessons (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
style_corpus_id UUID NOT NULL REFERENCES style_corpus(id) ON DELETE CASCADE,
|
||||
lesson_text TEXT NOT NULL,
|
||||
category TEXT DEFAULT 'general', -- style / structure / lexicon / tabular / general
|
||||
source TEXT DEFAULT 'manual', -- manual / curator / chair / style_analyzer
|
||||
applied_to_skill BOOLEAN DEFAULT false, -- has this been promoted into SKILL.md?
|
||||
created_by TEXT DEFAULT 'chaim',
|
||||
created_at TIMESTAMPTZ DEFAULT now(),
|
||||
updated_at TIMESTAMPTZ DEFAULT now()
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_decision_lessons_corpus ON decision_lessons(style_corpus_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_decision_lessons_applied ON decision_lessons(applied_to_skill);
|
||||
|
||||
-- chat_conversations / chat_messages: persistent history for the
|
||||
-- "שיחה עם הסוכן" tab on /training. Each conversation can optionally be
|
||||
-- scoped to a single style_corpus row (when the chair starts a chat
|
||||
-- "about decision X"). claude_session_id is the value the local claude
|
||||
-- CLI returns in stream-json — we pass it back via `--resume` on the
|
||||
-- next message so the model continues the same conversation without
|
||||
-- re-loading the system prompt every time.
|
||||
CREATE TABLE IF NOT EXISTS chat_conversations (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
title TEXT NOT NULL DEFAULT 'שיחה חדשה',
|
||||
style_corpus_id UUID REFERENCES style_corpus(id) ON DELETE SET NULL,
|
||||
claude_session_id TEXT,
|
||||
system_prompt_version TEXT DEFAULT 'v1',
|
||||
created_at TIMESTAMPTZ DEFAULT now(),
|
||||
last_message_at TIMESTAMPTZ DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS chat_messages (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
conversation_id UUID NOT NULL REFERENCES chat_conversations(id) ON DELETE CASCADE,
|
||||
role TEXT NOT NULL, -- 'user' | 'assistant'
|
||||
content TEXT NOT NULL,
|
||||
raw_events JSONB DEFAULT '[]', -- stream-json events for the assistant turn (optional, for debug)
|
||||
created_at TIMESTAMPTZ DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_chat_messages_conv ON chat_messages(conversation_id, created_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_chat_conv_corpus ON chat_conversations(style_corpus_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_chat_conv_last ON chat_conversations(last_message_at DESC);
|
||||
|
||||
-- טבלת qa_results
|
||||
CREATE TABLE IF NOT EXISTS qa_results (
|
||||
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
||||
@@ -1609,6 +1658,284 @@ async def delete_from_style_corpus(corpus_id: UUID) -> dict:
|
||||
}
|
||||
|
||||
|
||||
async def get_style_corpus_row(corpus_id: UUID) -> dict | None:
    """Fetch one ``style_corpus`` row by primary key.

    Returns the selected columns as a plain dict, or ``None`` when no row
    carries the given id.
    """
    query = """
        SELECT id, document_id, decision_number, decision_date,
               subject_categories, full_text, summary, outcome,
               key_principles, practice_area, appeal_subtype, created_at
        FROM style_corpus WHERE id = $1
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        record = await db.fetchrow(query, corpus_id)
    if not record:
        return None
    return dict(record)
|
||||
|
||||
|
||||
async def update_style_corpus_metadata(
    corpus_id: UUID,
    *,
    summary: str | None = None,
    outcome: str | None = None,
    key_principles: list[str] | None = None,
    appeal_subtype: str | None = None,
    practice_area: str | None = None,
    overwrite: bool = False,
) -> dict:
    """Write enriched metadata onto a single ``style_corpus`` row.

    Arguments left as ``None`` are never touched. For the rest, a column is
    written only when its stored value is blank, unless ``overwrite=True``,
    in which case the supplied value replaces whatever is stored.

    Returns ``{"updated": True, "fields": [...]}`` naming the columns that
    were written, or ``{"updated": False, "reason": ...}`` when the row does
    not exist or no column qualified for a write.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        stored = await conn.fetchrow(
            "SELECT summary, outcome, key_principles, appeal_subtype, practice_area "
            "FROM style_corpus WHERE id = $1",
            corpus_id,
        )
        if not stored:
            return {"updated": False, "reason": "not found"}

        def _writable(column: str) -> bool:
            # A text column may be written when forced, or when it currently
            # holds NULL / empty / whitespace-only text.
            return overwrite or not (stored[column] or "").strip()

        changes: dict = {}
        if summary is not None and _writable("summary"):
            changes["summary"] = summary
        if outcome is not None and _writable("outcome"):
            changes["outcome"] = outcome
        if key_principles is not None:
            stored_principles = stored["key_principles"]
            # The column may come back as a JSON-encoded string; decode it
            # before testing for emptiness, treating bad JSON as empty.
            if isinstance(stored_principles, str):
                try:
                    stored_principles = json.loads(stored_principles)
                except json.JSONDecodeError:
                    stored_principles = []
            if overwrite or not stored_principles:
                changes["key_principles"] = json.dumps(key_principles)
        if appeal_subtype is not None and _writable("appeal_subtype"):
            changes["appeal_subtype"] = appeal_subtype
        if practice_area is not None and _writable("practice_area"):
            changes["practice_area"] = practice_area

        if not changes:
            return {"updated": False, "reason": "nothing to update", "fields": []}

        # $1 is the row id, so column placeholders start at $2.
        columns = list(changes)
        assignments = [
            f"{column} = ${position}"
            for position, column in enumerate(columns, start=2)
        ]
        set_clause = ", ".join(assignments)
        await conn.execute(
            f"UPDATE style_corpus SET {set_clause} WHERE id = $1",
            corpus_id, *changes.values(),
        )
        return {"updated": True, "fields": columns}
|
||||
|
||||
|
||||
# ── decision_lessons (per-corpus row notes) ────────────────────────
|
||||
|
||||
|
||||
async def list_decision_lessons(corpus_id: UUID) -> list[dict]:
    """Return every ``decision_lessons`` row attached to one corpus row.

    Rows come back as plain dicts, newest ``created_at`` first.
    """
    query = (
        "SELECT id, style_corpus_id, lesson_text, category, source, "
        " applied_to_skill, created_by, created_at, updated_at "
        "FROM decision_lessons WHERE style_corpus_id = $1 "
        "ORDER BY created_at DESC"
    )
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        records = await db.fetch(query, corpus_id)
    return list(map(dict, records))
|
||||
|
||||
|
||||
async def add_decision_lesson(
    corpus_id: UUID,
    *,
    lesson_text: str,
    category: str = "general",
    source: str = "manual",
    created_by: str = "chaim",
) -> dict:
    """Insert a lesson for a corpus row and return the stored row.

    The returned dict carries the inserted row's columns as reported by
    ``RETURNING``; an empty dict is returned if no row comes back.
    """
    statement = (
        "INSERT INTO decision_lessons "
        "(style_corpus_id, lesson_text, category, source, created_by) "
        "VALUES ($1, $2, $3, $4, $5) "
        "RETURNING id, style_corpus_id, lesson_text, category, source, "
        " applied_to_skill, created_by, created_at, updated_at"
    )
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        record = await db.fetchrow(
            statement, corpus_id, lesson_text, category, source, created_by,
        )
    if not record:
        return {}
    return dict(record)
|
||||
|
||||
|
||||
async def update_decision_lesson(
|
||||
lesson_id: UUID,
|
||||
*,
|
||||
lesson_text: str | None = None,
|
||||
category: str | None = None,
|
||||
applied_to_skill: bool | None = None,
|
||||
) -> dict:
|
||||
sets: dict = {}
|
||||
if lesson_text is not None:
|
||||
sets["lesson_text"] = lesson_text
|
||||
if category is not None:
|
||||
sets["category"] = category
|
||||
if applied_to_skill is not None:
|
||||
sets["applied_to_skill"] = applied_to_skill
|
||||
if not sets:
|
||||
return {"updated": False, "reason": "nothing to update"}
|
||||
sets["updated_at"] = "now()" # sentinel — replaced inline below
|
||||
cols = [c for c in sets if c != "updated_at"]
|
||||
set_clause = ", ".join(f"{c} = ${i + 2}" for i, c in enumerate(cols))
|
||||
set_clause += ", updated_at = now()"
|
||||
values = [sets[c] for c in cols]
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow(
|
||||
f"UPDATE decision_lessons SET {set_clause} WHERE id = $1 "
|
||||
f"RETURNING id, style_corpus_id, lesson_text, category, source, "
|
||||
f" applied_to_skill, updated_at",
|
||||
lesson_id, *values,
|
||||
)
|
||||
if not row:
|
||||
return {"updated": False, "reason": "not found"}
|
||||
return {"updated": True, **dict(row)}
|
||||
|
||||
|
||||
async def delete_decision_lesson(lesson_id: UUID) -> dict:
    """Delete one lesson by id.

    Returns ``{"deleted": True}`` when the command status reports a non-zero
    row count, ``{"deleted": False}`` otherwise.
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        status = await db.execute(
            "DELETE FROM decision_lessons WHERE id = $1", lesson_id,
        )
    # The status tag looks like "DELETE n"; everything after the first space
    # is the affected-row count. No space at all is treated as zero rows.
    _, separator, tail = status.partition(" ")
    affected = tail.strip() if separator else "0"
    return {"deleted": affected != "0"}
|
||||
|
||||
|
||||
async def count_decision_lessons_per_corpus() -> dict[str, int]:
    """Count lessons grouped by corpus row.

    Keys are ``style_corpus_id`` values rendered with ``str()``; values are
    the number of ``decision_lessons`` rows for that id. Corpus rows with no
    lessons do not appear in the mapping.
    """
    query = (
        "SELECT style_corpus_id, count(*) AS n "
        "FROM decision_lessons GROUP BY style_corpus_id"
    )
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        records = await db.fetch(query)
    counts: dict[str, int] = {}
    for record in records:
        counts[str(record["style_corpus_id"])] = record["n"]
    return counts
|
||||
|
||||
|
||||
# ── chat (style agent conversations) ───────────────────────────────
|
||||
|
||||
|
||||
async def create_chat_conversation(
    *,
    title: str = "שיחה חדשה",
    style_corpus_id: UUID | None = None,
    system_prompt_version: str = "v1",
) -> dict:
    """Insert a new ``chat_conversations`` row and return it as a dict.

    ``style_corpus_id`` may be ``None`` for a conversation that is not tied
    to a corpus row. An empty dict is returned if the insert yields no row.
    """
    statement = (
        "INSERT INTO chat_conversations "
        "(title, style_corpus_id, system_prompt_version) "
        "VALUES ($1, $2, $3) "
        "RETURNING id, title, style_corpus_id, claude_session_id, "
        " system_prompt_version, created_at, last_message_at"
    )
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        record = await db.fetchrow(
            statement, title, style_corpus_id, system_prompt_version,
        )
    if not record:
        return {}
    return dict(record)
|
||||
|
||||
|
||||
async def list_chat_conversations(limit: int = 50) -> list[dict]:
    """List conversations, most recently active first.

    Each dict holds the conversation columns, the ``decision_number`` of the
    linked corpus row (NULL when there is none, via the LEFT JOIN), and a
    ``message_count`` computed per conversation. At most ``limit`` rows are
    returned.
    """
    query = """
        SELECT c.id, c.title, c.style_corpus_id, c.claude_session_id,
               c.created_at, c.last_message_at,
               sc.decision_number,
               (SELECT count(*) FROM chat_messages m WHERE m.conversation_id = c.id) AS message_count
        FROM chat_conversations c
        LEFT JOIN style_corpus sc ON sc.id = c.style_corpus_id
        ORDER BY c.last_message_at DESC NULLS LAST
        LIMIT $1
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        records = await db.fetch(query, limit)
    return list(map(dict, records))
|
||||
|
||||
|
||||
async def get_chat_conversation(conv_id: UUID) -> dict | None:
    """Fetch one conversation by id, or ``None`` when it does not exist."""
    query = (
        "SELECT id, title, style_corpus_id, claude_session_id, "
        " system_prompt_version, created_at, last_message_at "
        "FROM chat_conversations WHERE id = $1"
    )
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        record = await db.fetchrow(query, conv_id)
    if not record:
        return None
    return dict(record)
|
||||
|
||||
|
||||
async def delete_chat_conversation(conv_id: UUID) -> dict:
    """Delete one conversation by id.

    Returns ``{"deleted": True}`` when the command status reports a non-zero
    row count, ``{"deleted": False}`` otherwise.
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        status = await db.execute(
            "DELETE FROM chat_conversations WHERE id = $1", conv_id,
        )
    # Status tag is "DELETE n"; a tag with no space is treated as zero rows.
    _, separator, tail = status.partition(" ")
    affected = tail.strip() if separator else "0"
    return {"deleted": affected != "0"}
|
||||
|
||||
|
||||
async def update_chat_conversation_session_id(
    conv_id: UUID, claude_session_id: str,
) -> None:
    """Store the session id on a conversation and bump ``last_message_at``.

    Nothing is returned; an unknown ``conv_id`` simply updates no rows.
    """
    statement = (
        "UPDATE chat_conversations SET claude_session_id = $1, "
        " last_message_at = now() "
        "WHERE id = $2"
    )
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        await db.execute(statement, claude_session_id, conv_id)
|
||||
|
||||
|
||||
async def add_chat_message(
    conv_id: UUID,
    *,
    role: str,
    content: str,
    raw_events: list | None = None,
) -> dict:
    """Append a message to a conversation and bump its ``last_message_at``.

    The INSERT into ``chat_messages`` and the UPDATE of the parent
    ``chat_conversations`` row run inside a single transaction, so a failure
    in either statement leaves neither applied.

    Args:
        conv_id: Conversation the message belongs to.
        role: Message author tag (the schema comment lists 'user' and
            'assistant').
        content: Message text.
        raw_events: Optional list stored JSON-encoded in ``raw_events``;
            ``None`` or an empty list is stored as ``[]``.

    Returns:
        The inserted row (id, conversation_id, role, content, created_at) as
        a dict, or an empty dict if the insert yields no row.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        # One transaction: the message and the conversation's activity
        # timestamp must not drift apart if the second statement fails.
        async with conn.transaction():
            row = await conn.fetchrow(
                "INSERT INTO chat_messages "
                "(conversation_id, role, content, raw_events) "
                "VALUES ($1, $2, $3, $4) "
                "RETURNING id, conversation_id, role, content, created_at",
                conv_id, role, content, json.dumps(raw_events or []),
            )
            await conn.execute(
                "UPDATE chat_conversations SET last_message_at = now() WHERE id = $1",
                conv_id,
            )
    return dict(row) if row else {}
|
||||
|
||||
|
||||
async def list_chat_messages(conv_id: UUID) -> list[dict]:
    """Return a conversation's messages as dicts, oldest ``created_at`` first.

    Only id, role, content and created_at are selected; ``raw_events`` is
    not included.
    """
    query = (
        "SELECT id, role, content, created_at "
        "FROM chat_messages WHERE conversation_id = $1 "
        "ORDER BY created_at ASC"
    )
    db_pool = await get_pool()
    async with db_pool.acquire() as db:
        records = await db.fetch(query, conv_id)
    return list(map(dict, records))
|
||||
|
||||
|
||||
async def get_style_patterns(pattern_type: str | None = None) -> list[dict]:
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
|
||||
Reference in New Issue
Block a user