feat(style-acq T1-T3): קורפוס-דוגמאות של דפנה לכותב (style_exemplars)
ממלא את ערוץ-הדוגמאות (B) של מערכת רכישת-הסגנון: הכותב מאחזר פסקאות-בלוק אמיתיות של דפנה בזמן כתיבה, ממוקדות section+outcome+practice_area. T1 — תשתית + backfill: - SCHEMA_V27: טבלת style_exemplars (purpose-built — בלי תיקים מזויפים בשרשרת decision_paragraphs). decision_number/source/section/outcome/practice_area+embedding. - db: insert/delete/search_style_exemplars + count_style_exemplars. - scripts/backfill_style_exemplars.py: מפצל קורפוס דפנה (style_corpus + internal_committee) לסעיפים→פסקאות, embed, שמירה. אידמפוטנטי, dry-run/apply. T2 — אחזור ממוקד: - search_style_exemplars(section, outcome, practice_area) — section=hard filter, outcome/practice_area=soft. block_writer._build_precedents_context ממפה block→section ומאחזר (ראשי), לצד הנתיב הישן (משלים). T3 — contrastive/adapt: - הדוגמאות מתויגות "מבנה/קול בלבד — התאם, אל תעתיק תוכן"; פסקה מלאה (1100 תווים). INV-LRN5 (טוהר — סגנון בלבד). G11. הרצת backfill --apply בנפרד. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1204,6 +1204,28 @@ CREATE INDEX IF NOT EXISTS idx_draft_final_pairs_case ON draft_final_pairs(case_
|
||||
CREATE INDEX IF NOT EXISTS idx_draft_final_pairs_status ON draft_final_pairs(status);
|
||||
"""
|
||||
|
||||
# Schema v27: dedicated ``style_exemplars`` table plus its two lookup indexes.
# Every statement uses IF NOT EXISTS, so executing the script again is a no-op.
# NOTE(review): relies on uuid_generate_v4() and the ``vector`` type already
# being available in the database — presumably set up by an earlier schema
# version; confirm.
SCHEMA_V27_SQL = """
-- style_exemplars (T1-T3): block-level paragraphs from Dafna's OWN decisions
-- (style_corpus + internal_committee finals), embedded for retrieval as
-- style exemplars at write-time. Purpose-built so we DON'T fabricate synthetic
-- cases just to reuse decision_paragraphs. INV-LRN5: style material only — the
-- writer is told to adapt structure/voice, copy only boilerplate, never substance.
CREATE TABLE IF NOT EXISTS style_exemplars (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    decision_number TEXT DEFAULT '',
    source TEXT DEFAULT '', -- style_corpus | internal_committee
    practice_area TEXT DEFAULT '',
    outcome TEXT DEFAULT '', -- rejection | partial_acceptance | full_acceptance | ''
    section TEXT DEFAULT 'other', -- background | claims | discussion | summary | other
    paragraph_text TEXT NOT NULL,
    word_count INTEGER DEFAULT 0,
    embedding vector(1024),
    created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_style_exemplars_section ON style_exemplars(section);
CREATE INDEX IF NOT EXISTS idx_style_exemplars_decision ON style_exemplars(decision_number, source);
"""
|
||||
|
||||
|
||||
async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
|
||||
async with pool.acquire() as conn:
|
||||
@@ -1234,7 +1256,8 @@ async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
|
||||
await conn.execute(SCHEMA_V24_SQL)
|
||||
await conn.execute(SCHEMA_V25_SQL)
|
||||
await conn.execute(SCHEMA_V26_SQL)
|
||||
logger.info("Database schema initialized (v1-v26)")
|
||||
await conn.execute(SCHEMA_V27_SQL)
|
||||
logger.info("Database schema initialized (v1-v27)")
|
||||
|
||||
|
||||
async def init_schema() -> None:
|
||||
@@ -2329,6 +2352,85 @@ async def list_draft_final_pairs(status: str | None = None, limit: int = 200) ->
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
|
||||
async def insert_style_exemplar(
    decision_number: str, source: str, practice_area: str, outcome: str,
    section: str, paragraph_text: str, word_count: int, embedding: list[float],
) -> None:
    """Store a single block-level style exemplar row (used by the T1 backfill).

    All arguments map one-to-one onto the ``style_exemplars`` columns of the
    same name. The embedding is handed to the driver as its ``str()``
    rendering rather than as a Python list.
    """
    pool = await get_pool()
    statement = """INSERT INTO style_exemplars
               (decision_number, source, practice_area, outcome, section,
                paragraph_text, word_count, embedding)
               VALUES ($1, $2, $3, $4, $5, $6, $7, $8)"""
    async with pool.acquire() as conn:
        # Positional order must line up with $1..$8 in the statement above.
        row_values = (
            decision_number,
            source,
            practice_area,
            outcome,
            section,
            paragraph_text,
            word_count,
            str(embedding),
        )
        await conn.execute(statement, *row_values)
|
||||
|
||||
|
||||
async def delete_style_exemplars(decision_number: str, source: str) -> int:
    """Remove every exemplar stored for one (decision_number, source) pair.

    Lets the backfill stay idempotent: a decision's rows are cleared before
    they are inserted again.

    Returns:
        The number parsed from the last token of the command status returned
        by ``execute`` (e.g. ``"DELETE 3"`` -> 3), or 0 when that token is
        missing or not an integer.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        status = await conn.execute(
            "DELETE FROM style_exemplars WHERE decision_number = $1 AND source = $2",
            decision_number,
            source,
        )

    try:
        removed = int(status.split()[-1])
    except (ValueError, IndexError):
        # Unexpected or empty status tag: report "nothing deleted" instead of failing.
        removed = 0
    return removed
|
||||
|
||||
|
||||
def _build_style_exemplar_query(
    query_embedding: list[float],
    limit: int,
    section: str | None,
    outcome: str | None = None,
    practice_area: str | None = None,
) -> tuple[str, list]:
    """Compose the similarity SELECT and its positional parameters.

    ``$1`` is always the query embedding and ``$2`` the row limit; every
    filter that is set appends one further placeholder.
    """
    params: list = [query_embedding, limit]
    conditions: list[str] = []

    def bind(value: str) -> str:
        # Register a value and return the placeholder that refers to it.
        params.append(value)
        return f"${len(params)}"

    if section:
        conditions.append(f"section = {bind(section)}")
    if outcome:
        # Rows with an empty outcome are untagged and are let through as well.
        conditions.append(f"(outcome = {bind(outcome)} OR outcome = '')")
    if practice_area:
        conditions.append(
            f"(practice_area = {bind(practice_area)} OR practice_area = '')"
        )

    where = f"WHERE {' AND '.join(conditions)}" if conditions else ""
    sql = f"""
        SELECT decision_number, source, section, outcome, practice_area,
               paragraph_text, word_count,
               1 - (embedding <=> $1) AS score
        FROM style_exemplars
        {where}
        ORDER BY embedding <=> $1
        LIMIT $2
    """
    return sql, params


async def search_style_exemplars(
    query_embedding: list[float],
    section: str | None = None,
    outcome: str | None = None,
    practice_area: str | None = None,
    limit: int = 6,
) -> list[dict]:
    """Retrieve Dafna's own block-level paragraphs as STYLE exemplars (T2).

    Rows are ordered by ``embedding <=> $1`` (closest first) and each result
    carries ``score = 1 - distance``.

    Args:
        query_embedding: Embedding of the block being written.
        section: Hard filter — when set, only rows of this section are returned.
        outcome: Soft filter — narrows to rows with this outcome or an empty one.
        practice_area: Soft filter — narrows to rows with this practice area or
            an empty one.
        limit: Maximum number of rows to return.

    Returns:
        One dict per row with the keys decision_number, source, section,
        outcome, practice_area, paragraph_text, word_count and score.

    The soft filters narrow but never zero-out: if the narrowed query matches
    nothing, it is re-run with the section filter alone. Previously such a
    search returned an empty list even though the section had exemplars.
    """
    pool = await get_pool()
    # NOTE(review): insert_style_exemplar passes str(embedding) to the driver,
    # while this query passes the list unchanged — confirm the pool's vector
    # type codec accepts both forms.
    sql, params = _build_style_exemplar_query(
        query_embedding, limit, section, outcome, practice_area
    )
    async with pool.acquire() as conn:
        rows = await conn.fetch(sql, *params)
        if not rows and (outcome or practice_area):
            # Soft filters eliminated everything: relax them, keep the hard one.
            sql, params = _build_style_exemplar_query(query_embedding, limit, section)
            rows = await conn.fetch(sql, *params)
    return [dict(r) for r in rows]
|
||||
|
||||
|
||||
async def count_style_exemplars() -> dict:
    """Summarise how much of the corpus the backfill has stored.

    Returns:
        A dict with ``total`` (row count), ``decisions`` (number of distinct
        decision_number values) and ``by_section`` (a list of
        ``{"section": ..., "n": ...}`` dicts, largest section first).
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        row_total = await conn.fetchval("SELECT count(*) FROM style_exemplars")
        section_rows = await conn.fetch(
            "SELECT section, count(*) AS n FROM style_exemplars GROUP BY section ORDER BY n DESC"
        )
        decision_total = await conn.fetchval(
            "SELECT count(DISTINCT decision_number) FROM style_exemplars"
        )

    summary = {
        "total": row_total,
        "decisions": decision_total,
        "by_section": list(map(dict, section_rows)),
    }
    return summary
|
||||
|
||||
|
||||
async def upsert_style_pattern(
|
||||
pattern_type: str,
|
||||
pattern_text: str,
|
||||
|
||||
Reference in New Issue
Block a user