Auto-strip Nevo preambles and separate style analysis per appeal subtype
- Add strip_nevo_preamble() to extractor.py — automatically removes Nevo database headers (bibliography, legislation, mini-ratio) during training upload
- Add appeal_subtype column to the style_patterns table — patterns are now stored per subtype instead of being mixed globally
- Update clear_style_patterns() to support subtype-scoped deletion
- Pass appeal_subtype through the analyze_corpus → store → upsert pipeline

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -134,8 +134,8 @@ async def analyze_corpus(appeal_subtype: str = "") -> dict:
|
||||
if not rows:
|
||||
return {"error": "אין החלטות בקורפוס. העלה החלטות קודמות תחילה."}
|
||||
|
||||
# Clear old patterns before re-analysis
|
||||
await db.clear_style_patterns()
|
||||
# Clear old patterns for this subtype (or all if unfiltered)
|
||||
await db.clear_style_patterns(appeal_subtype)
|
||||
|
||||
# Calculate token budget
|
||||
total_chars = sum(len(row["full_text"]) for row in rows)
|
||||
@@ -147,12 +147,12 @@ async def analyze_corpus(appeal_subtype: str = "") -> dict:
|
||||
)
|
||||
|
||||
if estimated_tokens < MAX_INPUT_TOKENS:
|
||||
return await _analyze_single_pass(rows)
|
||||
return await _analyze_single_pass(rows, appeal_subtype)
|
||||
else:
|
||||
return await _analyze_multi_pass(rows)
|
||||
return await _analyze_multi_pass(rows, appeal_subtype)
|
||||
|
||||
|
||||
async def _analyze_single_pass(rows) -> dict:
|
||||
async def _analyze_single_pass(rows, appeal_subtype: str = "") -> dict:
|
||||
"""Send all decisions in a single API call."""
|
||||
decisions_text = ""
|
||||
for row in rows:
|
||||
@@ -164,10 +164,10 @@ async def _analyze_single_pass(rows) -> dict:
|
||||
timeout=claude_session.LONG_TIMEOUT,
|
||||
)
|
||||
|
||||
return await _parse_and_store_patterns(raw, len(rows))
|
||||
return await _parse_and_store_patterns(raw, len(rows), appeal_subtype)
|
||||
|
||||
|
||||
async def _analyze_multi_pass(rows) -> dict:
|
||||
async def _analyze_multi_pass(rows, appeal_subtype: str = "") -> dict:
|
||||
"""Analyze each decision individually, then synthesize patterns."""
|
||||
all_patterns = []
|
||||
|
||||
@@ -197,7 +197,7 @@ async def _analyze_multi_pass(rows) -> dict:
|
||||
timeout=claude_session.LONG_TIMEOUT,
|
||||
)
|
||||
|
||||
return await _parse_and_store_patterns(raw, len(rows))
|
||||
return await _parse_and_store_patterns(raw, len(rows), appeal_subtype)
|
||||
|
||||
|
||||
def _extract_json(response_text: str) -> list | None:
|
||||
@@ -248,14 +248,16 @@ def _extract_json(response_text: str) -> list | None:
|
||||
return None
|
||||
|
||||
|
||||
async def _parse_and_store_patterns(response_text: str, num_decisions: int) -> dict:
|
||||
async def _parse_and_store_patterns(
|
||||
response_text: str, num_decisions: int, appeal_subtype: str = "",
|
||||
) -> dict:
|
||||
"""Parse Claude's response and store patterns in the database."""
|
||||
patterns = _extract_json(response_text)
|
||||
|
||||
if patterns is None:
|
||||
return {"error": "Could not parse analysis results", "raw": response_text}
|
||||
|
||||
# Store patterns
|
||||
# Store patterns tagged by appeal_subtype
|
||||
count = 0
|
||||
for pattern in patterns:
|
||||
await db.upsert_style_pattern(
|
||||
@@ -263,11 +265,13 @@ async def _parse_and_store_patterns(response_text: str, num_decisions: int) -> d
|
||||
pattern_text=pattern.get("text", ""),
|
||||
context=pattern.get("context", ""),
|
||||
examples=[pattern.get("example", "")],
|
||||
appeal_subtype=appeal_subtype,
|
||||
)
|
||||
count += 1
|
||||
|
||||
return {
|
||||
"patterns_found": count,
|
||||
"decisions_analyzed": num_decisions,
|
||||
"appeal_subtype": appeal_subtype or "all",
|
||||
"pattern_types": list({p.get("type") for p in patterns}),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user