On main: Pre-merge: synced agent files

This commit is contained in:
2026-04-13 12:42:00 +00:00
16 changed files with 933 additions and 94 deletions

View File

@@ -26,6 +26,13 @@ CHAIR_POSITION_PLACEHOLDERS = (
"[טרם מולא]",
)
# Prefix-style placeholders: a value that merely *starts* with one of these
# strings is also treated as a placeholder, because the analyst sometimes
# adds explanatory text after the bracketed marker.
CHAIR_POSITION_PLACEHOLDER_PREFIXES = (
    "[ימולא",
    "ימולא ע",
)
# Hebrew label text ("position of the appeals committee").
# NOTE(review): presumably the field label under which the chair position is
# looked up in the analysis file — its use is not visible here; confirm.
CHAIR_POSITION_LABEL = "עמדת ועדת הערר"
# Matches "## N. title" or "## title" for main sections
@@ -47,6 +54,9 @@ CASE_NUMBER_RE = re.compile(r"#\s*ניתוח.*?ערר\s+([\d/\-]+)", re.MULTILIN
# Multiline pattern anchored at a line that begins with "תאריך:" (Hebrew for
# "date:"); group 1 captures the value that follows the colon.
DATE_RE = re.compile(r"^תאריך:\s*(.+?)\s*$", re.MULTILINE)
# File name of the research-findings document; extract_research_findings
# describes it as produced by the legal-researcher agent.
RESEARCH_FINDINGS_FILENAME = "research-findings.md"
def _is_placeholder(text: str) -> bool:
"""Check if a field value is one of the placeholder strings (empty)."""
stripped = text.strip()
@@ -55,6 +65,9 @@ def _is_placeholder(text: str) -> bool:
for ph in CHAIR_POSITION_PLACEHOLDERS:
if ph in stripped:
return True
for prefix in CHAIR_POSITION_PLACEHOLDER_PREFIXES:
if stripped.startswith(prefix):
return True
return False
@@ -434,3 +447,199 @@ def extract_chair_directions(file_path: Path) -> dict[str, Any]:
"threshold_claims": threshold,
"issues": issues,
}
# ── Full analysis extraction (for legal-writer) ──────────────────
# Map Hebrew field labels → stable English keys for JSON output
_FIELD_KEY_MAP = {
"טענה": "claims",
"טענה (claim)": "claims",
"טענות": "claims",
"תשובה": "responses",
"תשובה (response)": "responses",
"תשובות": "responses",
"תגובה": "replies",
"תגובה (reply)": "replies",
"תגובות": "replies",
# Analyst sometimes appends party name to the label
# e.g. "תגובה (reply — קובר)" — catch the pattern dynamically below
"ניתוח אסטרטגי": "strategic_analysis",
"חוזקות": "strengths",
"חולשות": "weaknesses",
"הזדמנויות": "opportunities",
"שאלות משפטיות": "legal_questions",
"חיפוש תקדימים": "precedent_search",
"חקיקה רלוונטית": "relevant_legislation",
"תקדימים מהקורפוס הפנימי": "internal_precedents",
}
def _fields_to_dict(fields: list[dict]) -> dict[str, str]:
"""Convert ordered field list to a dict with stable English keys.
Unknown labels are kept as-is (Hebrew) so no data is lost.
Handles dynamic labels like "תגובה (reply — קובר)" by matching prefix.
"""
result: dict[str, str] = {}
for f in fields:
label = f["label"]
key = _FIELD_KEY_MAP.get(label)
if key is None:
# Try prefix matching for dynamic labels (e.g. "תגובה (reply — name)")
if label.startswith("תגובה"):
key = "replies"
elif label.startswith("טענה"):
key = "claims"
elif label.startswith("תשובה"):
key = "responses"
else:
key = label
result[key] = f["content"]
return result
def extract_full_analysis(file_path: Path) -> dict[str, Any]:
    """Return every analysis field per item from analysis-and-research.md.

    extract_chair_directions exposes only the chair positions; this variant
    additionally flattens all parsed fields of each threshold claim and issue
    (claims, responses, replies, strengths/weaknesses/opportunities, legal
    questions, legislation, internal precedents) into the item, which is what
    the legal-writer needs to produce block-yod (discussion).

    The result carries a status plus item counters alongside the section
    texts. When the file does not exist, a "missing" result with empty
    sections and zeroed counters is returned instead.
    """
    if not file_path.exists():
        return {
            "file_exists": False,
            "status": "missing",
            "error": "analysis-and-research.md not found",
            "procedural_background": "",
            "agreed_facts": "",
            "disputed_facts": "",
            "conclusions": "",
            "threshold_claims": [],
            "issues": [],
            "total_items": 0,
            "filled_count": 0,
            "empty_count": 0,
        }

    document = parse(file_path)

    def _flatten(entry: dict) -> dict:
        """Merge an item's identity keys with all of its extracted fields."""
        base = {
            "id": entry["id"],
            "number": entry["number"],
            "title": entry["title"],
            "direction": entry.get("chair_position", "") or "",
        }
        # Extracted fields come last, so on a key clash they take precedence.
        return {**base, **_fields_to_dict(entry.get("fields", []))}

    threshold = [_flatten(claim) for claim in document.get("threshold_claims", [])]
    issues = [_flatten(issue) for issue in document.get("issues", [])]

    everything = threshold + issues
    total = len(everything)
    # An item counts as filled when its direction holds non-whitespace text.
    filled = len([entry for entry in everything if entry["direction"].strip()])

    if total == 0:
        status = "missing"
    elif filled == total:
        status = "complete"
    elif filled == 0:
        status = "empty"
    else:
        status = "partial"

    header = document.get("header", {})
    return {
        "file_exists": True,
        "file_path": str(file_path),
        "case_number": header.get("case_number", ""),
        "modified_at": header.get("modified_at", ""),
        "status": status,
        "total_items": total,
        "filled_count": filled,
        "empty_count": total - filled,
        "procedural_background": document.get("procedural_background", ""),
        "agreed_facts": document.get("agreed_facts", ""),
        "disputed_facts": document.get("disputed_facts", ""),
        "conclusions": document.get("conclusions", ""),
        "threshold_claims": threshold,
        "issues": issues,
    }
# ── Research findings extraction ──────────────────────────────────


def extract_research_findings(file_path: Path) -> dict[str, Any]:
    """Parse research-findings.md into a structured dict.

    The file is written by the legal-researcher agent. Each main section is
    routed by substring match on its title:

    * precedent sections → ``precedent_summaries`` (one entry per subsection)
    * plan sections → ``plan_mappings`` (one entry per subsection)
    * timeline section → ``timeline`` (section body)
    * recommendations section → ``recommendations`` (section body)
    * anything else → ``other_sections`` (title and body)

    If the file does not exist, only ``file_exists``, ``status`` and
    ``error`` are returned.
    """
    if not file_path.exists():
        return {
            "file_exists": False,
            "status": "missing",
            "error": "research-findings.md not found",
        }

    text = file_path.read_text(encoding="utf-8")
    info = file_path.stat()
    modified = datetime.fromtimestamp(info.st_mtime).isoformat()
    main_sections = _split_main_sections(text)

    def _subsection_entries(section_body: str) -> list[dict[str, Any]]:
        """Build one entry per subsection: its title, fields and raw text."""
        entries: list[dict[str, Any]] = []
        for heading, sub_text in _split_subsections(section_body):
            parsed_fields = _extract_fields(sub_text)
            entries.append({
                "title": heading,
                "fields": {f["label"]: f["content"] for f in parsed_fields},
                # Raw text is kept only when no fields could be extracted.
                "raw": "" if parsed_fields else sub_text,
            })
        return entries

    findings: dict[str, Any] = {
        "file_exists": True,
        "file_path": str(file_path),
        "modified_at": modified,
        "file_size": info.st_size,
        "precedent_summaries": [],
        "plan_mappings": [],
        "timeline": "",
        "recommendations": "",
        "other_sections": [],
    }

    precedent_markers = ("סיכום פסיקה", "פסיקה")
    plan_markers = ("מיפוי תכנית", "תכנית")

    for _num, raw_title, section_body in main_sections:
        heading = raw_title.strip()
        if any(marker in heading for marker in precedent_markers):
            findings["precedent_summaries"].extend(_subsection_entries(section_body))
        elif any(marker in heading for marker in plan_markers):
            findings["plan_mappings"].extend(_subsection_entries(section_body))
        elif "ציר זמן" in heading:
            findings["timeline"] = section_body
        elif "המלצות" in heading:
            findings["recommendations"] = section_body
        else:
            findings["other_sections"].append({
                "title": heading,
                "body": section_body,
            })
    return findings