On main: Pre-merge: synced agent files

2026-04-13 12:42:00 +00:00
parent 3541238239 4b217bf745 ee83b6b345
commit 243d7b3497
16 changed files with 933 additions and 94 deletions
--- a/mcp-server/src/legal_mcp/services/research_md.py
+++ b/mcp-server/src/legal_mcp/services/research_md.py
@@ -26,6 +26,13 @@ CHAIR_POSITION_PLACEHOLDERS = (
    "[טרם מולא]",
 )

+# Prefixes that also mark a value as a placeholder: _is_placeholder checks them
+# with startswith, since the analyst sometimes adds text after the bracket.
+CHAIR_POSITION_PLACEHOLDER_PREFIXES = (
+    "[ימולא",
+    "ימולא ע",
+)
+
 CHAIR_POSITION_LABEL = "עמדת ועדת הערר"

 # Matches "## N. title" or "## title" for main sections
@@ -47,6 +54,9 @@ CASE_NUMBER_RE = re.compile(r"#\s*ניתוח.*?ערר\s+([\d/\-]+)", re.MULTILIN
 DATE_RE = re.compile(r"^תאריך:\s*(.+?)\s*$", re.MULTILINE)


+RESEARCH_FINDINGS_FILENAME = "research-findings.md"
+
+
 def _is_placeholder(text: str) -> bool:
    """Check if a field value is one of the placeholder strings (empty)."""
    stripped = text.strip()
@@ -55,6 +65,9 @@ def _is_placeholder(text: str) -> bool:
    for ph in CHAIR_POSITION_PLACEHOLDERS:
        if ph in stripped:
            return True
+    for prefix in CHAIR_POSITION_PLACEHOLDER_PREFIXES:
+        if stripped.startswith(prefix):
+            return True
    return False


@@ -434,3 +447,199 @@ def extract_chair_directions(file_path: Path) -> dict[str, Any]:
        "threshold_claims": threshold,
        "issues": issues,
    }
+
+
+# ── Full analysis extraction (for legal-writer) ──────────────────
+
+
+# Exact-match map: Hebrew field labels → stable English keys for JSON output
+_FIELD_KEY_MAP = {
+    "טענה": "claims",
+    "טענה (claim)": "claims",
+    "טענות": "claims",
+    "תשובה": "responses",
+    "תשובה (response)": "responses",
+    "תשובות": "responses",
+    "תגובה": "replies",
+    "תגובה (reply)": "replies",
+    "תגובות": "replies",
+    # The analyst sometimes appends a party name to the label, e.g.
+    # "תגובה (reply — קובר)"; _fields_to_dict handles those by prefix match.
+    "ניתוח אסטרטגי": "strategic_analysis",
+    "חוזקות": "strengths",
+    "חולשות": "weaknesses",
+    "הזדמנויות": "opportunities",
+    "שאלות משפטיות": "legal_questions",
+    "חיפוש תקדימים": "precedent_search",
+    "חקיקה רלוונטית": "relevant_legislation",
+    "תקדימים מהקורפוס הפנימי": "internal_precedents",
+}
+
+
+def _fields_to_dict(fields: list[dict]) -> dict[str, str]:
+    """Convert an ordered field list to a dict with stable English keys.
+
+    Labels are looked up in _FIELD_KEY_MAP; dynamic labels such as
+    "תגובה (reply — קובר)" fall back to prefix matching, and unknown labels
+    are kept as-is (Hebrew). Fields that resolve to the same key are joined
+    with a blank line, in order, instead of overwriting one another.
+    """
+    prefix_keys = (("תגובה", "replies"), ("טענה", "claims"), ("תשובה", "responses"))
+    result: dict[str, str] = {}
+    for field in fields:
+        label = field["label"]
+        key = _FIELD_KEY_MAP.get(label)
+        if key is None:
+            # No exact match: try the known prefixes, else keep the raw label.
+            key = next((k for p, k in prefix_keys if label.startswith(p)), label)
+        if key in result:
+            # e.g. replies from two parties under the same issue
+            result[key] = f"{result[key]}\n\n{field['content']}"
+        else:
+            result[key] = field["content"]
+    return result
+
+
+def extract_full_analysis(file_path: Path) -> dict[str, Any]:
+    """Return every parsed field of analysis-and-research.md.
+
+    Where extract_chair_directions reports only chair positions, each
+    threshold claim and issue here carries id, number, title and direction
+    (the chair position, empty string when unset) plus all of its extracted
+    fields, keyed via _fields_to_dict — the material the legal-writer needs
+    for block-yod (discussion).
+
+    The envelope also holds status and item counts; an item counts as
+    filled when its direction is non-blank.
+    """
+    if not file_path.exists():
+        # No file: report "missing" with empty sections and zero counts.
+        return {
+            "file_exists": False,
+            "status": "missing",
+            "error": "analysis-and-research.md not found",
+            "procedural_background": "",
+            "agreed_facts": "",
+            "disputed_facts": "",
+            "conclusions": "",
+            "threshold_claims": [],
+            "issues": [],
+            "total_items": 0,
+            "filled_count": 0,
+            "empty_count": 0,
+        }
+
+    parsed = parse(file_path)
+
+    def flatten(item: dict) -> dict:
+        """Merge an item's identity keys with all of its extracted fields."""
+        return {
+            "id": item["id"],
+            "number": item["number"],
+            "title": item["title"],
+            "direction": item.get("chair_position", "") or "",
+            # Extracted fields follow, under their stable keys.
+            **_fields_to_dict(item.get("fields", [])),
+        }
+
+    threshold = [flatten(claim) for claim in parsed.get("threshold_claims", [])]
+    issues = [flatten(issue) for issue in parsed.get("issues", [])]
+
+    total = len(threshold) + len(issues)
+    filled = len([e for e in (*threshold, *issues) if e["direction"].strip()])
+    empty = total - filled
+
+    # "partial" unless one of the edge cases below applies.
+    status = "partial"
+    if total == 0:
+        status = "missing"
+    elif filled == 0:
+        status = "empty"
+    elif empty == 0:
+        status = "complete"
+
+    header = parsed.get("header", {})
+    return {
+        "file_exists": True,
+        "file_path": str(file_path),
+        "case_number": header.get("case_number", ""),
+        "modified_at": header.get("modified_at", ""),
+        "status": status,
+        "total_items": total,
+        "filled_count": filled,
+        "empty_count": empty,
+        "procedural_background": parsed.get("procedural_background", ""),
+        "agreed_facts": parsed.get("agreed_facts", ""),
+        "disputed_facts": parsed.get("disputed_facts", ""),
+        "conclusions": parsed.get("conclusions", ""),
+        "threshold_claims": threshold,
+        "issues": issues,
+    }
+
+
+# ── Research findings extraction ──────────────────────────────────
+
+
+def extract_research_findings(file_path: Path) -> dict[str, Any]:
+    """Parse research-findings.md into a structured dict.
+
+    Main sections are routed by title substring into precedent summaries,
+    plan mappings, timeline and recommendations; anything else is kept
+    under other_sections. A missing file yields a status-only dict.
+    """
+    if not file_path.exists():
+        return {
+            "file_exists": False,
+            "status": "missing",
+            "error": "research-findings.md not found",
+        }
+
+    text = file_path.read_text(encoding="utf-8")
+    info = file_path.stat()
+    modified = datetime.fromtimestamp(info.st_mtime).isoformat()
+
+    sections = _split_main_sections(text)
+
+    result: dict[str, Any] = {
+        "file_exists": True,
+        "file_path": str(file_path),
+        "modified_at": modified,
+        "file_size": info.st_size,
+        "precedent_summaries": [],
+        "plan_mappings": [],
+        "timeline": "",
+        "recommendations": "",
+        "other_sections": [],
+    }
+
+    def collect(section_body):
+        """Turn each subsection of a section into a title/fields/raw record."""
+        records = []
+        for sub_title, sub_body in _split_subsections(section_body):
+            fields = _extract_fields(sub_body)
+            records.append({
+                "title": sub_title,
+                "fields": {f["label"]: f["content"] for f in fields},
+                "raw": sub_body if not fields else "",
+            })
+        return records
+
+    for _number, title, body in sections:
+        heading = title.strip()
+        # Branch order matters: the first matching marker wins, and the
+        # shorter marker in each pair already covers the longer one.
+        if "סיכום פסיקה" in heading or "פסיקה" in heading:
+            result["precedent_summaries"].extend(collect(body))
+        elif "מיפוי תכנית" in heading or "תכנית" in heading:
+            result["plan_mappings"].extend(collect(body))
+        elif "ציר זמן" in heading:
+            result["timeline"] = body
+        elif "המלצות" in heading:
+            result["recommendations"] = body
+        else:
+            result["other_sections"].append({
+                "title": heading,
+                "body": body,
+            })
+
+    return result