#!/usr/bin/env python3
"""G12 leak-guard — enforce the Agent Platform Port seam (docs/spec/X15 §4 / R4).

The single, canonical checker for INV-G12. Used by BOTH the interactive
PreToolUse hook (``scripts/spec-guard.sh``, warn-only) and the CI fitness-test
(``mcp-server/tests/test_platform_port_leak_guard.py``, hard fail) — one
implementation, no parallel rule (G2).

Two HARD rules:

1. **Intelligence layer is platform-clean.** ``mcp-server/src`` (the MCP tools
   + decision/RAG/extraction logic) contains ZERO Paperclip-specific symbols.
   A short, explicit baseline allowlist (``_ALLOW``) covers pre-existing benign
   prose mentions (the origin of ``company_id``) and the host pm2 bridge that
   legitimately names the ``paperclip`` service — keyed by substring so it
   survives line-number shifts.

2. **Import seam.** Only ``web/agent_platform_port.py`` (the Port) and the
   declared shell itself (``web/paperclip_client.py`` /
   ``web/paperclip_api.py``) may import ``web.paperclip_client`` /
   ``web.paperclip_api``. Any other file in ``web/`` that imports them is a
   violation (R2 established the seam).

OUT OF SCOPE (not intelligence): the declared shell (paperclip_client/api,
plugin-legal-ai, adapters, web-ui settings paperclip-tab / paperclip-agents,
skills/new-company-setup), and AUTO-GENERATED files
(web-ui/src/lib/api/types.ts mirrors the backend OpenAPI — governed by the
backend, not hand-fixable).

Usage:
    leak_guard.py              # scan the whole repo; exit 1 on any violation
    leak_guard.py <file>...    # scan only the given files (the spec-guard hook)
"""

from __future__ import annotations

import re
import sys
from collections.abc import Iterator
from pathlib import Path

# Repository root: this script lives one directory below it.
REPO = Path(__file__).resolve().parent.parent

# Paperclip-specific symbols that must never appear in the intelligence layer.
HARD = re.compile(
    r"paperclip|Paperclip|PAPERCLIP|wakeup|heartbeat|HEARTBEAT|pc_request|"
    r"pc\.sh|X-Paperclip|agent_wakeup|heartbeat_run|ctx\.agents|issueId"
)

# Intelligence layer — rule 1 applies here (zero hard terms, save the allowlist).
PROTECTED_DIRS = ["mcp-server/src"]

# Baseline allowlist: (path-suffix, substring-in-line). A hard-term hit is allowed
# only if its file ends with <path-suffix> AND the line contains <substring>.
# Keep this list SHORT and justified — every entry is a documented exception.
_ALLOW: list[tuple[str, str]] = [
    # Host pm2 bridge legitimately lists the 'paperclip' service (ops, not intel).
    ("court_fetch_service/server.py", "pm2 status of legal-* / paperclip services"),
    ("court_fetch_service/server.py", '("legal-", "paperclip")'),
    ("court_fetch_service/server.py", "never paperclip or arbitrary processes"),
    # Prose comments naming the ORIGIN of a stored field — not code coupling.
    ("services/db.py", "Paperclip company UUID"),
    ("services/db.py", "from a Paperclip issue"),
    ("services/db.py", "The Paperclip project"),
]

# Import-seam — rule 2. Only these web/ files may import the Paperclip client.
SEAM_ALLOWED = {
    "web/agent_platform_port.py",  # the Port
    "web/paperclip_client.py",  # the shell itself
    "web/paperclip_api.py",  # the shell itself
}
# NOTE(review): this matches only ``from web.paperclip_<x> import ...`` and
# ``import web.paperclip_<x>`` at the start of a line. ``from web import
# paperclip_client`` and relative imports are NOT matched — confirm whether
# the seam rule is meant to cover those forms before widening the pattern.
SEAM_IMPORT = re.compile(r"^\s*(from\s+web\.paperclip_(client|api)\s+import|"
                         r"import\s+web\.paperclip_(client|api)\b)")

# Directory names whose contents are never scanned.
_SKIP_PARTS = {".venv", "node_modules", "__pycache__", ".git", ".next"}


def _is_test(p: Path) -> bool:
    """Return True if *p* sits in a ``tests``/``test`` dir or is named ``test_*``."""
    return "tests" in p.parts or "test" in p.parts or p.name.startswith("test_")


def _skip(p: Path) -> bool:
    """Return True if any component of *p* is one of ``_SKIP_PARTS``."""
    return any(part in _SKIP_PARTS for part in p.parts)


def _allowed(rel: str, line: str) -> bool:
    """Return True if a hit on *line* in file *rel* is covered by ``_ALLOW``."""
    return any(rel.endswith(suf) and sub in line for suf, sub in _ALLOW)


def _scan_rel(p: Path, bases: list[Path]) -> str | None:
    """Return the repo-relative POSIX path of *p* if it is in scope, else None.

    In scope means: a ``.py`` file that resolves inside ``REPO`` and below one
    of *bases* (which must already be resolved), and whose repo-relative path
    is neither test code nor inside a skipped directory.
    """
    if p.suffix != ".py":
        return None
    resolved = p.resolve()
    try:
        rel = resolved.relative_to(REPO)
    except ValueError:
        # Resolves outside the repository — nothing for this guard to check.
        return None
    if not any(base in resolved.parents for base in bases):
        return None
    # Filter on the repo-relative path, never the absolute one: a checkout
    # located under e.g. ``/home/ci/tests/`` must not make every file look
    # like test code and silently turn the guard into a no-op.
    if _skip(rel) or _is_test(rel):
        return None
    return rel.as_posix()


def _select(files: list[Path] | None, bases: list[Path]) -> list[tuple[Path, str]]:
    """Pick the in-scope files under *bases* as ``(path, repo-relative path)``.

    With ``files=None`` every ``*.py`` below each base is considered;
    otherwise only the given *files* are. Both modes apply the same filters.
    """
    if files is None:
        candidates = [p for base in bases for p in base.rglob("*.py")]
    else:
        candidates = files
    resolved_bases = [base.resolve() for base in bases]
    picked: list[tuple[Path, str]] = []
    for p in candidates:
        rel = _scan_rel(p, resolved_bases)
        if rel is not None:
            picked.append((p, rel))
    return picked


def _iter_py(base: Path) -> Iterator[Path]:
    """Yield every in-scope ``*.py`` file below *base* (tests and skips excluded)."""
    for p, _rel in _select(None, [base]):
        yield p


def _read_lines(p: Path) -> list[str]:
    """Return the lines of *p*, or ``[]`` if it cannot be read as UTF-8 text.

    Best-effort by design: the hook may be handed files that were just
    deleted or are not text, and those must not crash the guard.
    """
    try:
        return p.read_text(encoding="utf-8").splitlines()
    except (OSError, UnicodeDecodeError):
        return []


def scan(files: list[Path] | None = None) -> list[str]:
    """Return a list of violation strings (empty == clean).

    Args:
        files: Paths to check. ``None`` scans the whole repository; a list
            restricts both rules to those files (paths outside the protected
            directories / ``web/`` are ignored).

    Returns:
        One message per offending line, rule-1 findings first, then rule-2
        findings. Each message starts with ``<repo-relative path>:<line>:``.
    """
    violations: list[str] = []

    # Rule 1 — intelligence layer is platform-clean.
    protected = [REPO / d for d in PROTECTED_DIRS]
    for p, rel in _select(files, protected):
        for i, line in enumerate(_read_lines(p), 1):
            if HARD.search(line) and not _allowed(rel, line):
                violations.append(
                    f"{rel}:{i}: Paperclip symbol in the intelligence layer "
                    f"(INV-G12). Route platform access through "
                    f"web/agent_platform_port.py, or add a justified baseline "
                    f"entry in scripts/leak_guard.py if genuinely benign.\n"
                    f" {line.strip()[:120]}"
                )

    # Rule 2 — import seam (web/ only).
    for p, rel in _select(files, [REPO / "web"]):
        if rel in SEAM_ALLOWED:
            continue
        for i, line in enumerate(_read_lines(p), 1):
            if SEAM_IMPORT.search(line):
                violations.append(
                    f"{rel}:{i}: imports the Paperclip client directly "
                    f"(INV-G12 seam). Import from web.agent_platform_port instead.\n"
                    f" {line.strip()[:120]}"
                )

    return violations


def main(argv: list[str]) -> int:
    """Run the guard from the command line and return the process exit code.

    Args:
        argv: File paths to check; an empty list scans the whole repository.

    Returns:
        ``1`` if any violation was found (details on stderr), else ``0``.
    """
    files = [Path(a) for a in argv] or None
    violations = scan(files)
    if violations:
        sys.stderr.write(
            "✗ G12 leak-guard — Agent Platform Port violated "
            f"({len(violations)} finding(s)):\n\n"
        )
        for v in violations:
            sys.stderr.write(f" • {v}\n")
        sys.stderr.write(
            "\nSee docs/spec/X15-agent-platform-port.md (G12).\n"
        )
        return 1
    # The success banner is printed only for a whole-repo scan; the per-file
    # hook invocation stays silent when clean.
    if files is None:
        print("✓ G12 leak-guard: intelligence layer is platform-clean; "
              "import seam intact.")
    return 0


if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))