From 4d8422198ad255ef31d5827580584ad82e68d7e6 Mon Sep 17 00:00:00 2001 From: Chaim Date: Sun, 31 May 2026 11:35:07 +0000 Subject: [PATCH] feat(guard): fitness function blocking raw Paperclip access (GAP-22, FU-8a) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wakeup-INSERT rule is universal (never allowlisted — hard invariant). Raw-HTTP rule exempts the sanctioned helpers + standalone operator/admin scripts (a distinct category per fitness-function scope differentiation + DRY: tooling needn't reuse the FastAPI wrapper). Repo scanned clean under these rules. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../tests/test_paperclip_access_guard.py | 69 +++++++++++++------ 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/mcp-server/tests/test_paperclip_access_guard.py b/mcp-server/tests/test_paperclip_access_guard.py index 3787e5b..a44bf74 100644 --- a/mcp-server/tests/test_paperclip_access_guard.py +++ b/mcp-server/tests/test_paperclip_access_guard.py @@ -15,19 +15,24 @@ import pytest REPO = Path(__file__).resolve().parents[2] SCAN_ROOTS = [REPO / "web", REPO / "mcp-server" / "src", REPO / "scripts"] -# Files exempt from the HTTP-to-Paperclip rule (the sanctioned helpers + legacy DB-read client). -ALLOWLIST = { +# Exempt ONLY from the raw-HTTP-to-Paperclip rule. Two categories, per the +# endorsed "differentiate production code from operational tooling" pattern for +# architectural fitness functions (cf. InfoQ fitness-functions; ESLint `overrides`): +# (a) the sanctioned helpers themselves (the one place raw HTTP is correct); +# (b) standalone operator/admin scripts run manually or by cron with the board +# key — a distinct category from app/agent code. Forcing them through the +# wrapper is over-engineering (DRY: "duplication is cheaper than the wrong +# abstraction"); direct httpx with the board key is acceptable for tooling. +# NOTE: the agent_wakeup_requests-INSERT rule is NOT exempted for anyone (below) — +# it is a hard invariant for ALL code (a direct insert skips heartbeat creation). +HTTP_RULE_ALLOWLIST = { REPO / "web" / "paperclip_api.py", # the sanctioned pc_request helper REPO / "scripts" / "pc.sh", # the sanctioned bash wrapper - REPO / "web" / "paperclip_client.py", # legacy: DB reads only (no raw http, no wakeup insert) - # Standalone operator/admin scripts that run on the host machine (not agent code, not FastAPI - # handlers). pc_request is an async FastAPI coroutine, not usable standalone; these scripts are - # the explicitly-documented admin tooling referenced in CLAUDE.md and are sanctioned for - # direct httpx use. They are NOT agent code bypassing the approved helper. - REPO / "scripts" / "sync_agents_across_companies.py", # documented admin tool: CMP→CMPA agent-config sync (CLAUDE.md §Cross-company sync) - REPO / "scripts" / "audit_corpus_integrity.py", # cron audit tool: posts CEO wakeup on anomaly (best-effort, explicitly guarded) - REPO / "scripts" / "fix_paperclipai_skills_drift.py", # one-shot operator fix for paperclipai skill drift (Gap #28 runbook) - REPO / "scripts" / "sync_missing_agent_skills.py", # one-shot operator fix: add missing paperclipSkillSync (Gap #28) + REPO / "web" / "paperclip_client.py", # legacy: DB reads only + REPO / "scripts" / "sync_agents_across_companies.py", # operator tool: CMP→CMPA agent-config sync (CLAUDE.md) + REPO / "scripts" / "audit_corpus_integrity.py", # cron audit tool: posts CEO wakeup via the wakeup API + REPO / "scripts" / "fix_paperclipai_skills_drift.py", # one-shot operator fix (Gap #28 runbook) + REPO / "scripts" / "sync_missing_agent_skills.py", # one-shot operator fix (Gap #28) } # Directories to skip entirely during scan (dead/archived code, virtual envs, test fixtures). @@ -38,14 +43,23 @@ _HTTP_CLIENT = re.compile(r"\bhttpx\b|\brequests\.(get|post|put|patch|delete)\b| _WAKEUP_INSERT = re.compile(r"insert\s+into\s+agent_wakeup_requests", re.IGNORECASE) -def _scan_text(text: str) -> list[str]: - """Return violation reasons for a single file's text.""" - reasons = [] +def _wakeup_violation(text: str) -> str | None: + """Universal hard invariant — applies to ALL code (never allowlisted).""" if _WAKEUP_INSERT.search(text): - reasons.append("direct INSERT INTO agent_wakeup_requests — use the wakeup API") + return "direct INSERT INTO agent_wakeup_requests — use the wakeup API (POST /api/agents/{id}/wakeup)" + return None + + +def _http_violation(text: str) -> str | None: + """Raw HTTP to Paperclip — exempted for HTTP_RULE_ALLOWLIST files only.""" if _PC_URL.search(text) and _HTTP_CLIENT.search(text): - reasons.append("raw HTTP client + Paperclip URL — use web/paperclip_api.pc_request or scripts/pc.sh") - return reasons + return "raw HTTP client + Paperclip URL — use web/paperclip_api.pc_request or scripts/pc.sh" + return None + + +def _scan_text(text: str) -> list[str]: + """All violation reasons for a file's text, ignoring allowlist (used by unit tests).""" + return [r for r in (_wakeup_violation(text), _http_violation(text)) if r] def _iter_source_files(): @@ -54,21 +68,26 @@ def _iter_source_files(): continue for ext in ("*.py", "*.sh"): for f in root.rglob(ext): - f_str = str(f) - if f in ALLOWLIST or any(frag in f_str for frag in _SKIP_PATH_FRAGMENTS): + if any(frag in str(f) for frag in _SKIP_PATH_FRAGMENTS): continue yield f def find_violations() -> list[tuple[str, str]]: + """Wakeup-INSERT rule applies to every file; HTTP rule respects HTTP_RULE_ALLOWLIST.""" out = [] for f in _iter_source_files(): try: text = f.read_text(encoding="utf-8") except (UnicodeDecodeError, OSError): continue - for reason in _scan_text(text): - out.append((str(f.relative_to(REPO)), reason)) + w = _wakeup_violation(text) + if w: + out.append((str(f.relative_to(REPO)), w)) + if f not in HTTP_RULE_ALLOWLIST: + h = _http_violation(text) + if h: + out.append((str(f.relative_to(REPO)), h)) return out @@ -86,6 +105,14 @@ def test_scan_ignores_plain_code(): assert _scan_text("def add(a, b):\n return a + b\n") == [] +def test_wakeup_insert_rule_is_universal_not_allowlisted(): + # The wakeup-INSERT invariant must apply to ALL code; find_violations checks it + # for every file regardless of HTTP_RULE_ALLOWLIST. _wakeup_violation is the + # standalone check used unconditionally in find_violations (no allowlist branch). + assert _wakeup_violation("INSERT INTO agent_wakeup_requests (id) VALUES ($1)") is not None + assert _http_violation('httpx.post(f"{PAPERCLIP_API_URL}/x")') is not None + + def test_repo_has_no_paperclip_access_violations(): violations = find_violations() assert violations == [], "Un-sanctioned Paperclip access found:\n" + "\n".join(