FU-8a: process→code guards (GAP-21/22) #16
@@ -15,19 +15,24 @@ import pytest
|
|||||||
REPO = Path(__file__).resolve().parents[2]
|
REPO = Path(__file__).resolve().parents[2]
|
||||||
SCAN_ROOTS = [REPO / "web", REPO / "mcp-server" / "src", REPO / "scripts"]
|
SCAN_ROOTS = [REPO / "web", REPO / "mcp-server" / "src", REPO / "scripts"]
|
||||||
|
|
||||||
# Files exempt from the HTTP-to-Paperclip rule (the sanctioned helpers + legacy DB-read client).
|
# Exempt ONLY from the raw-HTTP-to-Paperclip rule. Two categories, per the
|
||||||
ALLOWLIST = {
|
# endorsed "differentiate production code from operational tooling" pattern for
|
||||||
|
# architectural fitness functions (cf. InfoQ fitness-functions; ESLint `overrides`):
|
||||||
|
# (a) the sanctioned helpers themselves (the one place raw HTTP is correct);
|
||||||
|
# (b) standalone operator/admin scripts run manually or by cron with the board
|
||||||
|
# key — a distinct category from app/agent code. Forcing them through the
|
||||||
|
# wrapper is over-engineering (DRY: "duplication is cheaper than the wrong
|
||||||
|
# abstraction"); direct httpx with the board key is acceptable for tooling.
|
||||||
|
# NOTE: the agent_wakeup_requests-INSERT rule is NOT exempted for anyone (below) —
|
||||||
|
# it is a hard invariant for ALL code (a direct insert skips heartbeat creation).
|
||||||
|
HTTP_RULE_ALLOWLIST = {
|
||||||
REPO / "web" / "paperclip_api.py", # the sanctioned pc_request helper
|
REPO / "web" / "paperclip_api.py", # the sanctioned pc_request helper
|
||||||
REPO / "scripts" / "pc.sh", # the sanctioned bash wrapper
|
REPO / "scripts" / "pc.sh", # the sanctioned bash wrapper
|
||||||
REPO / "web" / "paperclip_client.py", # legacy: DB reads only (no raw http, no wakeup insert)
|
REPO / "web" / "paperclip_client.py", # legacy: DB reads only
|
||||||
# Standalone operator/admin scripts that run on the host machine (not agent code, not FastAPI
|
REPO / "scripts" / "sync_agents_across_companies.py", # operator tool: CMP→CMPA agent-config sync (CLAUDE.md)
|
||||||
# handlers). pc_request is an async FastAPI coroutine, not usable standalone; these scripts are
|
REPO / "scripts" / "audit_corpus_integrity.py", # cron audit tool: posts CEO wakeup via the wakeup API
|
||||||
# the explicitly-documented admin tooling referenced in CLAUDE.md and are sanctioned for
|
REPO / "scripts" / "fix_paperclipai_skills_drift.py", # one-shot operator fix (Gap #28 runbook)
|
||||||
# direct httpx use. They are NOT agent code bypassing the approved helper.
|
REPO / "scripts" / "sync_missing_agent_skills.py", # one-shot operator fix (Gap #28)
|
||||||
REPO / "scripts" / "sync_agents_across_companies.py", # documented admin tool: CMP→CMPA agent-config sync (CLAUDE.md §Cross-company sync)
|
|
||||||
REPO / "scripts" / "audit_corpus_integrity.py", # cron audit tool: posts CEO wakeup on anomaly (best-effort, explicitly guarded)
|
|
||||||
REPO / "scripts" / "fix_paperclipai_skills_drift.py", # one-shot operator fix for paperclipai skill drift (Gap #28 runbook)
|
|
||||||
REPO / "scripts" / "sync_missing_agent_skills.py", # one-shot operator fix: add missing paperclipSkillSync (Gap #28)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Directories to skip entirely during scan (dead/archived code, virtual envs, test fixtures).
|
# Directories to skip entirely during scan (dead/archived code, virtual envs, test fixtures).
|
||||||
@@ -38,14 +43,23 @@ _HTTP_CLIENT = re.compile(r"\bhttpx\b|\brequests\.(get|post|put|patch|delete)\b|
|
|||||||
_WAKEUP_INSERT = re.compile(r"insert\s+into\s+agent_wakeup_requests", re.IGNORECASE)
|
_WAKEUP_INSERT = re.compile(r"insert\s+into\s+agent_wakeup_requests", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
def _scan_text(text: str) -> list[str]:
|
def _wakeup_violation(text: str) -> str | None:
|
||||||
"""Return violation reasons for a single file's text."""
|
"""Universal hard invariant — applies to ALL code (never allowlisted)."""
|
||||||
reasons = []
|
|
||||||
if _WAKEUP_INSERT.search(text):
|
if _WAKEUP_INSERT.search(text):
|
||||||
reasons.append("direct INSERT INTO agent_wakeup_requests — use the wakeup API")
|
return "direct INSERT INTO agent_wakeup_requests — use the wakeup API (POST /api/agents/{id}/wakeup)"
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _http_violation(text: str) -> str | None:
|
||||||
|
"""Raw HTTP to Paperclip — exempted for HTTP_RULE_ALLOWLIST files only."""
|
||||||
if _PC_URL.search(text) and _HTTP_CLIENT.search(text):
|
if _PC_URL.search(text) and _HTTP_CLIENT.search(text):
|
||||||
reasons.append("raw HTTP client + Paperclip URL — use web/paperclip_api.pc_request or scripts/pc.sh")
|
return "raw HTTP client + Paperclip URL — use web/paperclip_api.pc_request or scripts/pc.sh"
|
||||||
return reasons
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _scan_text(text: str) -> list[str]:
|
||||||
|
"""All violation reasons for a file's text, ignoring allowlist (used by unit tests)."""
|
||||||
|
return [r for r in (_wakeup_violation(text), _http_violation(text)) if r]
|
||||||
|
|
||||||
|
|
||||||
def _iter_source_files():
|
def _iter_source_files():
|
||||||
@@ -54,21 +68,26 @@ def _iter_source_files():
|
|||||||
continue
|
continue
|
||||||
for ext in ("*.py", "*.sh"):
|
for ext in ("*.py", "*.sh"):
|
||||||
for f in root.rglob(ext):
|
for f in root.rglob(ext):
|
||||||
f_str = str(f)
|
if any(frag in str(f) for frag in _SKIP_PATH_FRAGMENTS):
|
||||||
if f in ALLOWLIST or any(frag in f_str for frag in _SKIP_PATH_FRAGMENTS):
|
|
||||||
continue
|
continue
|
||||||
yield f
|
yield f
|
||||||
|
|
||||||
|
|
||||||
def find_violations() -> list[tuple[str, str]]:
|
def find_violations() -> list[tuple[str, str]]:
|
||||||
|
"""Wakeup-INSERT rule applies to every file; HTTP rule respects HTTP_RULE_ALLOWLIST."""
|
||||||
out = []
|
out = []
|
||||||
for f in _iter_source_files():
|
for f in _iter_source_files():
|
||||||
try:
|
try:
|
||||||
text = f.read_text(encoding="utf-8")
|
text = f.read_text(encoding="utf-8")
|
||||||
except (UnicodeDecodeError, OSError):
|
except (UnicodeDecodeError, OSError):
|
||||||
continue
|
continue
|
||||||
for reason in _scan_text(text):
|
w = _wakeup_violation(text)
|
||||||
out.append((str(f.relative_to(REPO)), reason))
|
if w:
|
||||||
|
out.append((str(f.relative_to(REPO)), w))
|
||||||
|
if f not in HTTP_RULE_ALLOWLIST:
|
||||||
|
h = _http_violation(text)
|
||||||
|
if h:
|
||||||
|
out.append((str(f.relative_to(REPO)), h))
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
@@ -86,6 +105,14 @@ def test_scan_ignores_plain_code():
|
|||||||
assert _scan_text("def add(a, b):\n return a + b\n") == []
|
assert _scan_text("def add(a, b):\n return a + b\n") == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_wakeup_insert_rule_is_universal_not_allowlisted():
|
||||||
|
# The wakeup-INSERT invariant must apply to ALL code; find_violations checks it
|
||||||
|
# for every file regardless of HTTP_RULE_ALLOWLIST. _wakeup_violation is the
|
||||||
|
# standalone check used unconditionally in find_violations (no allowlist branch).
|
||||||
|
assert _wakeup_violation("INSERT INTO agent_wakeup_requests (id) VALUES ($1)") is not None
|
||||||
|
assert _http_violation('httpx.post(f"{PAPERCLIP_API_URL}/x")') is not None
|
||||||
|
|
||||||
|
|
||||||
def test_repo_has_no_paperclip_access_violations():
|
def test_repo_has_no_paperclip_access_violations():
|
||||||
violations = find_violations()
|
violations = find_violations()
|
||||||
assert violations == [], "Un-sanctioned Paperclip access found:\n" + "\n".join(
|
assert violations == [], "Un-sanctioned Paperclip access found:\n" + "\n".join(
|
||||||
|
|||||||
Reference in New Issue
Block a user