שלוש שכבות-הגנה נגד דליפת-זיכרון מדפדפנים יתומים, + טיפול בדליפה הגדולה
בפועל בשרת (task-master-mcp).
- camofox_client.py:
- asyncio.wait_for קשיח סביב כל ה-fetch (COURT_FETCH_HARD_TIMEOUT_S=180ש')
— hang → ביטול → async-with tear-down → reap.
- _reap_orphan_browsers(): הורג camoufox-bin יתומים (ppid=1) לפני ואחרי כל
fetch. סדרתיות (INV-CF4) → כל ppid=1 הוא שארית בטוחה.
- scripts/reap_orphan_procs.py: reaper כללי ל-task-master-mcp (~3GB יתומים)
+ camoufox-bin. רק ppid=1; /proc טהור. --dry-run / --loop N.
- scripts/legal-reaper.config.cjs: דמון pm2 (loop 180s, max_memory_restart 100M).
- X13 spec + SCRIPTS.md: תיעוד שכבות-ההגנה.
max_memory_restart בשירות (1.5G) כבר נותן רשת-ביטחון ברמת-התהליך.
Invariants: מקיים INV-CF4 (politeness/serial) — ללא שינוי חוזה.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
120 lines
4.2 KiB
Python
120 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Reap orphaned/runaway processes that saturate the Nautilus box.
|
|
|
|
Two known offenders (2026-06-07):
  1. ``task-master-mcp`` (Node) — spawned by the Claude Code VSCode extension,
     orphaned to ``ppid=1`` when its session ends, then **balloons to ~3GB
     each**. They accrue as sessions cycle and exhaust RAM within minutes,
     risking the OOM-killer hitting Postgres/Paperclip. See memory
     ``project_taskmaster_mcp_memory_leak``.
  2. ``camoufox-bin`` (Firefox) — the X13 court-fetch browser. A fetch that
     hangs or is killed mid-flight can leave a stray browser orphaned to
     ``ppid=1``. Serial-only fetching means any ``ppid=1`` camoufox-bin is
     stale and safe to kill.

Safety: only processes **orphaned to ``ppid=1``** are reaped — a process still
owned by a live parent (an attached MCP server, or a browser a fetch is
actively using) is never touched. Pure ``/proc`` parsing, no psutil dependency.

Usage:
    python scripts/reap_orphan_procs.py              # one pass, print what was reaped
    python scripts/reap_orphan_procs.py --dry-run    # report only
    python scripts/reap_orphan_procs.py --loop 180   # daemon: reap every 180s
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
import signal
|
|
import sys
|
|
import time
|
|
|
|
# Substrings looked for in a process command line; a process whose command
# line contains one of them is reaped once it has been orphaned (ppid == 1).
TARGETS = (
    "task-master-mcp",
    "camoufox-bin",
)
|
|
|
|
|
|
def _read(path: str) -> str:
    """Return the contents of *path* decoded as UTF-8.

    Undecodable bytes are substituted rather than raising, and any
    ``OSError`` while opening or reading yields the empty string.
    """
    raw = b""
    try:
        with open(path, "rb") as handle:
            raw = handle.read()
    except OSError:
        return ""
    return str(raw, "utf-8", "replace")
|
|
|
|
|
|
def _proc_info(pid: str) -> tuple[int, str, int] | None:
    """Collect the parent pid, command line and resident size of a process.

    Reads ``/proc/<pid>/status`` and ``/proc/<pid>/cmdline``.

    Returns:
        ``(ppid, cmdline, rss_kb)``, or ``None`` when the status file could
        not be read (or was empty). ``ppid`` and ``rss_kb`` stay ``0`` when
        the corresponding status field is absent or not an integer; the NUL
        separators in the command line are turned into spaces.
    """
    status_text = _read(f"/proc/{pid}/status")
    if status_text == "":
        return None

    # Status-file labels we care about, each mapped to its parsed value.
    fields = {"PPid:": 0, "VmRSS:": 0}
    for entry in status_text.splitlines():
        label = next((key for key in fields if entry.startswith(key)), None)
        if label is None:
            continue
        try:
            fields[label] = int(entry.split()[1])
        except (IndexError, ValueError):
            # Malformed line: keep whatever value we already had.
            continue

    raw_cmdline = _read(f"/proc/{pid}/cmdline")
    cmdline = " ".join(raw_cmdline.split("\x00")).strip()
    return fields["PPid:"], cmdline, fields["VmRSS:"]
|
|
|
|
|
|
def find_orphans() -> list[tuple[str, str, int]]:
    """List orphaned target processes as ``(pid, cmdline, rss_kb)`` tuples.

    A process qualifies when its parent pid is 1 and its command line
    contains at least one of the ``TARGETS`` substrings. Only the numeric
    entries of ``/proc`` are inspected.
    """
    # Pair each numeric /proc entry with its (possibly missing) process info.
    candidates = (
        (entry, _proc_info(entry))
        for entry in os.listdir("/proc")
        if entry.isdigit()
    )
    return [
        (entry, info[1], info[2])
        for entry, info in candidates
        if info is not None
        and info[0] == 1
        and any(target in info[1] for target in TARGETS)
    ]
|
|
|
|
|
|
def reap(dry_run: bool = False) -> int:
    """SIGKILL every orphaned target process and report what was reclaimed.

    Memory is only counted as freed for processes that were actually killed
    (or, in dry-run mode, that would be killed). A process that had already
    exited, or that we lack permission to signal, is excluded from the
    summary line.

    Args:
        dry_run: When true, print what would be reaped and send no signal.

    Returns:
        The number of matching orphans found by ``find_orphans()``,
        regardless of whether each kill succeeded.
    """
    orphans = find_orphans()
    affected = 0  # processes killed, or that would be killed in dry-run
    freed_kb = 0  # summed in kB so per-process MB truncation does not add up
    for pid, cmd, rss in orphans:
        name = next((t for t in TARGETS if t in cmd), cmd[:30])
        rss_mb = rss // 1024
        if dry_run:
            print(f"[dry-run] would reap pid={pid} ({name}) rss={rss_mb}MB", flush=True)
        else:
            try:
                os.kill(int(pid), signal.SIGKILL)
            except ProcessLookupError:
                # Exited on its own between the scan and the kill.
                continue
            except PermissionError:
                print(f" permission denied for pid={pid} ({name})", flush=True)
                continue
            print(f"reaped pid={pid} ({name}) rss={rss_mb}MB", flush=True)
        affected += 1
        freed_kb += rss
    if affected:
        print(f"{'would free' if dry_run else 'freed'} ~{freed_kb // 1024}MB "
              f"from {affected} orphan(s)", flush=True)
    return len(orphans)
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: run one reap pass, or loop forever.

    Args:
        argv: Argument list to parse; ``None`` (the default) lets argparse
            read ``sys.argv[1:]``.

    Returns:
        ``0`` after a single pass. In ``--loop`` mode the function does not
        return.

    Raises:
        SystemExit: With status 2 on invalid arguments, including a negative
            ``--loop`` interval.
    """
    ap = argparse.ArgumentParser(description="Reap orphaned task-master-mcp / camoufox-bin")
    ap.add_argument("--dry-run", action="store_true", help="report only, kill nothing")
    ap.add_argument("--loop", type=int, default=0, metavar="SECONDS",
                    help="run forever, reaping every N seconds")
    args = ap.parse_args(argv)

    # A negative interval is truthy, so it would start the daemon and then
    # crash in time.sleep() after the first pass; reject it up front.
    if args.loop < 0:
        ap.error("--loop must be a non-negative number of seconds")

    if args.loop:
        print(f"reaper loop: every {args.loop}s targets={TARGETS}", flush=True)
        while True:
            try:
                reap(args.dry_run)
            except Exception as e:  # never let the daemon die
                print(f"reap error: {e}", flush=True)
            time.sleep(args.loop)
    else:
        reap(args.dry_run)
    return 0
|
|
|
|
|
|
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|