Merge pull request 'fix(X13): הקשחה נגד דליפת-זיכרון מדפדפנים + reaper ל-task-master-mcp' (#115) from worktree-court-fetch-harden into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m24s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m24s
This commit was merged in pull request #115.
This commit is contained in:
@@ -162,3 +162,5 @@ Service / responsible automation) | סטטוס: verified
|
|||||||
- F5/anti-bot עלול לחסום IP → politeness סדרתי + Camoufox (INV-CF4).
|
- F5/anti-bot עלול לחסום IP → politeness סדרתי + Camoufox (INV-CF4).
|
||||||
- שבירות מול שינויי-אתר → ריכוז selectors במקום אחד + בדיקות-עשן תקופתיות.
|
- שבירות מול שינויי-אתר → ריכוז selectors במקום אחד + בדיקות-עשן תקופתיות.
|
||||||
- גבול-ToS על אתר .gov → INV-CF7 + שיקול-יו"ר.
|
- גבול-ToS על אתר .gov → INV-CF7 + שיקול-יו"ר.
|
||||||
|
- **דליפת-זיכרון מדפדפנים יתומים** (fetch שנתקע/נהרג משאיר `camoufox-bin`) → שלוש שכבות-הגנה:
|
||||||
|
(א) `async with` סוגר את הדפדפן בכל exception; (ב) `asyncio.wait_for` קשיח (`COURT_FETCH_HARD_TIMEOUT_S`, ברירת-מחדל 180ש') מבטל hang + reap; (ג) reaper של `camoufox-bin` יתומים (`ppid=1`) לפני/אחרי כל fetch + דמון `legal-reaper` (pm2) + תקרת `max_memory_restart`. סדרתיות (INV-CF4) מבטיחה שכל דפדפן `ppid=1` הוא שארית בטוחה-להריגה. **הערה:** הדליפה הגדולה בפועל בשרת היא `task-master-mcp` (כלי נפרד), שגם אותו ה-reaper מנקה.
|
||||||
|
|||||||
@@ -53,6 +53,50 @@ _DISPLAY = os.environ.get("DISPLAY", "")
|
|||||||
_NAV_TIMEOUT_MS = int(float(os.environ.get("COURT_FETCH_BROWSER_TIMEOUT_S", "60")) * 1000)
|
_NAV_TIMEOUT_MS = int(float(os.environ.get("COURT_FETCH_BROWSER_TIMEOUT_S", "60")) * 1000)
|
||||||
_PAGE_BATCH = 4 # the viewer's GetImages batch size
|
_PAGE_BATCH = 4 # the viewer's GetImages batch size
|
||||||
_MAX_PAGES = 400 # hard cap on a single document
|
_MAX_PAGES = 400 # hard cap on a single document
|
||||||
|
# Hard wall-clock cap on a single fetch so a hung browser can't pin a Firefox
|
||||||
|
# process forever (anti-leak; INV-CF4 politeness). The async-with cleanup runs
|
||||||
|
# on the resulting CancelledError, tearing the browser down.
|
||||||
|
_FETCH_HARD_TIMEOUT_S = float(os.environ.get("COURT_FETCH_HARD_TIMEOUT_S", "180"))
|
||||||
|
|
||||||
|
|
||||||
|
def _reap_orphan_browsers(proc_root: str = "/proc") -> int:
    """Kill every ``camoufox-bin`` process that is orphaned to ``ppid=1``.

    Fetching is serial (INV-CF4), so any browser not owned by a live parent is
    a leftover from a prior crashed/killed fetch. Pure /proc, best-effort —
    never raises into the fetch path. It is called before a launch as well as
    after a fetch finishes or times out, so the log line does not claim a
    particular phase.

    Args:
        proc_root: Directory holding the per-process entries. Defaults to the
            real ``/proc``; overridable so the scan can be run against a fake
            tree.

    Returns:
        The number of processes that were successfully sent SIGKILL.
    """
    # Function-scope import: the module's top-level imports are not part of
    # this block, and SIGKILL reads better than a bare ``9``.
    import signal

    try:
        entries = os.listdir(proc_root)
    except OSError:
        # No procfs (non-Linux host) or it is unreadable: nothing to reap.
        return 0

    killed = 0
    for pid in entries:
        if not pid.isdigit():
            continue
        try:
            # Cheap filter first: only matching processes need their status read.
            with open(f"{proc_root}/{pid}/cmdline", "rb") as f:
                cmd = f.read().decode("utf-8", "replace")
            if "camoufox-bin" not in cmd:
                continue
            with open(f"{proc_root}/{pid}/status", "rb") as f:
                status = f.read().decode("utf-8", "replace")
        except OSError:
            # The process exited (or is not ours to inspect) mid-scan.
            continue

        ppid = 0
        for line in status.splitlines():
            if line.startswith("PPid:"):
                try:
                    ppid = int(line.split()[1])
                except (IndexError, ValueError):
                    pass
                break
        if ppid != 1:
            # Still owned by a live parent — never touch it.
            continue

        try:
            os.kill(int(pid), signal.SIGKILL)
        except OSError as exc:
            # Already gone or not permitted; best-effort, so just note it.
            logger.debug("could not reap camoufox-bin pid=%s: %s", pid, exc)
        else:
            killed += 1

    if killed:
        logger.warning("reaped %d orphaned camoufox-bin process(es)", killed)
    return killed
|
||||||
|
|
||||||
|
|
||||||
class CamofoxUnavailable(RuntimeError):
|
class CamofoxUnavailable(RuntimeError):
|
||||||
@@ -178,74 +222,93 @@ async def fetch_admin_verdict(
|
|||||||
)
|
)
|
||||||
|
|
||||||
month_year = f"{int(month):02d}-{year[-2:]}"
|
month_year = f"{int(month):02d}-{year[-2:]}"
|
||||||
doc_num = {"v": None}
|
|
||||||
|
|
||||||
async def on_resp(resp):
|
# Belt-and-suspenders against browser leaks: kill any orphaned browser from
|
||||||
if "GetImages" in resp.url and not doc_num["v"]:
|
# a prior crashed fetch before we launch a new one (serial → safe).
|
||||||
try:
|
_reap_orphan_browsers()
|
||||||
doc_num["v"] = json.loads(resp.request.post_data).get("documentNumber")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
async with AsyncCamoufox(
|
async def _run() -> dict:
|
||||||
headless=True, geoip=False, humanize=True, locale="he-IL"
|
doc_num = {"v": None}
|
||||||
) as browser:
|
|
||||||
page = await browser.new_page()
|
|
||||||
page.context.on("response", lambda r: asyncio.create_task(on_resp(r)))
|
|
||||||
vp = await _reach_viewer(page, case_number=file_number, month_year=month_year)
|
|
||||||
source_url = vp.url
|
|
||||||
await vp.wait_for_timeout(9000)
|
|
||||||
if not doc_num["v"]:
|
|
||||||
raise NgcsFlowError("לא נלכד documentNumber מהצופה (ייתכן שהמסמך לא נטען)")
|
|
||||||
|
|
||||||
# Pull every page batch through fetch() with X-Requested-With (WAF-safe).
|
async def on_resp(resp):
|
||||||
imgs = await vp.evaluate(
|
if "GetImages" in resp.url and not doc_num["v"]:
|
||||||
"""async (args) => {
|
try:
|
||||||
const [dn, maxPages, batch] = args;
|
doc_num["v"] = json.loads(resp.request.post_data).get("documentNumber")
|
||||||
const url = window.location.href.split('?')[0] + '/GetImages';
|
except Exception:
|
||||||
const out = {};
|
pass
|
||||||
for (let f = 0; f < maxPages; f += batch) {
|
|
||||||
let d;
|
async with AsyncCamoufox(
|
||||||
try {
|
headless=True, geoip=False, humanize=True, locale="he-IL"
|
||||||
const r = await fetch(url, {method:'POST', credentials:'include',
|
) as browser:
|
||||||
headers:{'Content-Type':'application/json; charset=utf-8',
|
page = await browser.new_page()
|
||||||
'X-Requested-With':'XMLHttpRequest'},
|
page.context.on("response", lambda r: asyncio.create_task(on_resp(r)))
|
||||||
body: JSON.stringify({documentNumber:dn, fromIndex:f, toIndex:f+batch-1})});
|
vp = await _reach_viewer(page, case_number=file_number, month_year=month_year)
|
||||||
if (!r.ok) break;
|
source_url = vp.url
|
||||||
const j = await r.json(); d = (j.d !== undefined) ? j.d : j;
|
await vp.wait_for_timeout(9000)
|
||||||
} catch (e) { break; }
|
if not doc_num["v"]:
|
||||||
if (!Array.isArray(d) || d.length === 0) break;
|
raise NgcsFlowError("לא נלכד documentNumber מהצופה (ייתכן שהמסמך לא נטען)")
|
||||||
d.forEach((html, k) => { if (html) out[f+k] = html; });
|
|
||||||
if (d.length < batch) break;
|
# Pull every page batch through fetch() with X-Requested-With (WAF-safe).
|
||||||
await new Promise(r => setTimeout(r, 350));
|
imgs = await vp.evaluate(
|
||||||
}
|
"""async (args) => {
|
||||||
return out;
|
const [dn, maxPages, batch] = args;
|
||||||
}""",
|
const url = window.location.href.split('?')[0] + '/GetImages';
|
||||||
[doc_num["v"], _MAX_PAGES, _PAGE_BATCH],
|
const out = {};
|
||||||
|
for (let f = 0; f < maxPages; f += batch) {
|
||||||
|
let d;
|
||||||
|
try {
|
||||||
|
const r = await fetch(url, {method:'POST', credentials:'include',
|
||||||
|
headers:{'Content-Type':'application/json; charset=utf-8',
|
||||||
|
'X-Requested-With':'XMLHttpRequest'},
|
||||||
|
body: JSON.stringify({documentNumber:dn, fromIndex:f, toIndex:f+batch-1})});
|
||||||
|
if (!r.ok) break;
|
||||||
|
const j = await r.json(); d = (j.d !== undefined) ? j.d : j;
|
||||||
|
} catch (e) { break; }
|
||||||
|
if (!Array.isArray(d) || d.length === 0) break;
|
||||||
|
d.forEach((html, k) => { if (html) out[f+k] = html; });
|
||||||
|
if (d.length < batch) break;
|
||||||
|
await new Promise(r => setTimeout(r, 350));
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}""",
|
||||||
|
[doc_num["v"], _MAX_PAGES, _PAGE_BATCH],
|
||||||
|
)
|
||||||
|
|
||||||
|
if not imgs:
|
||||||
|
raise NgcsFlowError("לא התקבלו עמודי-מסמך מ-GetImages")
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
pages = []
|
||||||
|
for idx in sorted(imgs, key=lambda x: int(x)):
|
||||||
|
m = re.search(r"base64,([A-Za-z0-9+/=]+)", imgs[idx] or "")
|
||||||
|
if not m:
|
||||||
|
continue
|
||||||
|
pages.append(Image.open(io.BytesIO(base64.b64decode(m.group(1)))).convert("RGB"))
|
||||||
|
if not pages:
|
||||||
|
raise NgcsFlowError("עמודי-המסמך לא ניתנים לפענוח (base64)")
|
||||||
|
|
||||||
|
buf = io.BytesIO()
|
||||||
|
pages[0].save(buf, format="PDF", save_all=True, append_images=pages[1:])
|
||||||
|
content = buf.getvalue()
|
||||||
|
logger.info("נט המשפט: fetched %s — %d pages, %d bytes",
|
||||||
|
case_number, len(pages), len(content))
|
||||||
|
return {
|
||||||
|
"content": content,
|
||||||
|
"filename": f"{case_number}.pdf",
|
||||||
|
"source_url": source_url,
|
||||||
|
"court": court or "בית משפט מחוזי",
|
||||||
|
"pages": len(pages),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Hard wall-clock cap: on a hung browser, the timeout cancels _run(); the
|
||||||
|
# async-with __aexit__ tears the browser down, and the reap below sweeps any
|
||||||
|
# process that outlived the cancellation.
|
||||||
|
try:
|
||||||
|
return await asyncio.wait_for(_run(), _FETCH_HARD_TIMEOUT_S)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
_reap_orphan_browsers()
|
||||||
|
raise NgcsFlowError(
|
||||||
|
f"אחזור עבר את מגבלת-הזמן ({_FETCH_HARD_TIMEOUT_S:.0f}ש') ובוטל"
|
||||||
)
|
)
|
||||||
|
finally:
|
||||||
if not imgs:
|
_reap_orphan_browsers()
|
||||||
raise NgcsFlowError("לא התקבלו עמודי-מסמך מ-GetImages")
|
|
||||||
from PIL import Image
|
|
||||||
|
|
||||||
pages = []
|
|
||||||
for idx in sorted(imgs, key=lambda x: int(x)):
|
|
||||||
m = re.search(r"base64,([A-Za-z0-9+/=]+)", imgs[idx] or "")
|
|
||||||
if not m:
|
|
||||||
continue
|
|
||||||
pages.append(Image.open(io.BytesIO(base64.b64decode(m.group(1)))).convert("RGB"))
|
|
||||||
if not pages:
|
|
||||||
raise NgcsFlowError("עמודי-המסמך לא ניתנים לפענוח (base64)")
|
|
||||||
|
|
||||||
buf = io.BytesIO()
|
|
||||||
pages[0].save(buf, format="PDF", save_all=True, append_images=pages[1:])
|
|
||||||
content = buf.getvalue()
|
|
||||||
logger.info("נט המשפט: fetched %s — %d pages, %d bytes",
|
|
||||||
case_number, len(pages), len(content))
|
|
||||||
return {
|
|
||||||
"content": content,
|
|
||||||
"filename": f"{case_number}.pdf",
|
|
||||||
"source_url": source_url,
|
|
||||||
"court": court or "בית משפט מחוזי",
|
|
||||||
"pages": len(pages),
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -20,6 +20,8 @@
|
|||||||
| `eval_gold_bootstrap.py` | python | **FU-5 (GAP-11) — bootstrap ל-gold-set** של הערכת-אחזור ל-`data/eval/gold-set.jsonl`. שני מקורות: `--source citations` (cited==relevant מ-`search_relevance_feedback`; ריק עד שייצברו ציטוטים) ו-`--source known_item` (query=שם-תיק → relevant=עצמו; אות אמיתי היום). Idempotent — שומר שורות `source=chair`, מחדש `bootstrap_*`. דורש POSTGRES. | לפני eval; חוזר כשנצבר ground-truth |
|
| `eval_gold_bootstrap.py` | python | **FU-5 (GAP-11) — bootstrap ל-gold-set** של הערכת-אחזור ל-`data/eval/gold-set.jsonl`. שני מקורות: `--source citations` (cited==relevant מ-`search_relevance_feedback`; ריק עד שייצברו ציטוטים) ו-`--source known_item` (query=שם-תיק → relevant=עצמו; אות אמיתי היום). Idempotent — שומר שורות `source=chair`, מחדש `bootstrap_*`. דורש POSTGRES. | לפני eval; חוזר כשנצבר ground-truth |
|
||||||
| `eval_retrieval.py` | python | **FU-5 (GAP-11, INV-RET4/G8) — harness הערכת-אחזור** — מריץ את מסלול-האחזור בייצור (`search_library`/`search_internal`) על ה-gold-set, מחשב precision@k/recall@k/MRR/nDCG@k (k=5,10), מצרף overall+per-corpus+per-PA ל-`data/eval/eval-report-<ts>.{json,md}` + delta מול `data/eval/baseline.json` (מתעד retrieval_config). `--self-test` בודק את המטריקות offline; `--update-baseline` מאמץ snapshot. **שער-CI במשמעת:** הרץ לפני/אחרי כל שינוי בשכבת-האחזור באותו קונפיג. דורש POSTGRES+VOYAGE_API_KEY. | לפני/אחרי שינוי RRF/k/embedder/rerank |
|
| `eval_retrieval.py` | python | **FU-5 (GAP-11, INV-RET4/G8) — harness הערכת-אחזור** — מריץ את מסלול-האחזור בייצור (`search_library`/`search_internal`) על ה-gold-set, מחשב precision@k/recall@k/MRR/nDCG@k (k=5,10), מצרף overall+per-corpus+per-PA ל-`data/eval/eval-report-<ts>.{json,md}` + delta מול `data/eval/baseline.json` (מתעד retrieval_config). `--self-test` בודק את המטריקות offline; `--update-baseline` מאמץ snapshot. **שער-CI במשמעת:** הרץ לפני/אחרי כל שינוי בשכבת-האחזור באותו קונפיג. דורש POSTGRES+VOYAGE_API_KEY. | לפני/אחרי שינוי RRF/k/embedder/rerank |
|
||||||
| `legal-court-fetch-service.config.cjs` | pm2/js | **שירות-מארח Tier-1 לאחזור פסקי-דין מנט המשפט (X13)** — 2 apps: (א) `legal-court-fetch-xvfb` (Xvfb :99, צג-וירטואלי ל-Camoufox); (ב) `legal-court-fetch-service` (`python -m legal_mcp.court_fetch_service.server`, bound `10.0.1.1:8771`, Bearer `COURT_FETCH_SHARED_SECRET` מ-`~/.legal-court-fetch-service.env`, `DISPLAY=:99`). מריץ Camoufox דרך חבילת-הפייתון (in-process) כי הקונטיינר לא יכול דפדפן. תלות: `pip install -e "mcp-server[court-fetch]" && python -m camoufox fetch`. אחזור = ניווט→צופה→`GetImages`(X-Requested-With)→PDF, ללא CAPTCHA; כשל→`ok:false`→orchestrator מסלים ל-fallback אנושי. **אומת על עת"מ 46111-12-22 (34 עמ').** מראָה לדפוס `legal-chat-service.config.cjs`. ספ: `docs/spec/X13-court-fetch.md`. התקנה: `pm2 start scripts/legal-court-fetch-service.config.cjs && pm2 save`. בריאות: `curl http://10.0.1.1:8771/health`. | pm2 (host-side) |
|
| `legal-court-fetch-service.config.cjs` | pm2/js | **שירות-מארח Tier-1 לאחזור פסקי-דין מנט המשפט (X13)** — 2 apps: (א) `legal-court-fetch-xvfb` (Xvfb :99, צג-וירטואלי ל-Camoufox); (ב) `legal-court-fetch-service` (`python -m legal_mcp.court_fetch_service.server`, bound `10.0.1.1:8771`, Bearer `COURT_FETCH_SHARED_SECRET` מ-`~/.legal-court-fetch-service.env`, `DISPLAY=:99`). מריץ Camoufox דרך חבילת-הפייתון (in-process) כי הקונטיינר לא יכול דפדפן. תלות: `pip install -e "mcp-server[court-fetch]" && python -m camoufox fetch`. אחזור = ניווט→צופה→`GetImages`(X-Requested-With)→PDF, ללא CAPTCHA; כשל→`ok:false`→orchestrator מסלים ל-fallback אנושי. **אומת על עת"מ 46111-12-22 (34 עמ').** מראָה לדפוס `legal-chat-service.config.cjs`. ספ: `docs/spec/X13-court-fetch.md`. התקנה: `pm2 start scripts/legal-court-fetch-service.config.cjs && pm2 save`. בריאות: `curl http://10.0.1.1:8771/health`. | pm2 (host-side) |
|
||||||
|
| `reap_orphan_procs.py` | python | **reaper לתהליכים-יתומים שמרווים את שרת Nautilus** — הורג `task-master-mcp` (Node, מתנפח ל~3GB) ו-`camoufox-bin` (Firefox מ-X13 fetch שקרס) **רק כשהם יתומים (`ppid=1`)** — תהליך עם הורה-חי לעולם לא נוגעים בו. `/proc` טהור, בלי psutil. `--dry-run` (דיווח), `--loop N` (דמון כל N ש'). ראה זיכרון [[project_taskmaster_mcp_memory_leak]]. | דרך `legal-reaper.config.cjs` (pm2) |
|
||||||
|
| `legal-reaper.config.cjs` | pm2/js | **דמון pm2 ל-`reap_orphan_procs.py --loop`** (ברירת-מחדל 180ש', `REAP_INTERVAL_S` לעקיפה). `max_memory_restart 100M` (ה-reaper עצמו לא ידלוף). התקנה: `pm2 start scripts/legal-reaper.config.cjs && pm2 save`. לוגים: `pm2 logs legal-reaper`. | pm2 (host-side) |
|
||||||
| `auto-sync-cases.sh` | bash | סנכרון תיקי ערר ל-Gitea — רץ כל דקה | `* * * * *` (cron) |
|
| `auto-sync-cases.sh` | bash | סנכרון תיקי ערר ל-Gitea — רץ כל דקה | `* * * * *` (cron) |
|
||||||
| `backup-db.sh` | bash | גיבוי PostgreSQL יומי ל-`data/backups/` (gzip) | לתזמן: `0 2 * * *` |
|
| `backup-db.sh` | bash | גיבוי PostgreSQL יומי ל-`data/backups/` (gzip) | לתזמן: `0 2 * * *` |
|
||||||
| `restore-db.sh` | bash | שחזור DB מגיבוי (companion ל-backup-db.sh) | ידני |
|
| `restore-db.sh` | bash | שחזור DB מגיבוי (companion ל-backup-db.sh) | ידני |
|
||||||
|
|||||||
35
scripts/legal-reaper.config.cjs
Normal file
35
scripts/legal-reaper.config.cjs
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
/**
|
||||||
|
* pm2 ecosystem entry for legal-reaper — a host-side daemon that periodically
|
||||||
|
* reaps orphaned, runaway processes that saturate the Nautilus box:
|
||||||
|
* - task-master-mcp (Node) orphaned to ppid=1, ballooning to ~3GB each
|
||||||
|
* (memory: project_taskmaster_mcp_memory_leak).
|
||||||
|
* - camoufox-bin (Firefox) leftover from a crashed/killed X13 court fetch.
|
||||||
|
* Only ppid=1 orphans are killed — live, parented processes are never touched.
|
||||||
|
* See scripts/reap_orphan_procs.py for the safety rationale.
|
||||||
|
*
|
||||||
|
* Install (once):
|
||||||
|
* pm2 start /home/chaim/legal-ai/scripts/legal-reaper.config.cjs
|
||||||
|
* pm2 save
|
||||||
|
* Logs:
|
||||||
|
* pm2 logs legal-reaper --lines 50
|
||||||
|
*
|
||||||
|
* Interval defaults to 180s; override with REAP_INTERVAL_S.
|
||||||
|
*/
|
||||||
|
const interval = process.env.REAP_INTERVAL_S || "180";
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
apps: [
|
||||||
|
{
|
||||||
|
name: "legal-reaper",
|
||||||
|
cwd: "/home/chaim/legal-ai",
|
||||||
|
script: "/home/chaim/legal-ai/mcp-server/.venv/bin/python",
|
||||||
|
args: `scripts/reap_orphan_procs.py --loop ${interval}`,
|
||||||
|
env: { HOME: "/home/chaim", PYTHONUNBUFFERED: "1" },
|
||||||
|
autorestart: true,
|
||||||
|
max_restarts: 20,
|
||||||
|
restart_delay: 5000,
|
||||||
|
// The reaper itself is tiny and must never be the thing that leaks.
|
||||||
|
max_memory_restart: "100M",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
119
scripts/reap_orphan_procs.py
Normal file
119
scripts/reap_orphan_procs.py
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Reap orphaned/runaway processes that saturate the Nautilus box.
|
||||||
|
|
||||||
|
Two known offenders (2026-06-07):
|
||||||
|
1. ``task-master-mcp`` (Node) — spawned by the Claude Code VSCode extension,
|
||||||
|
orphaned to ``ppid=1`` when its session ends, then **balloons to ~3GB
|
||||||
|
each**. They accrue as sessions cycle and exhaust RAM within minutes,
|
||||||
|
risking the OOM-killer hitting Postgres/Paperclip. See memory
|
||||||
|
``project_taskmaster_mcp_memory_leak``.
|
||||||
|
2. ``camoufox-bin`` (Firefox) — the X13 court-fetch browser. A fetch that
|
||||||
|
hangs or is killed mid-flight can leave a stray browser orphaned to
|
||||||
|
``ppid=1``. Serial-only fetching means any ``ppid=1`` camoufox-bin is
|
||||||
|
stale and safe to kill.
|
||||||
|
|
||||||
|
Safety: only processes **orphaned to ``ppid=1``** are reaped — a process still
|
||||||
|
owned by a live parent (an attached MCP server, or a browser a fetch is
|
||||||
|
actively using) is never touched. Pure ``/proc`` parsing, no psutil dependency.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/reap_orphan_procs.py # one pass, print what was reaped
|
||||||
|
python scripts/reap_orphan_procs.py --dry-run # report only
|
||||||
|
python scripts/reap_orphan_procs.py --loop 180 # daemon: reap every 180s
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Process-name substrings to reap when orphaned (ppid==1).
|
||||||
|
TARGETS = ("task-master-mcp", "camoufox-bin")
|
||||||
|
|
||||||
|
|
||||||
|
def _read(path: str) -> str:
    """Return the contents of *path* decoded as UTF-8, or ``""`` on any OSError.

    Undecodable bytes are replaced rather than raising, so the result is
    always a string.
    """
    text = ""
    try:
        with open(path, "rb") as handle:
            raw = handle.read()
            text = raw.decode("utf-8", "replace")
    except OSError:
        # Missing, unreadable, or vanished mid-read: report it as empty.
        return ""
    return text
|
||||||
|
|
||||||
|
|
||||||
|
def _proc_info(pid: str) -> tuple[int, str, int] | None:
    """Return ``(ppid, cmdline, rss_kb)`` for *pid*.

    Returns ``None`` when ``/proc/<pid>/status`` cannot be read or is empty.
    A ``PPid:`` or ``VmRSS:`` field that is absent or unparsable is reported
    as 0. The command line has its NUL separators turned into spaces.
    """
    status_text = _read(f"/proc/{pid}/status")
    if status_text == "":
        return None

    # Status-file prefix -> parsed integer (stays 0 when absent/unparsable).
    wanted = {"PPid:": 0, "VmRSS:": 0}
    for row in status_text.splitlines():
        for prefix in wanted:
            if row.startswith(prefix):
                try:
                    wanted[prefix] = int(row.split()[1])
                except (IndexError, ValueError):
                    pass
                break

    raw_cmdline = _read(f"/proc/{pid}/cmdline")
    cmdline = raw_cmdline.replace("\x00", " ").strip()
    return wanted["PPid:"], cmdline, wanted["VmRSS:"]
|
||||||
|
|
||||||
|
|
||||||
|
def find_orphans() -> list[tuple[str, str, int]]:
    """Return ``[(pid, cmd, rss_kb)]`` for every ppid==1 process matching TARGETS.

    A process matches when any TARGETS entry is a substring of its command line.
    """
    # Pair each numeric /proc entry with its parsed info, lazily.
    scanned = (
        (entry, _proc_info(entry))
        for entry in os.listdir("/proc")
        if entry.isdigit()
    )
    return [
        (entry, info[1], info[2])
        for entry, info in scanned
        if info and info[0] == 1 and any(target in info[1] for target in TARGETS)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def reap(dry_run: bool = False) -> int:
    """Kill (or, with *dry_run*, only report) every orphaned TARGETS process.

    The summary line counts only orphans that were actually killed (or, in a
    dry run, would be): a process that vanished before the signal, or that we
    lack permission to signal, frees nothing and is excluded.

    Args:
        dry_run: When true, print what would be reaped and signal nothing.

    Returns:
        The number of orphans found, whether or not each could be killed.
    """
    orphans = find_orphans()
    handled = 0
    freed_mb = 0
    for pid, cmd, rss in orphans:
        # Prefer the matching target name; fall back to a command prefix.
        name = next((t for t in TARGETS if t in cmd), cmd[:30])
        rss_mb = rss // 1024
        if dry_run:
            print(f"[dry-run] would reap pid={pid} ({name}) rss={rss_mb}MB", flush=True)
        else:
            try:
                os.kill(int(pid), signal.SIGKILL)
            except ProcessLookupError:
                # Exited between the scan and the kill — nothing freed by us.
                continue
            except PermissionError:
                print(f"  permission denied for pid={pid} ({name})", flush=True)
                continue
            print(f"reaped pid={pid} ({name}) rss={rss_mb}MB", flush=True)
        handled += 1
        freed_mb += rss_mb
    if handled:
        print(f"{'would free' if dry_run else 'freed'} ~{freed_mb}MB "
              f"from {handled} orphan(s)", flush=True)
    return len(orphans)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
ap = argparse.ArgumentParser(description="Reap orphaned task-master-mcp / camoufox-bin")
|
||||||
|
ap.add_argument("--dry-run", action="store_true", help="report only, kill nothing")
|
||||||
|
ap.add_argument("--loop", type=int, default=0, metavar="SECONDS",
|
||||||
|
help="run forever, reaping every N seconds")
|
||||||
|
args = ap.parse_args()
|
||||||
|
if args.loop:
|
||||||
|
print(f"reaper loop: every {args.loop}s targets={TARGETS}", flush=True)
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
reap(args.dry_run)
|
||||||
|
except Exception as e: # never let the daemon die
|
||||||
|
print(f"reap error: {e}", flush=True)
|
||||||
|
time.sleep(args.loop)
|
||||||
|
else:
|
||||||
|
reap(args.dry_run)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
Reference in New Issue
Block a user