"""Git sync helpers for case repos. Each case lives in its own git repo with a Gitea remote. The remote URL embeds an auth token (https://chaim:TOKEN@host/...). When the token is rotated in Infisical, repos created with the old token will fail to push silently — only logged at WARNING level. ``commit_and_push`` re-injects the *current* token into the existing origin URL on every call, so push survives token rotation. This module also runs a periodic ``sweep_loop`` that catches files written outside the API path (most importantly: agents writing research artefacts directly to the case dir). The full case repo is the user's backup, so anything in the dir must end up on Gitea. """ from __future__ import annotations import asyncio import logging import os import subprocess from pathlib import Path from legal_mcp import config logger = logging.getLogger(__name__) def _gitea_token() -> str: return os.environ.get("GITEA_ACCESS_TOKEN") or os.environ.get("GITEA_TOKEN", "") def _git_env(case_dir: str | Path | None = None) -> dict: env = { "GIT_AUTHOR_NAME": "Ezer Mishpati", "GIT_AUTHOR_EMAIL": "legal@local", "GIT_COMMITTER_NAME": "Ezer Mishpati", "GIT_COMMITTER_EMAIL": "legal@local", "PATH": os.environ.get("PATH", "/usr/bin:/bin"), "GIT_TERMINAL_PROMPT": "0", } if case_dir is not None: # Trust the case dir even when the running uid differs from the # owner (prod container is uniform-root, but host runs may not be). env["GIT_CONFIG_COUNT"] = "1" env["GIT_CONFIG_KEY_0"] = "safe.directory" env["GIT_CONFIG_VALUE_0"] = str(case_dir) return env def _refresh_remote_url(case_dir: Path, env: dict) -> bool: result = subprocess.run( ["git", "remote", "get-url", "origin"], cwd=case_dir, capture_output=True, text=True, ) if result.returncode != 0: return False current_url = result.stdout.strip() if "@" in current_url and current_url.startswith("https://"): bare_url = "https://" + current_url.split("@", 1)[1] else: bare_url = current_url token = _gitea_token() if not token: return True # Push without auth — will fail, but caller decides what to do auth_url = bare_url.replace("https://", f"https://chaim:{token}@") if auth_url != current_url: subprocess.run( ["git", "remote", "set-url", "origin", auth_url], cwd=case_dir, capture_output=True, env=env, ) return True def commit_and_push(case_dir: str | Path, message: str) -> bool: """Stage, commit, refresh origin URL with current token, and push. Best-effort: on failure logs at WARNING and returns False, but never raises. Continues to push even if the commit was a no-op (in case earlier commits are unpushed). """ case_dir = Path(case_dir) if not (case_dir / ".git").exists(): return False env = _git_env(case_dir) subprocess.run(["git", "add", "."], cwd=case_dir, capture_output=True, env=env) commit = subprocess.run( ["git", "commit", "-m", message], cwd=case_dir, capture_output=True, text=True, env=env, ) if commit.returncode != 0 and "nothing to commit" not in commit.stdout: logger.warning("Git commit failed in %s: %s", case_dir, commit.stderr or commit.stdout) if not _refresh_remote_url(case_dir, env): logger.warning("No origin remote configured in %s — skipping push", case_dir) return False push = subprocess.run( ["git", "push"], cwd=case_dir, capture_output=True, text=True, env=env, ) if push.returncode != 0: logger.warning("Git push failed in %s: %s", case_dir, push.stderr) return False return True # ── Periodic sweep ──────────────────────────────────────────────── # # The user's expectation is that "anything I or an agent puts into a case # dir ends up on Gitea". Explicit commit_and_push calls cover the API # write paths, but agents write research/draft files directly to disk. # A short periodic sweep is the safety net. _SWEEP_INTERVAL_SEC = 30 def _porcelain_changes(case_dir: Path, env: dict) -> list[str]: """Return list of `git status --porcelain` lines, or [] if clean/error.""" res = subprocess.run( ["git", "status", "--porcelain"], cwd=case_dir, capture_output=True, text=True, env=env, ) if res.returncode != 0: return [] return [ln for ln in res.stdout.splitlines() if ln.strip()] def _auto_message(changes: list[str]) -> str: """Build a Hebrew commit message from porcelain output. Groups by top-level subdir under the case dir so a sweep that picks up one DOCX export plus one research file produces a useful summary instead of "auto-sync". """ groups: dict[str, int] = {} sample: dict[str, str] = {} for line in changes: path = line[3:].strip().strip('"') if "->" in path: # rename path = path.split("->", 1)[1].strip().strip('"') first = path.split("/", 1)[0] groups[first] = groups.get(first, 0) + 1 sample.setdefault(first, path) label_map = { "documents": "מסמכים", "drafts": "טיוטות", "exports": "גרסאות", "case.json": "מטא", "notes.md": "הערות", } parts: list[str] = [] for top, count in groups.items(): label = label_map.get(top, top) parts.append(f"{label} ({count})" if count > 1 else label) summary = " · ".join(parts) or "שינויים" return f"אוטו: {summary}" def sweep_once() -> dict: """Walk every case dir and commit+push any dirty changes. Synchronous (subprocess-based) but cheap — `git status --porcelain` on a clean dir is a sub-millisecond operation. Returns a small report suitable for logging. """ base: Path = config.CASES_DIR if not base.exists(): return {"checked": 0, "synced": 0, "errors": 0} checked = synced = errors = 0 for case_dir in base.iterdir(): if not case_dir.is_dir() or not (case_dir / ".git").exists(): continue checked += 1 changes = _porcelain_changes(case_dir, _git_env(case_dir)) if not changes: continue msg = _auto_message(changes) ok = commit_and_push(case_dir, msg) if ok: synced += 1 logger.info("auto-sync committed %d change(s) in %s", len(changes), case_dir.name) else: errors += 1 return {"checked": checked, "synced": synced, "errors": errors} async def sweep_loop(interval_sec: int = _SWEEP_INTERVAL_SEC) -> None: """Background task: run sweep_once forever every interval_sec. Cancellation-safe; logs and continues on transient errors. """ logger.info("git_sync.sweep_loop started (interval=%ds)", interval_sec) while True: try: await asyncio.sleep(interval_sec) # Run the sync subprocess work in a thread to avoid blocking # the FastAPI event loop. await asyncio.to_thread(sweep_once) except asyncio.CancelledError: logger.info("git_sync.sweep_loop cancelled") raise except Exception as exc: logger.warning("git_sync sweep iteration failed: %s", exc)