"""Drain the precedent metadata-extraction queue. Calls ``process_pending_extractions(kind='metadata')`` in batches until the queue is empty (two consecutive zero-progress rounds). Metadata extraction runs on **Gemini Flash** (structured JSON) — fast and reliable, unlike the agentic claude CLI which hit ``error_max_turns`` on this bounded task. A no-op (fast) when the queue is empty. Host-only (reads GEMINI_API_KEY + POSTGRES_URL from ~/.env via legal_mcp.config). Scheduled by ``legal-metadata-drain`` (pm2 cron); also runnable by hand: mcp-server/.venv/bin/python scripts/drain_metadata_queue.py [batch] """ import asyncio import os import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "mcp-server", "src")) from legal_mcp.services import precedent_library as pl async def main() -> int: batch = int(sys.argv[1]) if len(sys.argv) > 1 else 10 total = 0 empty_rounds = 0 rnd = 0 while empty_rounds < 2: rnd += 1 out = await pl.process_pending_extractions(kind="metadata", limit=batch) processed = out.get("processed", 0) total += processed print(f"[round {rnd}] processed={processed} total_pending={out.get('total_pending', 0)} " f"status={out.get('status')}", flush=True) for r in out.get("results", []): print(f" {str(r.get('case_number',''))[:42]}: {r.get('status')}", flush=True) if processed == 0: empty_rounds += 1 await asyncio.sleep(3) else: empty_rounds = 0 print(f"===DONE=== metadata extracted (cumulative cases handled={total})", flush=True) return 0 if __name__ == "__main__": sys.exit(asyncio.run(main()))