"""Configuration loaded from Infisical or central .env file. Priority: Infisical → environment variables → .env file """ import os from pathlib import Path from dotenv import load_dotenv # Load from central .env or override path dotenv_path = os.environ.get("DOTENV_PATH", str(Path.home() / ".env")) load_dotenv(dotenv_path) # Try loading from Infisical if configured INFISICAL_TOKEN = os.environ.get("INFISICAL_TOKEN", "") if INFISICAL_TOKEN: try: from infisical_sdk import InfisicalSDKClient _client = InfisicalSDKClient(token=INFISICAL_TOKEN) _secrets = _client.get_all_secrets( environment=os.environ.get("INFISICAL_ENV", "production"), project_id=os.environ.get("INFISICAL_PROJECT_ID", ""), ) for s in _secrets: os.environ.setdefault(s.secret_key, s.secret_value) except ImportError: pass # Infisical SDK not installed — use .env except Exception: pass # Infisical unreachable — fall back to .env # PostgreSQL POSTGRES_URL = os.environ.get( "POSTGRES_URL", f"postgres://{os.environ.get('POSTGRES_USER', 'legal_ai')}:" f"{os.environ.get('POSTGRES_PASSWORD', '')}@" f"{os.environ.get('POSTGRES_HOST', '127.0.0.1')}:" f"{os.environ.get('POSTGRES_PORT', '5433')}/" f"{os.environ.get('POSTGRES_DB', 'legal_ai')}", ) # Redis REDIS_URL = os.environ.get("REDIS_URL", "redis://127.0.0.1:6380/0") # Claude CLI — model + effort for halacha extraction. # All LLM calls go through the local `claude -p` CLI (claude_session.py). # By default the CLI uses the developer's session default model with no # explicit effort. For halacha extraction we pin Opus 4.8 @ xhigh: the # 2026-05-31 A/B (scripts/ab_halacha_opus48.py) showed it cuts over-extraction # (~124→51 on שטיין) at 100% quote-verification with honest confidence # calibration. Env-overridable so the model/effort can be tuned without a # code change (set to "" to fall back to the CLI default). Other extractors # (claims, metadata, block-writing, QA) keep the CLI default unless similarly # pinned. 
HALACHA_EXTRACT_MODEL = os.getenv("HALACHA_EXTRACT_MODEL", "claude-opus-4-8")
HALACHA_EXTRACT_EFFORT = os.getenv("HALACHA_EXTRACT_EFFORT", "xhigh")

# Effort used when draining the extraction queue in BULK (process_pending over
# many precedents). xhigh gives the best quality for one precedent but is very
# slow at scale (a 64-chunk case ≈ 20 min), so bulk drains run lighter to save
# wall-clock time while interactive single re-extraction stays on
# HALACHA_EXTRACT_EFFORT (xhigh). Override via env: 'xhigh' makes bulk match
# single, 'medium' gives maximum speed.
HALACHA_BULK_EXTRACT_EFFORT = os.getenv("HALACHA_BULK_EXTRACT_EFFORT", "high")

# Number of chunks processed concurrently WITHIN one extraction. Every
# `claude -p` @ xhigh holds ~300MB RSS plus heavy CPU; cross-process overlap
# (agent retries) on top of that froze the box on 2026-05-31 (hard reboot).
# A global advisory lock now limits the system to ONE extraction at a time;
# this value limits the chunks inside that extraction.
HALACHA_CHUNK_CONCURRENCY = int(os.getenv("HALACHA_CHUNK_CONCURRENCY", "3"))

# Corroboration thresholds.
# NOTE(review): meaning inferred from the names only (match-score floor and
# minimum number of citations) — confirm against the corroboration code.
HALACHA_CORROBORATION_MATCH_FLOOR = float(
    os.getenv("HALACHA_CORROBORATION_MATCH_FLOOR", "0.50")
)
HALACHA_CORROBORATION_MIN_CITES = int(
    os.getenv("HALACHA_CORROBORATION_MIN_CITES", "2")
)

# X11 Phase 2: gate corroboration → approval. ON by default (Dafna validated
# the Phase 1 signal, 2026-06-01). Set to "false" to switch off the
# auto-approve/demote wiring without touching the Phase 1 signal.
HALACHA_CORROBORATION_AUTO_APPROVE = (
    os.getenv("HALACHA_CORROBORATION_AUTO_APPROVE", "true").strip().lower()
    in {"1", "true", "yes", "on"}
)

# Voyage AI
VOYAGE_API_KEY = os.getenv("VOYAGE_API_KEY", "")
VOYAGE_MODEL = os.getenv("VOYAGE_MODEL", "voyage-law-2")
VOYAGE_DIMENSIONS = 1024

# Rerank — cross-encoder second stage. Disabled by default; enable through the
# environment to apply it to every semantic search tool (search_decisions,
# search_case_documents, find_similar_cases, search_precedent_library).
VOYAGE_RERANK_MODEL = os.environ.get("VOYAGE_RERANK_MODEL", "rerank-2")
# Flag parsing matches HALACHA_CORROBORATION_AUTO_APPROVE: surrounding
# whitespace is ignored and "1"/"true"/"yes"/"on" (any case) all enable it.
# A bare `== "true"` comparison silently treated "1" or "yes" as disabled.
VOYAGE_RERANK_ENABLED = os.environ.get(
    "VOYAGE_RERANK_ENABLED", "false"
).strip().lower() in ("1", "true", "yes", "on")
# How many candidates to fetch from bi-encoder before reranking.
# 50 was the depth used in the POC; balances recall vs rerank cost.
VOYAGE_RERANK_FETCH_K = int(os.environ.get("VOYAGE_RERANK_FETCH_K", "50"))

# Multimodal — page-image embeddings via voyage-multimodal-3. Off by
# default; flip with env to enable per-page image embedding during
# ingestion + hybrid (text+image) ranking at search time. POC #3
# validated on a 89-page appraisal PDF (38s, 312K tokens, recovered
# table structure + image-only scanned pages that text-OCR misses).
MULTIMODAL_ENABLED = os.environ.get(
    "MULTIMODAL_ENABLED", "false"
).strip().lower() in ("1", "true", "yes", "on")
MULTIMODAL_MODEL = os.environ.get("MULTIMODAL_MODEL", "voyage-multimodal-3")
# Render DPI for the image fed to the embedder. POC used 144 — sweet
# spot between embedding quality and tokens/page (144 ≈ 3.5K tok/page).
MULTIMODAL_DPI = int(os.environ.get("MULTIMODAL_DPI", "144"))
# Separate, lower DPI for the JPEG thumbnail saved to disk for UI
# preview. ~96dpi → ~20KB/page; ingestion-time, no re-render at view.
MULTIMODAL_THUMB_DPI = int(os.environ.get("MULTIMODAL_THUMB_DPI", "96"))
# Hybrid merge: Reciprocal Rank Fusion (RRF) bias for the *text* side.
# voyage-3 cosine scores (~0.4-0.5) and voyage-multimodal-3 scores
# (~0.20-0.25) live on different scales; a direct weighted sum lets
# text always dominate. RRF is rank-based and robust to that. The
# weight here biases the contribution of each side: 0.5 = balanced
# (vanilla RRF), >0.5 favours text, <0.5 favours image. Tunable per
# env without redeploy.
MULTIMODAL_TEXT_WEIGHT = float(
    os.environ.get("MULTIMODAL_TEXT_WEIGHT", "0.5")
)
# RRF damping constant. Standard literature value is 60: lower values
# concentrate weight at top ranks; higher values flatten the curve.
MULTIMODAL_RRF_K = int(os.environ.get("MULTIMODAL_RRF_K", "60"))

# BM25/lexical hybrid — fuse ``ts_rank_cd`` over ``content_tsv``/
# ``rule_tsv`` (DB schema V12) with the semantic cosine layer via RRF.
# Recovers recall on exact-string queries that voyage embeddings blur
# (e.g. case-number citations like "1461/20", "317/10"; rare planning
# vocabulary). Hebrew uses the ``simple`` text-search config — no
# stemmer needed, and numeric/punctuation tokens stay intact. When
# disabled, hybrid search falls back to semantic-only (the previous
# behaviour). On by default — the lexical leg is cheap (GIN index) and
# only ever *adds* candidates to RRF, it can't down-rank a strong
# semantic hit.
#
# Flag parsing matches HALACHA_CORROBORATION_AUTO_APPROVE: surrounding
# whitespace is ignored and "1"/"true"/"yes"/"on" (any case) all enable it.
# With a bare `== "true"` comparison, BM25_HYBRID_ENABLED=1 silently turned
# this default-on feature OFF.
BM25_HYBRID_ENABLED = os.environ.get(
    "BM25_HYBRID_ENABLED", "true"
).strip().lower() in ("1", "true", "yes", "on")

# Halacha extraction — auto-approve threshold. Halachot with extractor
# confidence >= this value are inserted with review_status='approved'
# instead of 'pending_review' (so they immediately appear in
# search_precedent_library). Set to a value > 1.0 to disable auto-approval.
# 0.80 baseline: 89% of historical extractions land here, manual spot-check
# of 10 random samples confirmed quality. Tunable via env if drift is
# observed (e.g. raise to 0.90 if false-positives appear).
HALACHA_AUTO_APPROVE_THRESHOLD = float(
    os.environ.get("HALACHA_AUTO_APPROVE_THRESHOLD", "0.80")
)

# Halacha dedup-on-insert — within-precedent semantic cosine ceiling. Before
# storing a halacha, store_halachot_for_chunk skips it if its rule-embedding has
# cosine >= this value against an already-stored halacha of the SAME precedent
# (exact normalized supporting_quote is always skipped regardless). 0.93 is the
# conservative auto-skip floor: the 2026-06-03 cleanup showed the 0.90-0.95 band
# is "almost entirely" same-rule-reworded, but auto-skip is unreviewed so we sit
# just above the manual-cleanup 0.90 to avoid dropping a genuinely distinct
# principle.
# A value above 1.0 turns semantic dedup off (exact-quote dedup still runs).
HALACHA_DEDUP_COSINE = float(
    os.getenv("HALACHA_DEDUP_COSINE", "0.93")
)

# Halacha dedup TAIL band (#82.3) — cosines in [BAND_COSINE, DEDUP_COSINE) are
# too low for an automatic skip yet still suspicious. When a halacha's nearest
# same-precedent neighbour falls in this band AND the two share high LEXICAL
# overlap (Jaccard/Levenshtein on rule_statement), it is flagged
# 'near_duplicate' (which blocks auto-approve → review) rather than skipped.
# That catches paraphrases the cosine threshold misses without dropping a
# possibly-distinct principle unreviewed. 0.83 comes from the same cleanup.
HALACHA_DEDUP_BAND_COSINE = float(
    os.getenv("HALACHA_DEDUP_BAND_COSINE", "0.83")
)

# Halacha review-queue clustering (#84.2) — if the review queue is requested
# with cluster=true, halachot of the SAME precedent whose rule-embeddings lie
# within this cosine are shown as ONE review card (canonical + variants), so
# the chair rules on near-identical principles once rather than repeatedly.
# Display only — never merges/deletes. 0.90 = "same principle, reworded".
HALACHA_CLUSTER_COSINE = float(
    os.getenv("HALACHA_CLUSTER_COSINE", "0.90")
)

# Halacha NLI entailment validator (#81.3) — once extraction finishes, a
# claude_session judge checks that each halacha's rule_statement is entailed
# by its supporting_quote. Non-entailed (neutral/contradiction) results get
# the quality flag 'nli_unsupported', which blocks auto-approve. Runs through
# the local CLI (zero cost); fails OPEN if the CLI is unavailable (e.g.
# container). 'low' effort — entailment is a simple call.
# Flag parsing matches HALACHA_CORROBORATION_AUTO_APPROVE: surrounding
# whitespace is ignored and "1"/"true"/"yes"/"on" (any case) all enable it.
# With a bare `== "true"` comparison, setting a default-on flag to "1"
# silently turned it OFF.
HALACHA_NLI_ENABLED = os.environ.get(
    "HALACHA_NLI_ENABLED", "true"
).strip().lower() in ("1", "true", "yes", "on")
HALACHA_NLI_MODEL = os.environ.get("HALACHA_NLI_MODEL", HALACHA_EXTRACT_MODEL)
HALACHA_NLI_EFFORT = os.environ.get("HALACHA_NLI_EFFORT", "low")

# Halacha over-extraction consolidation (#81.5) — after a precedent finishes
# extracting, a claude_session pass folds facets of the SAME legal question
# (below the #82 dedup cosine) into one canonical; the rest are marked rejected
# (reversible). Cross-chunk safety net for over-splitting. Runs through the local
# CLI (zero cost); fails OPEN. 'high' effort — folding needs careful judgment.
HALACHA_CONSOLIDATE_ENABLED = os.environ.get(
    "HALACHA_CONSOLIDATE_ENABLED", "true"
).strip().lower() in ("1", "true", "yes", "on")
HALACHA_CONSOLIDATE_MODEL = os.environ.get(
    "HALACHA_CONSOLIDATE_MODEL", HALACHA_EXTRACT_MODEL
)
HALACHA_CONSOLIDATE_EFFORT = os.environ.get("HALACHA_CONSOLIDATE_EFFORT", "high")

# Google Cloud Vision (OCR for scanned PDFs)
GOOGLE_CLOUD_VISION_API_KEY = os.environ.get("GOOGLE_CLOUD_VISION_API_KEY", "")

# Data directory
DATA_DIR = Path(os.environ.get("DATA_DIR", str(Path.home() / "legal-ai" / "data")))
TRAINING_DIR = DATA_DIR / "training"
EXPORTS_DIR = DATA_DIR / "exports"  # legacy exports only

# Cases directory — flat structure: data/cases/{case_number}/
CASES_DIR = DATA_DIR / "cases"


def find_case_dir(case_number: str) -> Path:
    """Return the case directory for a given case number.

    The result is simply ``CASES_DIR / case_number``; the directory is
    neither created nor checked for existence.

    NOTE(review): ``case_number`` is joined verbatim, so a value containing
    path separators resolves to a nested (or, if absolute, unrelated) path.
    Confirm that callers pass a normalised case number.
    """
    return CASES_DIR / case_number


# Chunking parameters
CHUNK_SIZE_TOKENS = 600
CHUNK_OVERLAP_TOKENS = 100

# Parent-doc retrieval (TaskMaster #48) — hierarchical chunking + lookup.
# When enabled:
#   - The ingest pipeline emits two tiers of precedent_chunks: small
#     "child" chunks (~300 tokens) for high-recall semantic/lexical
#     matching, and larger "parent" chunks (~1500 tokens) that contain
#     ~5 children each. Children are embedded and indexed; parents
#     carry the broader text the LLM gets back.
#   - Search runs against children, then swaps each hit for its parent
#     row before returning — so the writer sees a coherent passage
#     instead of a 300-token sliver.
#
# Off by default: the schema (V17) is safe to apply even when the flag
# is false (the chunker still emits single-tier chunks and search just
# returns them unchanged). Flip to true ONLY after the corpus has been
# re-ingested with the hierarchical chunker — see precedent_library
# ingest pipeline + the backfill plan in TaskMaster #48.
#
# Flag parsing matches HALACHA_CORROBORATION_AUTO_APPROVE: surrounding
# whitespace is ignored and "1"/"true"/"yes"/"on" (any case) all enable it.
PARENT_DOC_RETRIEVAL_ENABLED = os.environ.get(
    "PARENT_DOC_RETRIEVAL_ENABLED", "false"
).strip().lower() in ("1", "true", "yes", "on")
# Child chunks are what get embedded + matched. Smaller = higher recall,
# more rows. 300 tokens (~600 chars Hebrew) is the empirical sweet spot
# referenced in the original parent-doc literature (Anthropic, LlamaIndex).
PARENT_DOC_CHILD_SIZE_TOKENS = int(
    os.environ.get("PARENT_DOC_CHILD_SIZE_TOKENS", "300")
)
# Parent chunks are what get returned to the LLM. Large enough to hold
# a full rule statement plus the surrounding paragraph and any cited
# authority. 1500 tokens = ~5 children at 300 each.
PARENT_DOC_PARENT_SIZE_TOKENS = int(
    os.environ.get("PARENT_DOC_PARENT_SIZE_TOKENS", "1500")
)
# Child overlap — keeps neighbouring children sharing ~50 tokens so a
# sentence on a chunk boundary still matches the natural phrasing.
PARENT_DOC_CHILD_OVERLAP_TOKENS = int(
    os.environ.get("PARENT_DOC_CHILD_OVERLAP_TOKENS", "50")
)

# External service allowlist — case materials may ONLY be sent to these domains
ALLOWED_EXTERNAL_SERVICES = {
    "api.voyageai.com",  # Voyage AI (embeddings)
    "vision.googleapis.com",  # Google Cloud Vision (OCR)
}

# Audit — default on. With a bare `== "true"` comparison, AUDIT_ENABLED=1
# silently switched auditing OFF; "1"/"yes"/"on" now keep it enabled.
AUDIT_ENABLED = os.environ.get(
    "AUDIT_ENABLED", "true"
).strip().lower() in ("1", "true", "yes", "on")


# ── Utility ───────────────────────────────────────────────────────


def parse_llm_json(raw: str):
    """Parse JSON from LLM response, handling markdown wrapping and truncation.

    Handles:
    1. Markdown ```json ... ``` code blocks
    2. Extra text before/after JSON
    3. Truncated JSON (missing closing brackets) — attempts recovery

    When both an object and an array could be extracted, the container whose
    opening bracket appears first in the text is tried first, so
    ``text [{"a": 1}] text`` yields the list rather than its inner object.

    Args:
        raw: The raw LLM response text. ``None`` or an empty string is
            accepted and yields ``None``.

    Returns:
        The decoded JSON value, or ``None`` when nothing parseable is found.
    """
    import json
    import re

    if not raw:
        return None

    text = raw.strip()

    # Strip markdown code blocks
    text = re.sub(r"^```(?:json)?\s*\n?", "", text)
    text = re.sub(r"\n?\s*```\s*$", "", text)

    # Try direct parse first
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Candidate container kinds, ordered by where their opener first appears:
    # the outermost JSON value is the one that starts first.
    containers = [
        (opener, closer)
        for opener, closer in (("{", "}"), ("[", "]"))
        if opener in text
    ]
    containers.sort(key=lambda pair: text.find(pair[0]))

    # JSON embedded in extra text: take the widest span from the first opener
    # to the last matching closer.
    for opener, closer in containers:
        start = text.find(opener)
        end = text.rfind(closer)
        if end <= start:
            continue
        try:
            return json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            continue

    # Truncated JSON recovery: cut back to the last complete object (the last
    # "}") and append the closers that are still missing.
    for opener, _closer in containers:
        fragment = text[text.find(opener):]
        last_brace = fragment.rfind("}")
        if last_brace <= 0:
            continue
        candidate = fragment[:last_brace + 1]
        if opener == "[":
            # Array of objects: drop the partial trailing item, close the list.
            candidate += "]"
        else:
            # Object: close however many braces are still open (a negative
            # count multiplies to an empty string).
            candidate += "}" * (candidate.count("{") - candidate.count("}"))
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue

    return None