diff --git a/src/tools/build-legal-stance.ts b/src/tools/build-legal-stance.ts
index 91bcc25..442794c 100644
--- a/src/tools/build-legal-stance.ts
+++ b/src/tools/build-legal-stance.ts
@@ -1,9 +1,10 @@
 /**
- * build_legal_stance -- Build a comprehensive set of citations for a legal question.
+ * build_legal_stance — Build a comprehensive set of citations for a legal question.
  */
 
 import type Database from '@ansvar/mcp-sqlite';
-import { buildFtsQueryVariants, sanitizeFtsInput } from '../utils/fts-query.js';
+import { buildFtsQueryVariants, buildLikePattern, sanitizeFtsInput } from '../utils/fts-query.js';
+import { resolveDocumentId } from '../utils/statute-id.js';
 import { generateResponseMetadata, type ToolResponse } from '../utils/metadata.js';
 
 export interface BuildLegalStanceInput {
@@ -31,8 +32,27 @@ export async function buildLegalStance(
   }
 
   const limit = Math.min(Math.max(input.limit ?? 5, 1), 20);
+  // Fetch extra rows to account for deduplication
+  const fetchLimit = limit * 2;
   const queryVariants = buildFtsQueryVariants(sanitizeFtsInput(input.query));
 
+  // Resolve document_id from title if provided
+  let resolvedDocId: string | undefined;
+  if (input.document_id) {
+    const resolved = resolveDocumentId(db, input.document_id);
+    resolvedDocId = resolved ?? undefined;
+    if (!resolved) {
+      return {
+        results: [],
+        _metadata: {
+          ...generateResponseMetadata(db),
+          note: `No document found matching "${input.document_id}"`,
+        },
+      };
+    }
+  }
+
+  let queryStrategy = 'none';
   for (const ftsQuery of queryVariants) {
     let sql = `
       SELECT
@@ -50,23 +70,99 @@ export async function buildLegalStance(
     `;
     const params: (string | number)[] = [ftsQuery];
 
-    if (input.document_id) {
+    if (resolvedDocId) {
       sql += ' AND lp.document_id = ?';
-      params.push(input.document_id);
+      params.push(resolvedDocId);
     }
 
     sql += ' ORDER BY relevance LIMIT ?';
-    params.push(limit);
+    params.push(fetchLimit);
 
     try {
       const rows = db.prepare(sql).all(...params) as LegalStanceResult[];
       if (rows.length > 0) {
-        return { results: rows, _metadata: generateResponseMetadata(db) };
+        queryStrategy = ftsQuery === queryVariants[0] ? 'exact' : 'fallback';
+        const deduped = deduplicateResults(rows, limit);
+        return {
+          results: deduped,
+          _metadata: {
+            ...generateResponseMetadata(db),
+            ...(queryStrategy === 'fallback' ? { query_strategy: 'broadened' } : {}),
+          },
+        };
       }
     } catch {
       continue;
     }
   }
 
+  // LIKE fallback — final tier when FTS5 returns no results
+  const likePattern = buildLikePattern(sanitizeFtsInput(input.query));
+  // A bare '%' means no usable term survived sanitization; it would match every row
+  if (likePattern !== '%') {
+    let likeSql = `
+      SELECT
+        lp.document_id,
+        ld.title as document_title,
+        lp.provision_ref,
+        lp.section,
+        lp.title,
+        substr(lp.content, 1, 300) as snippet,
+        0 as relevance
+      FROM legal_provisions lp
+      JOIN legal_documents ld ON ld.id = lp.document_id
+      WHERE lp.content LIKE ? ESCAPE '\\'
+    `;
+    const likeParams: (string | number)[] = [likePattern];
+
+    if (resolvedDocId) {
+      likeSql += ' AND lp.document_id = ?';
+      likeParams.push(resolvedDocId);
+    }
+
+    likeSql += ' LIMIT ?';
+    likeParams.push(fetchLimit);
+
+    try {
+      const rows = db.prepare(likeSql).all(...likeParams) as LegalStanceResult[];
+      if (rows.length > 0) {
+        return {
+          results: deduplicateResults(rows, limit),
+          _metadata: {
+            ...generateResponseMetadata(db),
+            query_strategy: 'like_fallback',
+          },
+        };
+      }
+    } catch {
+      // LIKE query failed — fall through to the empty result below
+    }
+  }
+
   return { results: [], _metadata: generateResponseMetadata(db) };
 }
+
+/**
+ * Deduplicate results by document_title + provision_ref.
+ * Duplicate document IDs (numeric vs slug) cause the same provision to appear twice.
+ * Keeps the first occurrence of each provision.
+ *
+ * @param rows - Candidate rows, in the order they should be kept.
+ * @param limit - Maximum number of unique rows to return.
+ * @returns At most `limit` unique rows, in their original order.
+ */
+function deduplicateResults(
+  rows: LegalStanceResult[],
+  limit: number,
+): LegalStanceResult[] {
+  const seen = new Set<string>();
+  const deduped: LegalStanceResult[] = [];
+  for (const row of rows) {
+    const key = `${row.document_title}::${row.provision_ref}`;
+    if (seen.has(key)) continue;
+    seen.add(key);
+    deduped.push(row);
+    if (deduped.length >= limit) break;
+  }
+  return deduped;
+}
diff --git a/src/tools/search-legislation.ts b/src/tools/search-legislation.ts
index 4d21bdd..ff2259d 100644
--- a/src/tools/search-legislation.ts
+++ b/src/tools/search-legislation.ts
@@ -1,10 +1,11 @@
 /**
- * search_legislation -- Full-text search across Israeli statute provisions.
+ * search_legislation — Full-text search across Israeli statute provisions.
  */
 
 import type Database from '@ansvar/mcp-sqlite';
-import { buildFtsQueryVariants, sanitizeFtsInput } from '../utils/fts-query.js';
+import { buildFtsQueryVariants, buildLikePattern, sanitizeFtsInput } from '../utils/fts-query.js';
 import { normalizeAsOfDate } from '../utils/as-of-date.js';
+import { resolveDocumentId } from '../utils/statute-id.js';
 import { generateResponseMetadata, type ToolResponse } from '../utils/metadata.js';
 
 export interface SearchLegislationInput {
@@ -38,8 +39,27 @@ export async function searchLegislation(
   }
 
   const limit = Math.min(Math.max(input.limit ?? DEFAULT_LIMIT, 1), MAX_LIMIT);
+  // Fetch extra rows to account for deduplication
+  const fetchLimit = limit * 2;
   const queryVariants = buildFtsQueryVariants(sanitizeFtsInput(input.query));
 
+  // Resolve document_id from title if provided (same resolution as get_provision)
+  let resolvedDocId: string | undefined;
+  if (input.document_id) {
+    const resolved = resolveDocumentId(db, input.document_id);
+    resolvedDocId = resolved ?? undefined;
+    if (!resolved) {
+      return {
+        results: [],
+        _metadata: {
+          ...generateResponseMetadata(db),
+          note: `No document found matching "${input.document_id}"`,
+        },
+      };
+    }
+  }
+
+  let queryStrategy = 'none';
   for (const ftsQuery of queryVariants) {
     let sql = `
       SELECT
@@ -58,9 +78,9 @@ export async function searchLegislation(
     `;
     const params: (string | number)[] = [ftsQuery];
 
-    if (input.document_id) {
+    if (resolvedDocId) {
       sql += ' AND lp.document_id = ?';
-      params.push(input.document_id);
+      params.push(resolvedDocId);
     }
 
     if (input.status) {
@@ -69,18 +89,100 @@ export async function searchLegislation(
     }
 
     sql += ' ORDER BY relevance LIMIT ?';
-    params.push(limit);
+    params.push(fetchLimit);
 
     try {
       const rows = db.prepare(sql).all(...params) as SearchLegislationResult[];
       if (rows.length > 0) {
-        return { results: rows, _metadata: generateResponseMetadata(db) };
+        queryStrategy = ftsQuery === queryVariants[0] ? 'exact' : 'fallback';
+        const deduped = deduplicateResults(rows, limit);
+        return {
+          results: deduped,
+          _metadata: {
+            ...generateResponseMetadata(db),
+            ...(queryStrategy === 'fallback' ? { query_strategy: 'broadened' } : {}),
+          },
+        };
       }
     } catch {
-      // FTS query syntax error -- try next variant
+      // FTS query syntax error — try next variant
       continue;
     }
   }
 
+  // LIKE fallback — final tier when FTS5 returns no results
+  const likePattern = buildLikePattern(sanitizeFtsInput(input.query));
+  // A bare '%' means no usable term survived sanitization; it would match every row
+  if (likePattern !== '%') {
+    let likeSql = `
+      SELECT
+        lp.document_id,
+        ld.title as document_title,
+        lp.provision_ref,
+        lp.chapter,
+        lp.section,
+        lp.title,
+        substr(lp.content, 1, 200) as snippet,
+        0 as relevance
+      FROM legal_provisions lp
+      JOIN legal_documents ld ON ld.id = lp.document_id
+      WHERE lp.content LIKE ? ESCAPE '\\'
+    `;
+    const likeParams: (string | number)[] = [likePattern];
+
+    if (resolvedDocId) {
+      likeSql += ' AND lp.document_id = ?';
+      likeParams.push(resolvedDocId);
+    }
+
+    if (input.status) {
+      likeSql += ' AND ld.status = ?';
+      likeParams.push(input.status);
+    }
+
+    likeSql += ' LIMIT ?';
+    likeParams.push(fetchLimit);
+
+    try {
+      const rows = db.prepare(likeSql).all(...likeParams) as SearchLegislationResult[];
+      if (rows.length > 0) {
+        return {
+          results: deduplicateResults(rows, limit),
+          _metadata: {
+            ...generateResponseMetadata(db),
+            query_strategy: 'like_fallback',
+          },
+        };
+      }
+    } catch {
+      // LIKE query failed — fall through to the empty result below
+    }
+  }
+
   return { results: [], _metadata: generateResponseMetadata(db) };
 }
+
+/**
+ * Deduplicate search results by document_title + provision_ref.
+ * Duplicate document IDs (numeric vs slug) cause the same provision to appear twice.
+ * Keeps the first (highest-ranked) occurrence.
+ *
+ * @param rows - Candidate rows, in the order they should be kept.
+ * @param limit - Maximum number of unique rows to return.
+ * @returns At most `limit` unique rows, in their original order.
+ */
+function deduplicateResults(
+  rows: SearchLegislationResult[],
+  limit: number,
+): SearchLegislationResult[] {
+  const seen = new Set<string>();
+  const deduped: SearchLegislationResult[] = [];
+  for (const row of rows) {
+    const key = `${row.document_title}::${row.provision_ref}`;
+    if (seen.has(key)) continue;
+    seen.add(key);
+    deduped.push(row);
+    if (deduped.length >= limit) break;
+  }
+  return deduped;
+}
diff --git a/src/utils/fts-query.ts b/src/utils/fts-query.ts
index 36f198f..086ffc9 100644
--- a/src/utils/fts-query.ts
+++ b/src/utils/fts-query.ts
@@ -4,49 +4,139 @@
  * Handles query sanitization and variant generation for SQLite FTS5.
  */
 
+const FTS5_BOOLEAN_OPS = /\b(AND|OR|NOT)\b/;
+
+/**
+ * Detect whether input contains FTS5 boolean operators.
+ * Only upper-case AND, OR and NOT count, and only as whole words.
+ */
+export function hasBooleanOperators(input: string): boolean {
+  return FTS5_BOOLEAN_OPS.test(input);
+}
+
 /**
  * Sanitize user input for safe FTS5 queries.
- * Removes characters that have special meaning in FTS5 syntax.
+ * Preserves boolean operators (AND, OR, NOT) when detected.
+ * Otherwise FTS5 syntax characters become spaces and `*` survives only as
+ * the trailing prefix marker of a word (`contract*`).
  */
 export function sanitizeFtsInput(input: string): string {
+  if (hasBooleanOperators(input)) {
+    // Preserve boolean structure: only strip dangerous chars, keep quotes and parens
+    return input.replace(/[{}[\]^~*:]/g, ' ').replace(/\s+/g, ' ').trim();
+  }
+  // Preserve trailing * on words (FTS5 prefix search) but strip other special chars
   return input
-    .replace(/['"(){}[\]^~*:]/g, ' ')
+    .replace(/['"(){}[\]^~:]/g, ' ')
+    .replace(/\*{2,}/g, '*') // collapse runs so "tax**" keeps one prefix marker
+    // Strip * mid-word or free-standing: either is an FTS5 syntax error
+    .replace(/\*(?!\s|$)|(?<!\S)\*/g, ' ')
     .replace(/\s+/g, ' ')
     .trim();
 }
 
+/**
+ * Truncate common English suffixes for stemming fallback.
+ * Returns the lower-cased stem (the caller appends "*"), or null if no stemming possible.
+ */
+function stemWord(word: string): string | null {
+  if (word.length < 5) return null;
+  const lower = word.toLowerCase();
+  for (const suffix of [
+    'ies', 'ing', 'ers', 'tion', 'ment', 'ness',
+    'able', 'ible', 'ous', 'ive', 'ed', 'es', 'er', 'ly', 's',
+  ]) {
+    if (lower.endsWith(suffix) && lower.length - suffix.length >= 3) {
+      return lower.slice(0, -suffix.length);
+    }
+  }
+  return null;
+}
+
 /**
  * Build FTS5 query variants for a search term.
  * Returns variants in order of specificity (most specific first):
  * 1. Exact phrase match
  * 2. All terms required (AND)
- * 3. Prefix match on last term
+ * 3. Prefix AND (last term gets prefix wildcard)
+ * 4. Stemmed prefix (suffix-truncated + wildcard)
+ * 5. Any term matches (OR) — broad fallback
+ *
+ * When boolean operators are detected, passes query through as-is.
+ * A term already ending in `*` never gets a second one, and a variant that
+ * repeats an earlier one is emitted only once.
  */
 export function buildFtsQueryVariants(sanitized: string): string[] {
   if (!sanitized || sanitized.trim().length === 0) {
     return [];
   }
 
+  // Boolean passthrough — user knows what they want
+  if (hasBooleanOperators(sanitized)) {
+    return [sanitized];
+  }
+
   const terms = sanitized.split(/\s+/).filter(t => t.length > 0);
   if (terms.length === 0) return [];
 
+  // Add the prefix wildcard unless the user already typed it ("tax*" must not become "tax**")
+  const asPrefix = (term: string): string => (term.endsWith('*') ? term : `${term}*`);
   const variants: string[] = [];
 
-  // Exact phrase
   if (terms.length > 1) {
+    // Exact phrase
     variants.push(`"${terms.join(' ')}"`);
+    // AND query
+    variants.push(terms.join(' AND '));
+    // Prefix AND on last term
+    variants.push([...terms.slice(0, -1), asPrefix(terms[terms.length - 1])].join(' AND '));
+  } else {
+    // Single term
+    variants.push(terms[0]);
+    if (terms[0].length >= 3) {
+      variants.push(asPrefix(terms[0]));
+    }
   }
 
-  // AND query
-  variants.push(terms.join(' AND '));
+  // Stemmed variant — truncate suffixes + wildcard
+  const stemmedTerms = terms.map(t => {
+    const stem = stemWord(t);
+    return stem ? `${stem}*` : t;
+  });
+  if (stemmedTerms.some((s, i) => s !== terms[i])) {
+    variants.push(stemmedTerms.join(' AND '));
+  }
 
-  // Prefix match on last term (for autocomplete-like behavior)
-  if (terms.length === 1 && terms[0].length >= 3) {
-    variants.push(`${terms[0]}*`);
-  } else if (terms.length > 1) {
-    const prefix = [...terms.slice(0, -1), `${terms[terms.length - 1]}*`];
-    variants.push(prefix.join(' AND '));
+  // OR fallback — any term matches (broadest)
+  if (terms.length > 1) {
+    variants.push(terms.join(' OR '));
   }
 
-  return variants;
+  // Drop repeats, keeping first occurrences, so no query runs twice
+  return Array.from(new Set(variants));
 }
+
+/**
+ * Build a SQL LIKE pattern from search terms.
+ * Used as a final fallback when FTS5 returns no results.
+ * Example: "penalty offence" -> "%penalty%offence%"
+ *
+ * FTS5-only syntax means nothing to LIKE, so quotes, parentheses and `*` are
+ * stripped, AND / OR are dropped, and everything from the first NOT onward is
+ * ignored. `%`, `_` and `\` inside a term are backslash-escaped, so the result
+ * must be bound to `LIKE ? ESCAPE '\'`.
+ *
+ * @returns The pattern, or a bare `%` when no usable term remains.
+ */
+export function buildLikePattern(query: string): string {
+  const terms: string[] = [];
+  for (const token of query.trim().split(/\s+/)) {
+    // Never require a term the user asked to exclude
+    if (token === 'NOT') break;
+    if (token === 'AND' || token === 'OR') continue;
+    const term = token.replace(/["()*]/g, '').replace(/[\\%_]/g, '\\$&');
+    if (term.length > 0) terms.push(term);
+  }
+  if (terms.length === 0) return '%';
+  return `%${terms.join('%')}%`;
+}
diff --git a/src/utils/metadata.ts b/src/utils/metadata.ts
index 097d23c..929e279 100644
--- a/src/utils/metadata.ts
+++ b/src/utils/metadata.ts
@@ -9,6 +9,10 @@ export interface ResponseMetadata {
   jurisdiction: string;
   disclaimer: string;
   freshness?: string;
+  /** Human-readable explanation attached when a request returns no results for a known reason. */
+  note?: string;
+  /** Fallback tier that produced the results ('broadened' or 'like_fallback'); omitted when the first FTS variant matched. */
+  query_strategy?: string;
 }
 
 export interface ToolResponse {