Merge pull request #11 from Ansvar-Systems/dev

fix: fleet-wide bug fixes — dedup, wildcard, doc-id, fallback, metadata
This commit is contained in:
Jeffrey von Rotz
2026-03-06 09:14:35 +01:00
committed by GitHub
5 changed files with 312 additions and 52 deletions

View File

@@ -1,10 +1,8 @@
/** /**
* about -- Server metadata, dataset statistics, and provenance. * about Server metadata, dataset statistics, and provenance.
*/ */
import type Database from '@ansvar/mcp-sqlite'; import type Database from '@ansvar/mcp-sqlite';
import { detectCapabilities, readDbMetadata } from '../capabilities.js';
import { SERVER_NAME, SERVER_VERSION, REPOSITORY_URL } from '../constants.js';
export interface AboutContext { export interface AboutContext {
version: string; version: string;
@@ -22,34 +20,42 @@ function safeCount(db: InstanceType<typeof Database>, sql: string): number {
} }
/**
 * about — Assemble the server's self-description payload.
 *
 * Row counts are obtained by handing `COUNT(*)` queries to `safeCount`. The
 * two EU counters are added to `stats` only when the `eu_references` count is
 * greater than zero.
 *
 * @param db - Database handle passed through to `safeCount`.
 * @param context - Supplies the `version` and `dbBuilt` values echoed in the result.
 * @returns Server name, version, jurisdiction, table counts, data sources,
 *          build timestamp, disclaimer and network links.
 */
export function getAbout(db: InstanceType<typeof Database>, context: AboutContext) {
  // Queried first: its value decides whether any EU statistics are reported.
  const euReferenceCount = safeCount(db, 'SELECT COUNT(*) as count FROM eu_references');

  const stats: Record<string, number> = {
    documents: safeCount(db, 'SELECT COUNT(*) as count FROM legal_documents'),
    provisions: safeCount(db, 'SELECT COUNT(*) as count FROM legal_provisions'),
    definitions: safeCount(db, 'SELECT COUNT(*) as count FROM definitions'),
    ...(euReferenceCount > 0
      ? {
          eu_documents: safeCount(db, 'SELECT COUNT(*) as count FROM eu_documents'),
          eu_references: euReferenceCount,
        }
      : {}),
  };

  const knessetSource = {
    name: 'Knesset Legislation Database',
    url: 'https://main.knesset.gov.il',
    authority: 'Knesset (Israeli Parliament)',
  };

  return {
    name: 'Israel Law MCP',
    version: context.version,
    jurisdiction: 'IL',
    description: 'Israel Law MCP — legislation via Model Context Protocol',
    stats,
    data_sources: [knessetSource],
    freshness: {
      database_built: context.dbBuilt,
    },
    disclaimer:
      'This is a research tool, not legal advice. Verify critical citations against official sources.',
    network: {
      name: 'Ansvar MCP Network',
      open_law: 'https://ansvar.eu/open-law',
      directory: 'https://ansvar.ai/mcp',
    },
  };
}

View File

@@ -1,9 +1,10 @@
/** /**
* build_legal_stance -- Build a comprehensive set of citations for a legal question. * build_legal_stance Build a comprehensive set of citations for a legal question.
*/ */
import type Database from '@ansvar/mcp-sqlite'; import type Database from '@ansvar/mcp-sqlite';
import { buildFtsQueryVariants, sanitizeFtsInput } from '../utils/fts-query.js'; import { buildFtsQueryVariants, buildLikePattern, sanitizeFtsInput } from '../utils/fts-query.js';
import { resolveDocumentId } from '../utils/statute-id.js';
import { generateResponseMetadata, type ToolResponse } from '../utils/metadata.js'; import { generateResponseMetadata, type ToolResponse } from '../utils/metadata.js';
export interface BuildLegalStanceInput { export interface BuildLegalStanceInput {
@@ -31,8 +32,26 @@ export async function buildLegalStance(
} }
const limit = Math.min(Math.max(input.limit ?? 5, 1), 20); const limit = Math.min(Math.max(input.limit ?? 5, 1), 20);
const fetchLimit = limit * 2;
const queryVariants = buildFtsQueryVariants(sanitizeFtsInput(input.query)); const queryVariants = buildFtsQueryVariants(sanitizeFtsInput(input.query));
// Resolve document_id from title if provided
let resolvedDocId: string | undefined;
if (input.document_id) {
const resolved = resolveDocumentId(db, input.document_id);
resolvedDocId = resolved ?? undefined;
if (!resolved) {
return {
results: [],
_metadata: {
...generateResponseMetadata(db),
note: `No document found matching "${input.document_id}"`,
},
};
}
}
let queryStrategy = 'none';
for (const ftsQuery of queryVariants) { for (const ftsQuery of queryVariants) {
let sql = ` let sql = `
SELECT SELECT
@@ -50,23 +69,93 @@ export async function buildLegalStance(
`; `;
const params: (string | number)[] = [ftsQuery]; const params: (string | number)[] = [ftsQuery];
if (input.document_id) { if (resolvedDocId) {
sql += ' AND lp.document_id = ?'; sql += ' AND lp.document_id = ?';
params.push(input.document_id); params.push(resolvedDocId);
} }
sql += ' ORDER BY relevance LIMIT ?'; sql += ' ORDER BY relevance LIMIT ?';
params.push(limit); params.push(fetchLimit);
try { try {
const rows = db.prepare(sql).all(...params) as LegalStanceResult[]; const rows = db.prepare(sql).all(...params) as LegalStanceResult[];
if (rows.length > 0) { if (rows.length > 0) {
return { results: rows, _metadata: generateResponseMetadata(db) }; queryStrategy = ftsQuery === queryVariants[0] ? 'exact' : 'fallback';
const deduped = deduplicateResults(rows, limit);
return {
results: deduped,
_metadata: {
...generateResponseMetadata(db),
...(queryStrategy === 'fallback' ? { query_strategy: 'broadened' } : {}),
},
};
} }
} catch { } catch {
continue; continue;
} }
} }
// LIKE fallback — final tier when FTS5 returns no results
{
const likePattern = buildLikePattern(sanitizeFtsInput(input.query));
let likeSql = `
SELECT
lp.document_id,
ld.title as document_title,
lp.provision_ref,
lp.section,
lp.title,
substr(lp.content, 1, 300) as snippet,
0 as relevance
FROM legal_provisions lp
JOIN legal_documents ld ON ld.id = lp.document_id
WHERE lp.content LIKE ?
`;
const likeParams: (string | number)[] = [likePattern];
if (resolvedDocId) {
likeSql += ' AND lp.document_id = ?';
likeParams.push(resolvedDocId);
}
likeSql += ' LIMIT ?';
likeParams.push(fetchLimit);
try {
const rows = db.prepare(likeSql).all(...likeParams) as LegalStanceResult[];
if (rows.length > 0) {
return {
results: deduplicateResults(rows, limit),
_metadata: {
...generateResponseMetadata(db),
query_strategy: 'like_fallback',
},
};
}
} catch {
// LIKE query failed
}
}
return { results: [], _metadata: generateResponseMetadata(db) }; return { results: [], _metadata: generateResponseMetadata(db) };
} }
/**
 * Collapse rows that describe the same provision and cap the output size.
 *
 * Rows count as duplicates when both `document_title` and `provision_ref`
 * match (the same provision can be stored under a numeric and a slug document
 * ID). The earliest row in `rows` wins, and scanning stops once `limit`
 * distinct rows have been collected.
 *
 * @param rows - Candidate rows, already in the order they should be kept.
 * @param limit - Maximum number of distinct rows to return.
 * @returns The first occurrence of each distinct provision, in input order.
 */
function deduplicateResults(
  rows: LegalStanceResult[],
  limit: number,
): LegalStanceResult[] {
  // A Map preserves insertion order, so it doubles as the ordered result list.
  const firstByKey = new Map<string, LegalStanceResult>();
  for (const candidate of rows) {
    const identity = `${candidate.document_title}::${candidate.provision_ref}`;
    if (!firstByKey.has(identity)) {
      firstByKey.set(identity, candidate);
      if (firstByKey.size >= limit) break;
    }
  }
  return Array.from(firstByKey.values());
}

View File

@@ -1,10 +1,11 @@
/** /**
* search_legislation -- Full-text search across Israeli statute provisions. * search_legislation Full-text search across Israeli statute provisions.
*/ */
import type Database from '@ansvar/mcp-sqlite'; import type Database from '@ansvar/mcp-sqlite';
import { buildFtsQueryVariants, sanitizeFtsInput } from '../utils/fts-query.js'; import { buildFtsQueryVariants, buildLikePattern, sanitizeFtsInput } from '../utils/fts-query.js';
import { normalizeAsOfDate } from '../utils/as-of-date.js'; import { normalizeAsOfDate } from '../utils/as-of-date.js';
import { resolveDocumentId } from '../utils/statute-id.js';
import { generateResponseMetadata, type ToolResponse } from '../utils/metadata.js'; import { generateResponseMetadata, type ToolResponse } from '../utils/metadata.js';
export interface SearchLegislationInput { export interface SearchLegislationInput {
@@ -38,8 +39,27 @@ export async function searchLegislation(
} }
const limit = Math.min(Math.max(input.limit ?? DEFAULT_LIMIT, 1), MAX_LIMIT); const limit = Math.min(Math.max(input.limit ?? DEFAULT_LIMIT, 1), MAX_LIMIT);
// Fetch extra rows to account for deduplication
const fetchLimit = limit * 2;
const queryVariants = buildFtsQueryVariants(sanitizeFtsInput(input.query)); const queryVariants = buildFtsQueryVariants(sanitizeFtsInput(input.query));
// Resolve document_id from title if provided (same resolution as get_provision)
let resolvedDocId: string | undefined;
if (input.document_id) {
const resolved = resolveDocumentId(db, input.document_id);
resolvedDocId = resolved ?? undefined;
if (!resolved) {
return {
results: [],
_metadata: {
...generateResponseMetadata(db),
note: `No document found matching "${input.document_id}"`,
},
};
}
}
let queryStrategy = 'none';
for (const ftsQuery of queryVariants) { for (const ftsQuery of queryVariants) {
let sql = ` let sql = `
SELECT SELECT
@@ -58,9 +78,9 @@ export async function searchLegislation(
`; `;
const params: (string | number)[] = [ftsQuery]; const params: (string | number)[] = [ftsQuery];
if (input.document_id) { if (resolvedDocId) {
sql += ' AND lp.document_id = ?'; sql += ' AND lp.document_id = ?';
params.push(input.document_id); params.push(resolvedDocId);
} }
if (input.status) { if (input.status) {
@@ -69,18 +89,95 @@ export async function searchLegislation(
} }
sql += ' ORDER BY relevance LIMIT ?'; sql += ' ORDER BY relevance LIMIT ?';
params.push(limit); params.push(fetchLimit);
try { try {
const rows = db.prepare(sql).all(...params) as SearchLegislationResult[]; const rows = db.prepare(sql).all(...params) as SearchLegislationResult[];
if (rows.length > 0) { if (rows.length > 0) {
return { results: rows, _metadata: generateResponseMetadata(db) }; queryStrategy = ftsQuery === queryVariants[0] ? 'exact' : 'fallback';
const deduped = deduplicateResults(rows, limit);
return {
results: deduped,
_metadata: {
...generateResponseMetadata(db),
...(queryStrategy === 'fallback' ? { query_strategy: 'broadened' } : {}),
},
};
} }
} catch { } catch {
// FTS query syntax error -- try next variant // FTS query syntax error try next variant
continue; continue;
} }
} }
// LIKE fallback — final tier when FTS5 returns no results
{
const likePattern = buildLikePattern(sanitizeFtsInput(input.query));
let likeSql = `
SELECT
lp.document_id,
ld.title as document_title,
lp.provision_ref,
lp.chapter,
lp.section,
lp.title,
substr(lp.content, 1, 200) as snippet,
0 as relevance
FROM legal_provisions lp
JOIN legal_documents ld ON ld.id = lp.document_id
WHERE lp.content LIKE ?
`;
const likeParams: (string | number)[] = [likePattern];
if (resolvedDocId) {
likeSql += ' AND lp.document_id = ?';
likeParams.push(resolvedDocId);
}
if (input.status) {
likeSql += ' AND ld.status = ?';
likeParams.push(input.status);
}
likeSql += ' LIMIT ?';
likeParams.push(fetchLimit);
try {
const rows = db.prepare(likeSql).all(...likeParams) as SearchLegislationResult[];
if (rows.length > 0) {
return {
results: deduplicateResults(rows, limit),
_metadata: {
...generateResponseMetadata(db),
query_strategy: 'like_fallback',
},
};
}
} catch {
// LIKE query failed
}
}
return { results: [], _metadata: generateResponseMetadata(db) }; return { results: [], _metadata: generateResponseMetadata(db) };
} }
/**
 * Remove repeated provisions from a ranked result list and cap its length.
 *
 * A provision is identified by `document_title` plus `provision_ref`, because
 * the same provision can appear under both a numeric and a slug document ID.
 * Only the first (highest-ranked) row for each provision is kept.
 *
 * @param rows - Search hits, best match first.
 * @param limit - Maximum number of distinct hits to return.
 * @returns Distinct hits in their original order.
 */
function deduplicateResults(
  rows: SearchLegislationResult[],
  limit: number,
): SearchLegislationResult[] {
  const emittedKeys = new Set<string>();
  const unique: SearchLegislationResult[] = [];
  for (const hit of rows) {
    const identity = `${hit.document_title}::${hit.provision_ref}`;
    if (!emittedKeys.has(identity)) {
      emittedKeys.add(identity);
      unique.push(hit);
      // Enough distinct hits collected; the remaining rows are not inspected.
      if (unique.length >= limit) {
        return unique;
      }
    }
  }
  return unique;
}

View File

@@ -4,49 +4,115 @@
* Handles query sanitization and variant generation for SQLite FTS5. * Handles query sanitization and variant generation for SQLite FTS5.
*/ */
// Upper-case AND / OR / NOT appearing as whole words.
const FTS5_BOOLEAN_OPS = /\b(AND|OR|NOT)\b/;

/**
 * Report whether `input` contains an upper-case `AND`, `OR` or `NOT` as a
 * standalone word. Lower-case words and longer words that merely contain those
 * letters do not count.
 *
 * @param input - Raw or sanitized query text.
 * @returns `true` when at least one such operator word is present.
 */
export function hasBooleanOperators(input: string): boolean {
  return input.search(FTS5_BOOLEAN_OPS) !== -1;
}
/**
 * Sanitize user input for safe FTS5 queries.
 *
 * - When boolean operators (AND, OR, NOT) are detected, quotes and parentheses
 *   are kept so the boolean structure survives; only `{ } [ ] ^ ~ * :` are
 *   replaced by spaces.
 * - Otherwise quotes, parentheses and the characters above are replaced by
 *   spaces, except that a single `*` directly following a word is kept so the
 *   word can act as an FTS5 prefix search (`contract*`).
 *
 * A `*` that is not attached to the end of a word (leading, free-standing, or
 * the extra stars in a run such as `foo**`) is removed: left in place it would
 * become a bare `*` token and make every generated query a syntax error.
 *
 * @param input - Raw query text from the caller.
 * @returns The cleaned query with whitespace collapsed and trimmed.
 */
export function sanitizeFtsInput(input: string): string {
  if (hasBooleanOperators(input)) {
    // Preserve boolean structure: only strip dangerous chars, keep quotes and parens
    return input.replace(/[{}[\]^~*:]/g, ' ').replace(/\s+/g, ' ').trim();
  }

  return input
    .replace(/['"(){}[\]^~:]/g, ' ')
    .replace(/\*+/g, '*') // collapse runs so "foo**" keeps exactly one star
    .replace(/\*(?!\s|$)/g, ' ') // a star in the middle of a word splits the word
    .replace(/(^|\s)\*/g, '$1') // drop stars that are not attached to a word
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Strip one common English suffix from a word so the remainder can be used as
 * a prefix-search stem.
 *
 * Words shorter than five characters are never stemmed. Suffixes are tried in
 * the listed order and the first one that leaves at least three characters is
 * removed. The result is lower-cased and does not include the `*` wildcard.
 *
 * @param word - A single search term.
 * @returns The lower-cased stem, or `null` when no suffix applies.
 */
function stemWord(word: string): string | null {
  const suffixes = [
    'ies', 'ing', 'ers', 'tion', 'ment', 'ness',
    'able', 'ible', 'ous', 'ive', 'ed', 'es', 'er', 'ly', 's',
  ];

  if (word.length < 5) {
    return null;
  }

  const normalized = word.toLowerCase();
  const matched = suffixes.find(
    (ending) => normalized.endsWith(ending) && normalized.length - ending.length >= 3,
  );

  return matched === undefined
    ? null
    : normalized.slice(0, normalized.length - matched.length);
}
/**
 * Build FTS5 query variants for a search term.
 * Returns variants in order of specificity (most specific first):
 *   1. Exact phrase match (multi-term queries only)
 *   2. All terms required (AND)
 *   3. Prefix AND (last term gets prefix wildcard)
 *   4. Stemmed prefix (suffix-truncated + wildcard)
 *   5. Any term matches (OR) — broad fallback (multi-term queries only)
 *
 * When boolean operators are detected, the query is passed through as-is.
 *
 * A term that already ends in `*` is treated as a prefix search: it is never
 * given a second `*`, it is not stemmed, and in the phrase variant the `*` is
 * placed after the closing quote (FTS5 ignores a `*` inside quotes). Tokens
 * consisting only of `*` are dropped. Identical variants are emitted once.
 *
 * @param sanitized - Query text, normally the output of `sanitizeFtsInput`.
 * @returns Distinct FTS5 MATCH expressions, most specific first; empty when
 *          there is nothing to search for.
 */
export function buildFtsQueryVariants(sanitized: string): string[] {
  if (!sanitized || sanitized.trim().length === 0) {
    return [];
  }

  // Boolean passthrough — user knows what they want
  if (hasBooleanOperators(sanitized)) {
    return [sanitized];
  }

  // Separate each token's text from its trailing-star marker up front so the
  // star can be re-attached exactly once, in the right place, per variant.
  const terms = sanitized
    .split(/\s+/)
    .map((token) => ({ text: token.replace(/\*+$/, ''), isPrefix: token.endsWith('*') }))
    .filter((term) => term.text.length > 0);
  if (terms.length === 0) return [];

  const rendered = terms.map((term) => (term.isPrefix ? `${term.text}*` : term.text));
  const last = terms[terms.length - 1];
  const variants: string[] = [];

  if (terms.length > 1) {
    // Exact phrase; a prefix marker on the last term goes outside the quotes
    const phrase = `"${terms.map((term) => term.text).join(' ')}"`;
    variants.push(last.isPrefix ? `${phrase} *` : phrase);
    // AND query
    variants.push(rendered.join(' AND '));
    // Prefix AND on last term
    variants.push([...rendered.slice(0, -1), `${last.text}*`].join(' AND '));
  } else {
    // Single term
    variants.push(rendered[0]);
    if (last.text.length >= 3) {
      variants.push(`${last.text}*`);
    }
  }

  // Stemmed variant — truncate suffixes + wildcard (explicit prefix terms are kept as typed)
  const stemmedTerms = terms.map((term, i) => {
    if (term.isPrefix) return rendered[i];
    const stem = stemWord(term.text);
    return stem ? `${stem}*` : term.text;
  });
  if (stemmedTerms.some((s, i) => s !== rendered[i])) {
    variants.push(stemmedTerms.join(' AND '));
  }

  // OR fallback — any term matches (broadest)
  if (terms.length > 1) {
    variants.push(rendered.join(' OR '));
  }

  // Drop repeats (e.g. AND and prefix-AND coincide when the last term is
  // already a prefix) so callers do not run the same query twice.
  return Array.from(new Set(variants));
}
/**
 * Build a SQL LIKE pattern from search terms.
 * Used as a final fallback when FTS5 returns no results.
 * Example: "penalty offence" -> "%penalty%offence%"
 *
 * The input is FTS-style query text, so FTS5-only syntax is removed before
 * the pattern is built — left in place it would be matched as literal text:
 *   - `*`, `"`, `(` and `)` are stripped from every token
 *     ("contract*" -> "%contract%").
 *   - The operator words AND and OR are dropped.
 *   - NOT is dropped together with the terms that follow it, up to the next
 *     AND/OR. LIKE cannot express negation, so the negated terms are omitted
 *     rather than required.
 * This is a best-effort approximation; terms must still appear in the given order.
 *
 * NOTE(review): `%` and `_` in the input still act as LIKE wildcards. Escaping
 * them needs an `ESCAPE` clause in the SQL at the call sites.
 *
 * @param query - Sanitized query text.
 * @returns A pattern of the form `%term%term%`, or `%` when no terms remain.
 */
export function buildLikePattern(query: string): string {
  const terms: string[] = [];
  let insideNegation = false;

  for (const rawToken of query.trim().split(/\s+/)) {
    const token = rawToken.replace(/[*"()]/g, '');
    if (token === 'NOT') {
      insideNegation = true;
      continue;
    }
    if (token === 'AND' || token === 'OR') {
      insideNegation = false;
      continue;
    }
    if (token.length === 0 || insideNegation) continue;
    terms.push(token);
  }

  if (terms.length === 0) return '%';
  return `%${terms.join('%')}%`;
}

View File

@@ -9,6 +9,8 @@ export interface ResponseMetadata {
jurisdiction: string; jurisdiction: string;
disclaimer: string; disclaimer: string;
freshness?: string; freshness?: string;
note?: string;
query_strategy?: string;
} }
export interface ToolResponse<T> { export interface ToolResponse<T> {