fix: apply 5 fleet-wide bug fixes (dedup, wildcard, doc-id, fallback, metadata) (#10)
- Add deduplicateResults() to search-legislation and build-legal-stance
- Upgrade fts-query with stemming, boolean passthrough, LIKE fallback, OR tier
- Use resolveDocumentId() for document_id parameter in search tools
- Disclose query_strategy and note in metadata on broadened/failed queries
- Add note and query_strategy optional fields to ResponseMetadata interface

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
/**
|
||||
* build_legal_stance -- Build a comprehensive set of citations for a legal question.
|
||||
* build_legal_stance — Build a comprehensive set of citations for a legal question.
|
||||
*/
|
||||
|
||||
import type Database from '@ansvar/mcp-sqlite';
|
||||
import { buildFtsQueryVariants, sanitizeFtsInput } from '../utils/fts-query.js';
|
||||
import { buildFtsQueryVariants, buildLikePattern, sanitizeFtsInput } from '../utils/fts-query.js';
|
||||
import { resolveDocumentId } from '../utils/statute-id.js';
|
||||
import { generateResponseMetadata, type ToolResponse } from '../utils/metadata.js';
|
||||
|
||||
export interface BuildLegalStanceInput {
|
||||
@@ -31,8 +32,26 @@ export async function buildLegalStance(
|
||||
}
|
||||
|
||||
const limit = Math.min(Math.max(input.limit ?? 5, 1), 20);
|
||||
const fetchLimit = limit * 2;
|
||||
const queryVariants = buildFtsQueryVariants(sanitizeFtsInput(input.query));
|
||||
|
||||
// Resolve document_id from title if provided
|
||||
let resolvedDocId: string | undefined;
|
||||
if (input.document_id) {
|
||||
const resolved = resolveDocumentId(db, input.document_id);
|
||||
resolvedDocId = resolved ?? undefined;
|
||||
if (!resolved) {
|
||||
return {
|
||||
results: [],
|
||||
_metadata: {
|
||||
...generateResponseMetadata(db),
|
||||
note: `No document found matching "${input.document_id}"`,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
let queryStrategy = 'none';
|
||||
for (const ftsQuery of queryVariants) {
|
||||
let sql = `
|
||||
SELECT
|
||||
@@ -50,23 +69,93 @@ export async function buildLegalStance(
|
||||
`;
|
||||
const params: (string | number)[] = [ftsQuery];
|
||||
|
||||
if (input.document_id) {
|
||||
if (resolvedDocId) {
|
||||
sql += ' AND lp.document_id = ?';
|
||||
params.push(input.document_id);
|
||||
params.push(resolvedDocId);
|
||||
}
|
||||
|
||||
sql += ' ORDER BY relevance LIMIT ?';
|
||||
params.push(limit);
|
||||
params.push(fetchLimit);
|
||||
|
||||
try {
|
||||
const rows = db.prepare(sql).all(...params) as LegalStanceResult[];
|
||||
if (rows.length > 0) {
|
||||
return { results: rows, _metadata: generateResponseMetadata(db) };
|
||||
queryStrategy = ftsQuery === queryVariants[0] ? 'exact' : 'fallback';
|
||||
const deduped = deduplicateResults(rows, limit);
|
||||
return {
|
||||
results: deduped,
|
||||
_metadata: {
|
||||
...generateResponseMetadata(db),
|
||||
...(queryStrategy === 'fallback' ? { query_strategy: 'broadened' } : {}),
|
||||
},
|
||||
};
|
||||
}
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// LIKE fallback — final tier when FTS5 returns no results
|
||||
{
|
||||
const likePattern = buildLikePattern(sanitizeFtsInput(input.query));
|
||||
let likeSql = `
|
||||
SELECT
|
||||
lp.document_id,
|
||||
ld.title as document_title,
|
||||
lp.provision_ref,
|
||||
lp.section,
|
||||
lp.title,
|
||||
substr(lp.content, 1, 300) as snippet,
|
||||
0 as relevance
|
||||
FROM legal_provisions lp
|
||||
JOIN legal_documents ld ON ld.id = lp.document_id
|
||||
WHERE lp.content LIKE ?
|
||||
`;
|
||||
const likeParams: (string | number)[] = [likePattern];
|
||||
|
||||
if (resolvedDocId) {
|
||||
likeSql += ' AND lp.document_id = ?';
|
||||
likeParams.push(resolvedDocId);
|
||||
}
|
||||
|
||||
likeSql += ' LIMIT ?';
|
||||
likeParams.push(fetchLimit);
|
||||
|
||||
try {
|
||||
const rows = db.prepare(likeSql).all(...likeParams) as LegalStanceResult[];
|
||||
if (rows.length > 0) {
|
||||
return {
|
||||
results: deduplicateResults(rows, limit),
|
||||
_metadata: {
|
||||
...generateResponseMetadata(db),
|
||||
query_strategy: 'like_fallback',
|
||||
},
|
||||
};
|
||||
}
|
||||
} catch {
|
||||
// LIKE query failed
|
||||
}
|
||||
}
|
||||
|
||||
return { results: [], _metadata: generateResponseMetadata(db) };
|
||||
}
|
||||
|
||||
/**
 * Collapse rows that describe the same provision.
 *
 * Two rows are treated as duplicates when they share `document_title` and
 * `provision_ref`. (The same document can be stored under more than one id,
 * e.g. numeric vs. slug, which makes a provision show up twice.) The first
 * occurrence is kept, so pass rows already ordered best-match-first.
 *
 * @param rows  Candidate rows in ranking order.
 * @param limit Maximum number of rows to return; a value <= 0 yields `[]`.
 * @returns At most `limit` distinct rows, in their original relative order.
 */
function deduplicateResults(
  rows: LegalStanceResult[],
  limit: number,
): LegalStanceResult[] {
  // The cap below is only evaluated after a row has been pushed, so without
  // this guard a non-positive limit would still return one row.
  if (limit <= 0) return [];

  const seen = new Set<string>();
  const deduped: LegalStanceResult[] = [];

  for (const row of rows) {
    const key = `${row.document_title}::${row.provision_ref}`;
    if (seen.has(key)) continue;

    seen.add(key);
    deduped.push(row);

    // Stop early: callers over-fetch to leave room for dropped duplicates.
    if (deduped.length >= limit) break;
  }

  return deduped;
}
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
/**
|
||||
* search_legislation -- Full-text search across Israeli statute provisions.
|
||||
* search_legislation — Full-text search across Israeli statute provisions.
|
||||
*/
|
||||
|
||||
import type Database from '@ansvar/mcp-sqlite';
|
||||
import { buildFtsQueryVariants, sanitizeFtsInput } from '../utils/fts-query.js';
|
||||
import { buildFtsQueryVariants, buildLikePattern, sanitizeFtsInput } from '../utils/fts-query.js';
|
||||
import { normalizeAsOfDate } from '../utils/as-of-date.js';
|
||||
import { resolveDocumentId } from '../utils/statute-id.js';
|
||||
import { generateResponseMetadata, type ToolResponse } from '../utils/metadata.js';
|
||||
|
||||
export interface SearchLegislationInput {
|
||||
@@ -38,8 +39,27 @@ export async function searchLegislation(
|
||||
}
|
||||
|
||||
const limit = Math.min(Math.max(input.limit ?? DEFAULT_LIMIT, 1), MAX_LIMIT);
|
||||
// Fetch extra rows to account for deduplication
|
||||
const fetchLimit = limit * 2;
|
||||
const queryVariants = buildFtsQueryVariants(sanitizeFtsInput(input.query));
|
||||
|
||||
// Resolve document_id from title if provided (same resolution as get_provision)
|
||||
let resolvedDocId: string | undefined;
|
||||
if (input.document_id) {
|
||||
const resolved = resolveDocumentId(db, input.document_id);
|
||||
resolvedDocId = resolved ?? undefined;
|
||||
if (!resolved) {
|
||||
return {
|
||||
results: [],
|
||||
_metadata: {
|
||||
...generateResponseMetadata(db),
|
||||
note: `No document found matching "${input.document_id}"`,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
let queryStrategy = 'none';
|
||||
for (const ftsQuery of queryVariants) {
|
||||
let sql = `
|
||||
SELECT
|
||||
@@ -58,9 +78,9 @@ export async function searchLegislation(
|
||||
`;
|
||||
const params: (string | number)[] = [ftsQuery];
|
||||
|
||||
if (input.document_id) {
|
||||
if (resolvedDocId) {
|
||||
sql += ' AND lp.document_id = ?';
|
||||
params.push(input.document_id);
|
||||
params.push(resolvedDocId);
|
||||
}
|
||||
|
||||
if (input.status) {
|
||||
@@ -69,18 +89,95 @@ export async function searchLegislation(
|
||||
}
|
||||
|
||||
sql += ' ORDER BY relevance LIMIT ?';
|
||||
params.push(limit);
|
||||
params.push(fetchLimit);
|
||||
|
||||
try {
|
||||
const rows = db.prepare(sql).all(...params) as SearchLegislationResult[];
|
||||
if (rows.length > 0) {
|
||||
return { results: rows, _metadata: generateResponseMetadata(db) };
|
||||
queryStrategy = ftsQuery === queryVariants[0] ? 'exact' : 'fallback';
|
||||
const deduped = deduplicateResults(rows, limit);
|
||||
return {
|
||||
results: deduped,
|
||||
_metadata: {
|
||||
...generateResponseMetadata(db),
|
||||
...(queryStrategy === 'fallback' ? { query_strategy: 'broadened' } : {}),
|
||||
},
|
||||
};
|
||||
}
|
||||
} catch {
|
||||
// FTS query syntax error -- try next variant
|
||||
// FTS query syntax error — try next variant
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// LIKE fallback — final tier when FTS5 returns no results
|
||||
{
|
||||
const likePattern = buildLikePattern(sanitizeFtsInput(input.query));
|
||||
let likeSql = `
|
||||
SELECT
|
||||
lp.document_id,
|
||||
ld.title as document_title,
|
||||
lp.provision_ref,
|
||||
lp.chapter,
|
||||
lp.section,
|
||||
lp.title,
|
||||
substr(lp.content, 1, 200) as snippet,
|
||||
0 as relevance
|
||||
FROM legal_provisions lp
|
||||
JOIN legal_documents ld ON ld.id = lp.document_id
|
||||
WHERE lp.content LIKE ?
|
||||
`;
|
||||
const likeParams: (string | number)[] = [likePattern];
|
||||
|
||||
if (resolvedDocId) {
|
||||
likeSql += ' AND lp.document_id = ?';
|
||||
likeParams.push(resolvedDocId);
|
||||
}
|
||||
|
||||
if (input.status) {
|
||||
likeSql += ' AND ld.status = ?';
|
||||
likeParams.push(input.status);
|
||||
}
|
||||
|
||||
likeSql += ' LIMIT ?';
|
||||
likeParams.push(fetchLimit);
|
||||
|
||||
try {
|
||||
const rows = db.prepare(likeSql).all(...likeParams) as SearchLegislationResult[];
|
||||
if (rows.length > 0) {
|
||||
return {
|
||||
results: deduplicateResults(rows, limit),
|
||||
_metadata: {
|
||||
...generateResponseMetadata(db),
|
||||
query_strategy: 'like_fallback',
|
||||
},
|
||||
};
|
||||
}
|
||||
} catch {
|
||||
// LIKE query failed
|
||||
}
|
||||
}
|
||||
|
||||
return { results: [], _metadata: generateResponseMetadata(db) };
|
||||
}
|
||||
|
||||
/**
 * Deduplicate search results by `document_title` + `provision_ref`.
 *
 * Duplicate document ids (numeric vs. slug) cause the same provision to appear
 * twice in the raw rows. Only the first occurrence of each pair is kept, which
 * is the highest-ranked one when the rows arrive sorted by relevance.
 *
 * @param rows  Raw search rows in ranking order.
 * @param limit Maximum number of rows to return; a value <= 0 yields `[]`.
 * @returns At most `limit` distinct rows, in their original relative order.
 */
function deduplicateResults(
  rows: SearchLegislationResult[],
  limit: number,
): SearchLegislationResult[] {
  // The length check in the loop runs only after a push, so a non-positive
  // limit has to be rejected up front to avoid returning one row.
  if (limit <= 0) return [];

  const seen = new Set<string>();
  const deduped: SearchLegislationResult[] = [];

  for (const row of rows) {
    const key = `${row.document_title}::${row.provision_ref}`;
    if (seen.has(key)) continue;

    seen.add(key);
    deduped.push(row);

    // Callers fetch more rows than requested; stop once the cap is reached.
    if (deduped.length >= limit) break;
  }

  return deduped;
}
|
||||
|
||||
@@ -4,49 +4,115 @@
|
||||
* Handles query sanitization and variant generation for SQLite FTS5.
|
||||
*/
|
||||
|
||||
/** Matches an upper-case AND / OR / NOT that appears as a whole word. */
const FTS5_BOOLEAN_OPS = /\b(AND|OR|NOT)\b/;

/**
 * Detect whether input contains FTS5 boolean operators.
 *
 * Only upper-case whole words count: "cats AND dogs" matches, while
 * "cats and dogs" and "ANDROID" do not.
 *
 * @param input Raw or sanitized query text.
 * @returns `true` when at least one operator keyword is present.
 */
export function hasBooleanOperators(input: string): boolean {
  return FTS5_BOOLEAN_OPS.test(input);
}

/**
 * Sanitize user input for safe FTS5 queries.
 *
 * Two modes:
 * - Boolean queries (see {@link hasBooleanOperators}) keep quotes and
 *   parentheses so the user's grouping survives; `{ } [ ] ^ ~ * :` are
 *   replaced with spaces.
 * - Plain queries additionally lose quotes and parentheses, but a `*` that
 *   directly ends a word is kept so prefix search ("regulat*") still works.
 *
 * In both modes runs of whitespace collapse to one space and the result is
 * trimmed.
 *
 * @param input Raw user query.
 * @returns The cleaned query; may be the empty string.
 */
export function sanitizeFtsInput(input: string): string {
  if (hasBooleanOperators(input)) {
    // Preserve boolean structure: only strip dangerous chars, keep quotes and parens
    return input.replace(/[{}[\]^~*:]/g, ' ').replace(/\s+/g, ' ').trim();
  }

  return input
    // `*` is deliberately absent from this class; it is handled on its own
    // below. Stripping it here would defeat prefix search entirely.
    .replace(/['"(){}[\]^~:]/g, ' ')
    // Keep one `*` only when the run is glued to the end of a word and is
    // followed by whitespace or end of input. Anything else (leading, mid-word
    // or free-standing asterisks) becomes a space so no bare `*` term remains.
    .replace(/\*+/g, (run: string, offset: number, whole: string) => {
      const before = whole.charAt(offset - 1); // '' when the run starts the string
      const after = whole.charAt(offset + run.length); // '' when the run ends it
      const attached = before !== '' && !/\s/.test(before);
      const terminal = after === '' || /\s/.test(after);
      return attached && terminal ? '*' : ' ';
    })
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
/**
 * Crude suffix-stripping stemmer used for the stemmed search tier.
 *
 * The word is lower-cased and the first listed ending that matches is removed,
 * provided at least three characters remain. Words shorter than five
 * characters are never stemmed.
 *
 * @param word A single search term.
 * @returns The lower-cased stem (without any wildcard; the caller appends
 *          `*`), or `null` when no ending applies.
 */
function stemWord(word: string): string | null {
  if (word.length < 5) {
    return null;
  }

  const normalized = word.toLowerCase();

  // Order matters: the first ending that fits wins, so longer endings that
  // contain a shorter one ("ies" vs "es" vs "s") are listed earlier.
  const endings = [
    'ies', 'ing', 'ers', 'tion', 'ment', 'ness',
    'able', 'ible', 'ous', 'ive', 'ed', 'es', 'er', 'ly', 's',
  ];

  const matched = endings.find(
    ending => normalized.endsWith(ending) && normalized.length - ending.length >= 3,
  );

  return matched === undefined
    ? null
    : normalized.slice(0, normalized.length - matched.length);
}
|
||||
|
||||
/**
 * Build FTS5 query variants for a search term.
 *
 * Variants are returned most specific first, so a caller can try them in order
 * and stop at the first one that produces rows:
 *   1. Exact phrase match (multi-term input only)
 *   2. All terms required (AND)
 *   3. Prefix AND: the last term gets a `*` wildcard. For a single-term query
 *      this tier is only emitted when the term has at least 3 characters.
 *   4. Stemmed prefix: suffix-truncated terms + wildcard, only when stemming
 *      changes at least one term
 *   5. Any term matches (OR), the broadest fallback (multi-term input only)
 *
 * When boolean operators are detected the query is passed through as the only
 * variant.
 *
 * @param sanitized Query text that has already been through `sanitizeFtsInput`.
 * @returns Ordered list of FTS5 MATCH expressions; empty for blank input.
 */
export function buildFtsQueryVariants(sanitized: string): string[] {
  if (!sanitized || sanitized.trim().length === 0) {
    return [];
  }

  // Boolean passthrough — user knows what they want
  if (hasBooleanOperators(sanitized)) {
    return [sanitized];
  }

  const terms = sanitized.split(/\s+/).filter(t => t.length > 0);
  if (terms.length === 0) return [];

  const isMultiTerm = terms.length > 1;
  const leading = terms.slice(0, -1);
  const last = terms[terms.length - 1];
  const variants: string[] = [];

  // 1. Exact phrase
  if (isMultiTerm) {
    variants.push(`"${terms.join(' ')}"`);
  }

  // 2. AND query (for a single term this is just the term itself)
  variants.push(terms.join(' AND '));

  // 3. Prefix AND on the last term. Every leading term stays required; a term
  //    that already carries a trailing `*` is not given a second one.
  if (!last.endsWith('*') && (isMultiTerm || last.length >= 3)) {
    variants.push([...leading, `${last}*`].join(' AND '));
  }

  // 4. Stemmed variant — truncate suffixes + wildcard
  const stemmedTerms = terms.map(t => {
    const stem = stemWord(t);
    return stem ? `${stem}*` : t;
  });
  if (stemmedTerms.some((s, i) => s !== terms[i])) {
    variants.push(stemmedTerms.join(' AND '));
  }

  // 5. OR fallback — any term matches (broadest)
  if (isMultiTerm) {
    variants.push(terms.join(' OR '));
  }

  return variants;
}
|
||||
|
||||
/**
 * Turn a free-text query into a SQL LIKE pattern.
 *
 * Each whitespace-separated word must appear in the given order, with anything
 * in between: "penalty offence" becomes "%penalty%offence%". A blank query
 * yields the bare wildcard "%".
 *
 * Note that `%` and `_` inside the query are not escaped, so they act as LIKE
 * wildcards.
 *
 * @param query Search text, typically already sanitized.
 * @returns A pattern suitable for binding to a `LIKE ?` parameter.
 */
export function buildLikePattern(query: string): string {
  const words = query.match(/\S+/g);
  if (words === null) {
    return '%';
  }
  // Empty strings at both ends give the leading and trailing '%'.
  return ['', ...words, ''].join('%');
}
|
||||
|
||||
@@ -9,6 +9,8 @@ export interface ResponseMetadata {
|
||||
jurisdiction: string;
|
||||
disclaimer: string;
|
||||
freshness?: string;
|
||||
note?: string;
|
||||
query_strategy?: string;
|
||||
}
|
||||
|
||||
export interface ToolResponse<T> {
|
||||
|
||||
Reference in New Issue
Block a user