From 438f63ecc3b4912f0ff23d78945f80ac0f663fe4 Mon Sep 17 00:00:00 2001
From: Jeffrey von Rotz
Date: Tue, 7 Apr 2026 16:37:31 +0200
Subject: [PATCH] feat: add _citation metadata for deterministic citation pipeline (#17)

* feat: add citation metadata utility

* feat: add _citation metadata to get-provision response

* fix: add _citation to ToolResponse interface
---
NOTE(review): line breaks and indentation in this patch were reconstructed.
Context-line whitespace in the existing-file hunks is assumed to be 2-space;
confirm against the tree before applying, or use
`git apply --ignore-whitespace`. The generic arguments
(`Record<string, unknown>`, `ToolResponse<T>`) were restored on the same
basis and should be confirmed too. src/utils/citation.ts no longer matches
the blob named on its index line: the provision and regulation builders now
delegate to buildCitation, statute-ID patterns are hoisted, and provision
aliases are de-duplicated and never repeat canonical_ref.

 src/tools/get-provision.ts |  10 ++
 src/utils/citation.ts      | 174 +++++++++++++++++++++++++++++++++++++
 src/utils/metadata.ts      |   1 +
 3 files changed, 185 insertions(+)
 create mode 100644 src/utils/citation.ts

diff --git a/src/tools/get-provision.ts b/src/tools/get-provision.ts
index 7186b7e..244f5ac 100644
--- a/src/tools/get-provision.ts
+++ b/src/tools/get-provision.ts
@@ -5,6 +5,7 @@
 import type Database from '@ansvar/mcp-sqlite';
 import { resolveDocumentId } from '../utils/statute-id.js';
 import { generateResponseMetadata, type ToolResponse } from '../utils/metadata.js';
+import { buildProvisionCitation } from '../utils/citation.js';
 
 export interface GetProvisionInput {
   document_id: string;
@@ -91,6 +92,15 @@ export async function getProvision(
       article_number: String(provision.provision_ref).replace(/^sec/, ''),
       url: docRow.url ?? undefined,
     }],
+    _citation: buildProvisionCitation(
+      resolvedId,
+      docRow.title || '',
+      String(provision.provision_ref) || ref || '',
+      input.document_id,
+      input.section || input.provision_ref || '',
+      docRow.url || null,
+      null,
+    ),
     _metadata: generateResponseMetadata(db),
   };
 }
diff --git a/src/utils/citation.ts b/src/utils/citation.ts
new file mode 100644
index 0000000..de401ab
--- /dev/null
+++ b/src/utils/citation.ts
@@ -0,0 +1,174 @@
+/**
+ * Citation metadata for the deterministic citation pipeline.
+ *
+ * Provides structured identifiers (canonical_ref, display_text, aliases)
+ * that the platform's entity linker uses to match references in agent
+ * responses to MCP tool results — without relying on LLM formatting.
+ *
+ * This is the UNIVERSAL template — works for all MCP types (law, sector,
+ * agriculture, domain). Each MCP adapts the builder call to its own
+ * field names.
+ *
+ * See: docs/guides/law-mcp-golden-standard.md Section 4.9c
+ */
+
+export interface CitationMetadata {
+  canonical_ref: string;
+  display_text: string;
+  aliases?: string[];
+  source_url?: string;
+  lookup: {
+    tool: string;
+    args: Record<string, unknown>;
+  };
+}
+
+/** Swedish SFS statute ID, e.g. "2018:218". */
+const SFS_ID = /^\d{4}:\d+$/;
+
+/** Norwegian Lovdata statute ID prefix, e.g. "LOV-2018-06-15-38". */
+const LOVDATA_ID = /^LOV-\d{4}/;
+
+/**
+ * Build citation metadata for any retrieval tool response.
+ *
+ * Optional fields are omitted from the result rather than set to
+ * null/empty: `aliases` only appears when non-empty, `source_url` only
+ * when truthy.
+ *
+ * @param canonicalRef Primary reference the entity linker matches against
+ *   (e.g., "SFS 2018:218", "GDPR Article 33", "CVE-2024-1234")
+ * @param displayText How the reference appears in prose
+ *   (e.g., "34 § SFS 2018:218", "Article 33 of GDPR")
+ * @param toolName The MCP tool name (e.g., "get_provision", "get_article")
+ * @param toolArgs The tool arguments for verification lookup
+ * @param sourceUrl Official portal URL (optional)
+ * @param aliases Alternative names the LLM might use (optional)
+ * @returns Citation metadata with `lookup` always present
+ */
+export function buildCitation(
+  canonicalRef: string,
+  displayText: string,
+  toolName: string,
+  toolArgs: Record<string, unknown>,
+  sourceUrl?: string | null,
+  aliases?: string[],
+): CitationMetadata {
+  return {
+    canonical_ref: canonicalRef,
+    display_text: displayText,
+    ...(aliases && aliases.length > 0 && { aliases }),
+    ...(sourceUrl && { source_url: sourceUrl }),
+    lookup: {
+      tool: toolName,
+      args: toolArgs,
+    },
+  };
+}
+
+/**
+ * Build citation metadata for a law MCP get_provision response.
+ *
+ * Handles Swedish-style YYYY:NNN statute IDs, chapter:section notation,
+ * and short-name aliases. Other jurisdictions adapt field names.
+ *
+ * Aliases never repeat canonical_ref or each other, and blank values
+ * are skipped.
+ *
+ * @param documentId DB identifier (e.g., "2018:218", "LOV-2018-06-15-38")
+ * @param documentTitle Full title of the law
+ * @param provisionRef Provision reference (e.g., "34", "3:12")
+ * @param inputDocId The document_id argument as passed by the caller
+ * @param inputSection The section argument as passed by the caller
+ * @param sourceUrl Official portal URL (optional)
+ * @param shortName Short name / alias (optional)
+ * @returns Citation metadata whose lookup targets `get_provision`
+ */
+export function buildProvisionCitation(
+  documentId: string,
+  documentTitle: string,
+  provisionRef: string,
+  inputDocId: string,
+  inputSection: string,
+  sourceUrl?: string | null,
+  shortName?: string | null,
+): CitationMetadata {
+  // Build canonical_ref — detect common statute ID formats
+  let canonicalRef: string;
+  if (SFS_ID.test(documentId)) {
+    // Swedish SFS format: "2018:218" → "SFS 2018:218"
+    canonicalRef = `SFS ${documentId}`;
+  } else if (LOVDATA_ID.test(documentId)) {
+    // Norwegian Lovdata format
+    canonicalRef = documentId;
+  } else {
+    canonicalRef = documentTitle || documentId;
+  }
+
+  // Build display_text — split on the first colon only, so a ref with
+  // further colons keeps its tail instead of silently losing it
+  const colon = provisionRef ? provisionRef.indexOf(':') : -1;
+  const chapter = colon > 0 ? provisionRef.slice(0, colon) : '';
+  const section = colon > 0 ? provisionRef.slice(colon + 1) : '';
+  let displayText: string;
+  if (chapter && section) {
+    // Chapter:section format (e.g., "3:12" → "3 kap. 12 §")
+    displayText = `${chapter} kap. ${section} § ${canonicalRef}`;
+  } else if (provisionRef) {
+    // Plain section, or a half-empty "3:" / ":12" shown verbatim
+    displayText = `§ ${provisionRef} ${canonicalRef}`;
+  } else {
+    displayText = canonicalRef;
+  }
+
+  // Build aliases — drop blanks, anything equal to canonical_ref, and repeats
+  const aliases = Array.from(
+    new Set(
+      [shortName, documentId, documentTitle].filter(
+        (alias): alias is string => !!alias && alias !== canonicalRef,
+      ),
+    ),
+  );
+
+  return buildCitation(
+    canonicalRef,
+    displayText,
+    'get_provision',
+    { document_id: inputDocId, section: inputSection },
+    sourceUrl,
+    aliases,
+  );
+}
+
+/**
+ * Build citation for a sector regulator decision/regulation.
+ *
+ * @param reference Decision/regulation reference (e.g., "FFFS 2024:1")
+ * @param title Full title
+ * @param toolName Tool name (e.g., "se_dp_get_decision")
+ * @param toolArgs Tool arguments
+ * @param authority Issuing authority (e.g., "IMY", "FI")
+ * @param sourceUrl Official URL (optional)
+ * @returns Citation metadata; display_text falls back to the reference
+ *   when the title is empty
+ */
+export function buildRegulationCitation(
+  reference: string,
+  title: string,
+  toolName: string,
+  toolArgs: Record<string, unknown>,
+  authority?: string | null,
+  sourceUrl?: string | null,
+): CitationMetadata {
+  // The authority-qualified form is the only alias offered
+  const aliases = authority ? [`${authority}: ${reference}`] : [];
+
+  return buildCitation(
+    reference,
+    title || reference,
+    toolName,
+    toolArgs,
+    sourceUrl,
+    aliases,
+  );
+}
diff --git a/src/utils/metadata.ts b/src/utils/metadata.ts
index 929e279..b24a062 100644
--- a/src/utils/metadata.ts
+++ b/src/utils/metadata.ts
@@ -16,6 +16,7 @@ export interface ResponseMetadata {
 export interface ToolResponse<T> {
   results: T;
   _metadata: ResponseMetadata;
+  _citation?: import('./citation.js').CitationMetadata;
 }
 
 export function generateResponseMetadata(