From 8145605d0f2d07ae8376c7985ca0b14a2e0d1d31 Mon Sep 17 00:00:00 2001
From: Mortalus
Date: Sun, 22 Feb 2026 20:43:10 +0100
Subject: [PATCH] ci: add weekly data freshness check workflow

---
 .github/workflows/check-updates.yml | 141 ++++++++++++++++++++++++
 package.json                        |   3 +-
 scripts/check-updates.ts            | 190 +++++++++++++++++++++++++++++++++
 3 files changed, 333 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/check-updates.yml
 create mode 100644 scripts/check-updates.ts

diff --git a/.github/workflows/check-updates.yml b/.github/workflows/check-updates.yml
new file mode 100644
index 0000000..24d9846
--- /dev/null
+++ b/.github/workflows/check-updates.yml
@@ -0,0 +1,141 @@
+name: Weekly Data Freshness Check
+
+on:
+  schedule:
+    # Every Monday at 05:00 UTC.
+    - cron: '0 5 * * 1'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  check-updates:
+    runs-on: ubuntu-latest
+    # Step outputs must be re-exported here to be visible to the report job.
+    outputs:
+      updates_found: ${{ steps.check.outputs.updates_found }}
+      check_error: ${{ steps.check.outputs.check_error }}
+      status_code: ${{ steps.check.outputs.status_code }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: npm
+
+      - name: Install dependencies
+        run: npm ci --ignore-scripts
+
+      - name: Build
+        run: npm run build
+
+      - name: Check upstream freshness
+        id: check
+        shell: bash
+        run: |
+          # Never fail this step; the exit code is exported for the report job.
+          set +e
+          npm run check-updates 2>&1 | tee check-updates.log
+          STATUS=${PIPESTATUS[0]}
+          echo "status_code=$STATUS" >> "$GITHUB_OUTPUT"
+          if [ "$STATUS" -eq 1 ]; then
+            echo "updates_found=true" >> "$GITHUB_OUTPUT"
+          elif [ "$STATUS" -eq 0 ]; then
+            echo "updates_found=false" >> "$GITHUB_OUTPUT"
+          else
+            echo "updates_found=false" >> "$GITHUB_OUTPUT"
+            echo "check_error=true" >> "$GITHUB_OUTPUT"
+          fi
+          exit 0
+
+      - name: Upload check output
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: check-updates-log
+          path: check-updates.log
+          retention-days: 14
+
+  report:
+    needs: check-updates
+    if: always()
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Create or update issue when updates found
+        if: needs.check-updates.outputs.updates_found == 'true' || needs.check-updates.outputs.check_error == 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const updatesFound = '${{ needs.check-updates.outputs.updates_found }}' === 'true';
+            const checkError = '${{ needs.check-updates.outputs.check_error }}' === 'true';
+
+            const title = checkError
+              ? 'Data Freshness: Check errors detected'
+              : 'Data Freshness: Updates available from upstream source';
+
+            const body = [
+              `**Automated freshness check:** ${new Date().toISOString()}`,
+              '',
+              `| Metric | Value |`,
+              `|--------|-------|`,
+              `| Updates found | ${updatesFound} |`,
+              `| Check errors | ${checkError} |`,
+              `| Exit code | ${{ needs.check-updates.outputs.status_code }} |`,
+              '',
+              '### Recommended actions',
+              '1. Run `npm run check-updates` locally for details',
+              '2. If updates available, run ingestion and rebuild database',
+              '3. Re-run contract tests before release',
+            ].join('\n');
+
+            const { data: issues } = await github.rest.issues.listForRepo({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'open',
+              labels: 'data-update',
+            });
+
+            if (issues.length > 0) {
+              await github.rest.issues.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issues[0].number,
+                title,
+                body,
+              });
+            } else {
+              await github.rest.issues.create({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                title,
+                body,
+                labels: ['data-update', 'automated'],
+              });
+            }
+
+      - name: Close stale issues when clean
+        # Require an explicit 'false' so a run that never reached the check does not close issues.
+        if: needs.check-updates.outputs.updates_found == 'false' && needs.check-updates.outputs.check_error != 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const { data: issues } = await github.rest.issues.listForRepo({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'open',
+              labels: 'data-update',
+            });
+            for (const issue of issues) {
+              await github.rest.issues.update({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issue.number,
+                state: 'closed',
+                state_reason: 'completed',
+              });
+            }
diff --git a/package.json b/package.json
index 1747b72..f70959a 100644
--- a/package.json
+++ b/package.json
@@ -52,7 +52,8 @@
     "validate": "npm run lint && npm test && npm run test:contract",
     "lint": "tsc --noEmit",
     "prepublishOnly": "npm run build",
-    "postinstall": "test -d dist || npm run build || true"
+    "postinstall": "test -d dist || npm run build || true",
+    "check-updates": "node --import tsx scripts/check-updates.ts"
   },
   "dependencies": {
     "@ansvar/mcp-sqlite": "^1.0.3",
diff --git a/scripts/check-updates.ts b/scripts/check-updates.ts
new file mode 100644
index 0000000..a57f421
--- /dev/null
+++ b/scripts/check-updates.ts
@@ -0,0 +1,190 @@
+#!/usr/bin/env tsx
+/**
+ * Israeli Law MCP — Data Freshness Checker
+ *
+ * Checks whether the local database is stale or missing expected legislation.
+ *
+ * Detection strategy:
+ *   1. Database age — flags if build_date is more than MAX_DB_AGE_DAYS days old
+ *   2. Document count — compares DB rows against census.json expected count
+ *   3. Source portal — verifies the official legal portal is reachable
+ *      (informational only: an unreachable portal is logged as a warning)
+ *
+ * Exit codes:
+ *   0 = database is fresh, no updates detected
+ *   1 = updates detected (stale DB or missing documents)
+ *   2 = check failed (DB missing or unexpected error)
+ */
+
+import { existsSync, readFileSync } from 'node:fs';
+import { dirname, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const DB_PATH = resolve(__dirname, '../data/database.db');
+const CENSUS_PATH = resolve(__dirname, '../data/census.json');
+
+const DEFAULT_MAX_DB_AGE_DAYS = 90;
+const MAX_DB_AGE_DAYS = parseMaxAgeDays(process.env['MAX_DB_AGE_DAYS']);
+const PORTAL_URL = 'https://www.nevo.co.il';
+const PORTAL_NAME = 'Nevo Legal Database';
+
+interface CensusSummary {
+  total_laws?: number;
+  total_ingestable?: number;
+  total_ingested?: number;
+  [key: string]: unknown;
+}
+
+/**
+ * Parses the MAX_DB_AGE_DAYS override.
+ *
+ * Unset, blank, non-numeric, or negative values fall back to the default so
+ * the age check never compares against NaN or an accidental zero threshold.
+ */
+function parseMaxAgeDays(raw: string | undefined): number {
+  const text = raw?.trim() ?? '';
+  if (text === '') return DEFAULT_MAX_DB_AGE_DAYS;
+  const days = Number(text);
+  if (!Number.isFinite(days) || days < 0) {
+    console.log(`WARN: Ignoring invalid MAX_DB_AGE_DAYS "${text}" — using default of ${DEFAULT_MAX_DB_AGE_DAYS} days`);
+    return DEFAULT_MAX_DB_AGE_DAYS;
+  }
+  return days;
+}
+
+/** Returns whole days elapsed since `isoDate`, or null if it is not a parseable date. */
+function daysSince(isoDate: string): number | null {
+  const dt = new Date(isoDate);
+  if (Number.isNaN(dt.getTime())) return null;
+  return Math.floor((Date.now() - dt.getTime()) / (1000 * 60 * 60 * 24));
+}
+
+/**
+ * Sends a HEAD request to `url`, aborting after 15 seconds.
+ *
+ * Resolves to true for an OK, 301, 302, or 403 response (403 still shows the
+ * host answered) and to false on any network error or timeout.
+ */
+async function checkPortal(url: string): Promise<boolean> {
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), 15_000);
+  try {
+    const res = await fetch(url, {
+      method: 'HEAD',
+      signal: controller.signal,
+      headers: { 'User-Agent': '@ansvar/israeli-law-mcp/1.0 (data-freshness-check)' },
+    });
+    return res.ok || res.status === 301 || res.status === 302 || res.status === 403;
+  } catch {
+    return false;
+  } finally {
+    // Clear the timer on every path so a failed request leaves nothing pending.
+    clearTimeout(timeout);
+  }
+}
+
+/** Runs every check in order and exits with the code listed in the file header. */
+async function main(): Promise<void> {
+  console.log('Israeli Law MCP — Data Freshness Check');
+  console.log(`Portal: ${PORTAL_NAME} (${PORTAL_URL})`);
+  console.log('');
+
+  // --- 1. Database existence ---
+  if (!existsSync(DB_PATH)) {
+    console.error('ERROR: Database not found at', DB_PATH);
+    console.error('Run "npm run build:db" first.');
+    process.exit(2);
+  }
+
+  // --- 2. Database age check ---
+  let updatesNeeded = false;
+  const { default: Database } = await import('@ansvar/mcp-sqlite');
+  const db = new Database(DB_PATH, { readonly: true });
+
+  let buildDate: string | null = null;
+  try {
+    const row = db.prepare("SELECT value FROM db_metadata WHERE key = 'build_date'").get() as { value: string } | undefined;
+    buildDate = row?.value ?? null;
+  } catch {
+    // db_metadata table may not exist
+  }
+
+  if (buildDate) {
+    const age = daysSince(buildDate);
+    if (age === null) {
+      console.log(`WARN: build_date "${buildDate}" is not a valid date — cannot assess age`);
+    } else if (age > MAX_DB_AGE_DAYS) {
+      console.log(`STALE: Database is ${age} days old (threshold: ${MAX_DB_AGE_DAYS} days)`);
+      updatesNeeded = true;
+    } else {
+      console.log(`OK: Database is ${age} days old (threshold: ${MAX_DB_AGE_DAYS} days)`);
+    }
+  } else {
+    console.log('WARN: No build_date in db_metadata — cannot assess age');
+  }
+
+  // --- 3. Document count check ---
+  let dbDocCount = 0;
+  let dbProvCount = 0;
+  try {
+    const docRow = db.prepare("SELECT COUNT(*) as count FROM legal_documents").get() as { count: number };
+    dbDocCount = docRow.count;
+    console.log(`DB documents: ${dbDocCount}`);
+  } catch {
+    console.log('WARN: Cannot count legal_documents');
+  }
+
+  try {
+    const provRow = db.prepare("SELECT COUNT(*) as count FROM legal_provisions").get() as { count: number };
+    dbProvCount = provRow.count;
+    console.log(`DB provisions: ${dbProvCount}`);
+  } catch {
+    console.log('WARN: Cannot count legal_provisions');
+  }
+
+  // Compare against census if available
+  if (existsSync(CENSUS_PATH)) {
+    try {
+      const census = JSON.parse(readFileSync(CENSUS_PATH, 'utf-8')) as { summary?: CensusSummary };
+      const expected = census.summary?.total_ingested ?? census.summary?.total_ingestable ?? census.summary?.total_laws;
+      if (expected && dbDocCount < expected) {
+        console.log(`MISSING: DB has ${dbDocCount} documents but census expects ${expected}`);
+        updatesNeeded = true;
+      } else if (expected) {
+        console.log(`OK: DB documents (${dbDocCount}) >= census expected (${expected})`);
+      }
+    } catch {
+      console.log('WARN: Could not parse census.json');
+    }
+  } else {
+    console.log('INFO: No census.json — skipping count comparison');
+  }
+
+  db.close();
+
+  // --- 4. Source portal reachability (informational; never changes the exit code) ---
+  console.log('');
+  console.log(`Checking portal: ${PORTAL_URL}`);
+  const portalOk = await checkPortal(PORTAL_URL);
+  if (portalOk) {
+    console.log(`OK: ${PORTAL_NAME} is reachable`);
+  } else {
+    console.log(`WARN: ${PORTAL_NAME} is unreachable — manual check recommended`);
+  }
+
+  // --- Result ---
+  console.log('');
+  if (updatesNeeded) {
+    console.log('RESULT: Updates detected — re-ingestion recommended');
+    process.exit(1);
+  } else {
+    console.log('RESULT: Database appears current — no updates needed');
+    process.exit(0);
+  }
+}
+
+main().catch((err: unknown) => {
+  console.error('Unexpected error:', err);
+  process.exit(2);
+});