ci: add weekly data freshness check workflow
This commit is contained in:
133
.github/workflows/check-updates.yml
vendored
Normal file
133
.github/workflows/check-updates.yml
vendored
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
name: Daily Data Freshness Check
|
||||||
|
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
- cron: '0 5 * * 1'
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
issues: write
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
check-updates:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: '22'
|
||||||
|
cache: npm
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: npm ci --ignore-scripts
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
run: npm run build
|
||||||
|
|
||||||
|
- name: Check upstream freshness
|
||||||
|
id: check
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
set +e
|
||||||
|
npm run check-updates 2>&1 | tee check-updates.log
|
||||||
|
STATUS=${PIPESTATUS[0]}
|
||||||
|
echo "status_code=$STATUS" >> "$GITHUB_OUTPUT"
|
||||||
|
if [ "$STATUS" -eq 1 ]; then
|
||||||
|
echo "updates_found=true" >> "$GITHUB_OUTPUT"
|
||||||
|
elif [ "$STATUS" -eq 0 ]; then
|
||||||
|
echo "updates_found=false" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "updates_found=false" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "check_error=true" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
- name: Upload check output
|
||||||
|
if: always()
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: check-updates-log
|
||||||
|
path: check-updates.log
|
||||||
|
retention-days: 14
|
||||||
|
|
||||||
|
report:
|
||||||
|
needs: check-updates
|
||||||
|
if: always()
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Create or update issue when updates found
|
||||||
|
if: needs.check-updates.outputs.updates_found == 'true' || needs.check-updates.outputs.check_error == 'true'
|
||||||
|
uses: actions/github-script@v7
|
||||||
|
with:
|
||||||
|
script: |
|
||||||
|
const updatesFound = '${{ needs.check-updates.outputs.updates_found }}' === 'true';
|
||||||
|
const checkError = '${{ needs.check-updates.outputs.check_error }}' === 'true';
|
||||||
|
|
||||||
|
const title = checkError
|
||||||
|
? 'Data Freshness: Check errors detected'
|
||||||
|
: 'Data Freshness: Updates available from upstream source';
|
||||||
|
|
||||||
|
const body = [
|
||||||
|
`**Automated freshness check:** ${new Date().toISOString()}`,
|
||||||
|
'',
|
||||||
|
`| Metric | Value |`,
|
||||||
|
`|--------|-------|`,
|
||||||
|
`| Updates found | ${updatesFound} |`,
|
||||||
|
`| Check errors | ${checkError} |`,
|
||||||
|
`| Exit code | ${{ needs.check-updates.outputs.status_code }} |`,
|
||||||
|
'',
|
||||||
|
'### Recommended actions',
|
||||||
|
'1. Run `npm run check-updates` locally for details',
|
||||||
|
'2. If updates available, run ingestion and rebuild database',
|
||||||
|
'3. Re-run contract tests before release',
|
||||||
|
].join('\n');
|
||||||
|
|
||||||
|
const { data: issues } = await github.rest.issues.listForRepo({
|
||||||
|
owner: context.repo.owner,
|
||||||
|
repo: context.repo.repo,
|
||||||
|
state: 'open',
|
||||||
|
labels: 'data-update',
|
||||||
|
});
|
||||||
|
|
||||||
|
if (issues.length > 0) {
|
||||||
|
await github.rest.issues.update({
|
||||||
|
owner: context.repo.owner,
|
||||||
|
repo: context.repo.repo,
|
||||||
|
issue_number: issues[0].number,
|
||||||
|
title,
|
||||||
|
body,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
await github.rest.issues.create({
|
||||||
|
owner: context.repo.owner,
|
||||||
|
repo: context.repo.repo,
|
||||||
|
title,
|
||||||
|
body,
|
||||||
|
labels: ['data-update', 'automated'],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
- name: Close stale issues when clean
|
||||||
|
if: needs.check-updates.outputs.updates_found != 'true' && needs.check-updates.outputs.check_error != 'true'
|
||||||
|
uses: actions/github-script@v7
|
||||||
|
with:
|
||||||
|
script: |
|
||||||
|
const { data: issues } = await github.rest.issues.listForRepo({
|
||||||
|
owner: context.repo.owner,
|
||||||
|
repo: context.repo.repo,
|
||||||
|
state: 'open',
|
||||||
|
labels: 'data-update',
|
||||||
|
});
|
||||||
|
for (const issue of issues) {
|
||||||
|
await github.rest.issues.update({
|
||||||
|
owner: context.repo.owner,
|
||||||
|
repo: context.repo.repo,
|
||||||
|
issue_number: issue.number,
|
||||||
|
state: 'closed',
|
||||||
|
state_reason: 'completed',
|
||||||
|
});
|
||||||
|
}
|
||||||
@@ -52,7 +52,8 @@
|
|||||||
"validate": "npm run lint && npm test && npm run test:contract",
|
"validate": "npm run lint && npm test && npm run test:contract",
|
||||||
"lint": "tsc --noEmit",
|
"lint": "tsc --noEmit",
|
||||||
"prepublishOnly": "npm run build",
|
"prepublishOnly": "npm run build",
|
||||||
"postinstall": "test -d dist || npm run build || true"
|
"postinstall": "test -d dist || npm run build || true",
|
||||||
|
"check-updates": "node --import tsx scripts/check-updates.ts"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ansvar/mcp-sqlite": "^1.0.3",
|
"@ansvar/mcp-sqlite": "^1.0.3",
|
||||||
|
|||||||
159
scripts/check-updates.ts
Normal file
159
scripts/check-updates.ts
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
#!/usr/bin/env tsx
|
||||||
|
/**
|
||||||
|
* Israeli Law MCP — Data Freshness Checker
|
||||||
|
*
|
||||||
|
* Checks whether the local database is stale or missing expected legislation.
|
||||||
|
*
|
||||||
|
* Detection strategy:
|
||||||
|
* 1. Database age — flags if build_date > MAX_AGE days old
|
||||||
|
* 2. Document count — compares DB rows against census.json expected count
|
||||||
|
* 3. Source portal — verifies the official legal portal is reachable
|
||||||
|
*
|
||||||
|
* Exit codes:
|
||||||
|
* 0 = database is fresh, no updates detected
|
||||||
|
* 1 = updates detected (stale DB, missing documents, or new content upstream)
|
||||||
|
* 2 = check failed (DB missing, portal unreachable, unexpected error)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { existsSync, readFileSync } from 'node:fs';
|
||||||
|
import { dirname, resolve } from 'node:path';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
|
||||||
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||||
|
const DB_PATH = resolve(__dirname, '../data/database.db');
|
||||||
|
const CENSUS_PATH = resolve(__dirname, '../data/census.json');
|
||||||
|
|
||||||
|
const MAX_DB_AGE_DAYS = Number(process.env['MAX_DB_AGE_DAYS'] ?? '90');
|
||||||
|
const PORTAL_URL = 'https://www.nevo.co.il';
|
||||||
|
const PORTAL_NAME = 'Nevo Legal Database';
|
||||||
|
|
||||||
|
interface CensusSummary {
|
||||||
|
total_laws?: number;
|
||||||
|
total_ingestable?: number;
|
||||||
|
total_ingested?: number;
|
||||||
|
[key: string]: unknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
function daysSince(isoDate: string): number | null {
|
||||||
|
const dt = new Date(isoDate);
|
||||||
|
if (Number.isNaN(dt.getTime())) return null;
|
||||||
|
return Math.floor((Date.now() - dt.getTime()) / (1000 * 60 * 60 * 24));
|
||||||
|
}
|
||||||
|
|
||||||
|
async function checkPortal(url: string): Promise<boolean> {
|
||||||
|
try {
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timeout = setTimeout(() => controller.abort(), 15_000);
|
||||||
|
const res = await fetch(url, {
|
||||||
|
method: 'HEAD',
|
||||||
|
signal: controller.signal,
|
||||||
|
headers: { 'User-Agent': '@ansvar/israeli-law-mcp/1.0 (data-freshness-check)' },
|
||||||
|
});
|
||||||
|
clearTimeout(timeout);
|
||||||
|
return res.ok || res.status === 301 || res.status === 302 || res.status === 403;
|
||||||
|
} catch {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function main(): Promise<void> {
|
||||||
|
console.log('Israeli Law MCP — Data Freshness Check');
|
||||||
|
console.log(`Portal: ${PORTAL_NAME} (${PORTAL_URL})`);
|
||||||
|
console.log('');
|
||||||
|
|
||||||
|
// --- 1. Database existence ---
|
||||||
|
if (!existsSync(DB_PATH)) {
|
||||||
|
console.error('ERROR: Database not found at', DB_PATH);
|
||||||
|
console.error('Run "npm run build:db" first.');
|
||||||
|
process.exit(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- 2. Database age check ---
|
||||||
|
let updatesNeeded = false;
|
||||||
|
const { default: Database } = await import('@ansvar/mcp-sqlite');
|
||||||
|
const db = new Database(DB_PATH, { readonly: true });
|
||||||
|
|
||||||
|
let buildDate: string | null = null;
|
||||||
|
try {
|
||||||
|
const row = db.prepare("SELECT value FROM db_metadata WHERE key = 'build_date'").get() as { value: string } | undefined;
|
||||||
|
buildDate = row?.value ?? null;
|
||||||
|
} catch {
|
||||||
|
// db_metadata table may not exist
|
||||||
|
}
|
||||||
|
|
||||||
|
if (buildDate) {
|
||||||
|
const age = daysSince(buildDate);
|
||||||
|
if (age !== null && age > MAX_DB_AGE_DAYS) {
|
||||||
|
console.log(`STALE: Database is ${age} days old (threshold: ${MAX_DB_AGE_DAYS} days)`);
|
||||||
|
updatesNeeded = true;
|
||||||
|
} else if (age !== null) {
|
||||||
|
console.log(`OK: Database is ${age} days old (threshold: ${MAX_DB_AGE_DAYS} days)`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log('WARN: No build_date in db_metadata — cannot assess age');
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- 3. Document count check ---
|
||||||
|
let dbDocCount = 0;
|
||||||
|
let dbProvCount = 0;
|
||||||
|
try {
|
||||||
|
const docRow = db.prepare("SELECT COUNT(*) as count FROM legal_documents").get() as { count: number };
|
||||||
|
dbDocCount = docRow.count;
|
||||||
|
console.log(`DB documents: ${dbDocCount}`);
|
||||||
|
} catch {
|
||||||
|
console.log('WARN: Cannot count legal_documents');
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const provRow = db.prepare("SELECT COUNT(*) as count FROM legal_provisions").get() as { count: number };
|
||||||
|
dbProvCount = provRow.count;
|
||||||
|
console.log(`DB provisions: ${dbProvCount}`);
|
||||||
|
} catch {
|
||||||
|
console.log('WARN: Cannot count legal_provisions');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare against census if available
|
||||||
|
if (existsSync(CENSUS_PATH)) {
|
||||||
|
try {
|
||||||
|
const census = JSON.parse(readFileSync(CENSUS_PATH, 'utf-8')) as { summary?: CensusSummary };
|
||||||
|
const expected = census.summary?.total_ingested ?? census.summary?.total_ingestable ?? census.summary?.total_laws;
|
||||||
|
if (expected && dbDocCount < expected) {
|
||||||
|
console.log(`MISSING: DB has ${dbDocCount} documents but census expects ${expected}`);
|
||||||
|
updatesNeeded = true;
|
||||||
|
} else if (expected) {
|
||||||
|
console.log(`OK: DB documents (${dbDocCount}) >= census expected (${expected})`);
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
console.log('WARN: Could not parse census.json');
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log('INFO: No census.json — skipping count comparison');
|
||||||
|
}
|
||||||
|
|
||||||
|
db.close();
|
||||||
|
|
||||||
|
// --- 4. Source portal reachability ---
|
||||||
|
console.log('');
|
||||||
|
console.log(`Checking portal: ${PORTAL_URL}`);
|
||||||
|
const portalOk = await checkPortal(PORTAL_URL);
|
||||||
|
if (portalOk) {
|
||||||
|
console.log(`OK: ${PORTAL_NAME} is reachable`);
|
||||||
|
} else {
|
||||||
|
console.log(`WARN: ${PORTAL_NAME} is unreachable — manual check recommended`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Result ---
|
||||||
|
console.log('');
|
||||||
|
if (updatesNeeded) {
|
||||||
|
console.log('RESULT: Updates detected — re-ingestion recommended');
|
||||||
|
process.exit(1);
|
||||||
|
} else {
|
||||||
|
console.log('RESULT: Database appears current — no updates needed');
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
main().catch((err) => {
|
||||||
|
console.error('Unexpected error:', err);
|
||||||
|
process.exit(2);
|
||||||
|
});
|
||||||
Reference in New Issue
Block a user