|
| 1 | +#!/usr/bin/env node |
| 2 | +import fs from 'node:fs'; |
| 3 | +import path from 'node:path'; |
| 4 | +import { fileURLToPath } from 'node:url'; |
| 5 | + |
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Example inputs used when --policy / --task are not given on the command line.
// __dirname is absolute, so resolve() yields the same normalized path join() would.
const DEFAULT_POLICY = path.resolve(__dirname, '..', 'policy.example.json');
const DEFAULT_TASK = path.resolve(__dirname, 'task.example.json');

// Top-level policy keys that simulate() requires to be present as objects.
const REQUIRED_SECTIONS = [
  'agent_manifest',
  'context_policy',
  'tool_policy',
  'budget_policy',
  'approval_policy',
  'memory_policy',
  'swarm_policy',
  'deployment_policy',
];
| 20 | + |
/**
 * Parse command-line arguments into simulator options.
 *
 * Recognized flags are `--policy <path>`, `--task <path>`, and `--help` / `-h`
 * (which prints usage via printHelp() and exits the process with status 0).
 *
 * @param {string[]} argv - Arguments after the node binary and script path.
 * @returns {{ policy: string, task: string }} Paths to load; each one falls
 *   back to DEFAULT_POLICY / DEFAULT_TASK when its flag is absent.
 * @throws {Error} On an unrecognized argument, or when a flag that takes a
 *   value is last on the command line or is followed directly by another flag.
 */
function parseArgs(argv) {
  const args = {
    policy: DEFAULT_POLICY,
    task: DEFAULT_TASK,
  };
  // Flags that consume the following argument, mapped to the option they set.
  const valueFlags = new Map([
    ['--policy', 'policy'],
    ['--task', 'task'],
  ]);
  const isHelpFlag = (arg) => arg === '--help' || arg === '-h';

  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];

    if (isHelpFlag(arg)) {
      printHelp();
      process.exit(0);
    }

    const option = valueFlags.get(arg);
    if (option === undefined) {
      throw new Error(`Unknown argument: ${arg}`);
    }

    // Without this guard a trailing flag would store `undefined` and only fail
    // later, inside path.resolve(), with a message that never names the flag.
    const value = argv[i + 1];
    if (value === undefined || valueFlags.has(value) || isHelpFlag(value)) {
      throw new Error(`Missing value for ${arg}`);
    }

    args[option] = value;
    i += 1; // Skip the value that was just consumed.
  }

  return args;
}
| 41 | + |
/**
 * Print the command-line usage text to stdout with a single console.log call.
 */
function printHelp() {
  const helpLines = [
    'Usage:',
    '  node micro-ecf/simulator/run.mjs --policy micro-ecf/policy.example.json --task micro-ecf/simulator/task.example.json',
    '',
    'The simulator is local and no-spend. It validates a Micro ECF policy against one proposed task and prints pass/warn/fail checks.',
    // The trailing empty entry keeps the text ending in a newline.
    '',
  ];
  console.log(helpLines.join('\n'));
}
| 49 | + |
/**
 * Read a UTF-8 JSON file and parse it.
 *
 * @param {string} filePath - Path to the file; relative paths are resolved
 *   against the current working directory by path.resolve().
 * @returns {{ resolved: string, value: unknown }} The absolute path that was
 *   read and the parsed JSON value.
 * @throws {Error} Whatever fs.readFileSync throws when the file cannot be read.
 * @throws {SyntaxError} When the contents are not valid JSON; the message names
 *   the file and the original parser error is attached as `cause`.
 */
function readJson(filePath) {
  const resolved = path.resolve(filePath);
  let text = fs.readFileSync(resolved, 'utf8');

  // Some editors prepend a UTF-8 byte-order mark, which JSON.parse rejects.
  if (text.charCodeAt(0) === 0xfeff) {
    text = text.slice(1);
  }

  try {
    return { resolved, value: JSON.parse(text) };
  } catch (err) {
    // Two files are loaded per run; name the one that failed to parse.
    throw new SyntaxError(`Invalid JSON in ${resolved}: ${err.message}`, { cause: err });
  }
}
| 57 | + |
/**
 * Return `value` unchanged when it is an array, otherwise a new empty array.
 *
 * @param {unknown} value - Candidate list.
 * @returns {unknown[]} The same array instance, or `[]` for anything else.
 */
function asList(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}
| 61 | + |
/**
 * Append one check result to `checks`.
 *
 * @param {object[]} checks - Array to push onto (mutated in place).
 * @param {string} level - Result level recorded on the entry.
 * @param {string} check - Identifier of the check.
 * @param {string} message - Human-readable outcome.
 * @param {unknown} [details] - Optional extra data; the `details` key is
 *   omitted from the entry entirely when this is `undefined`.
 */
function add(checks, level, check, message, details = undefined) {
  const entry = { level, check, message };
  if (details !== undefined) {
    entry.details = details;
  }
  checks.push(entry);
}
| 65 | + |
/**
 * Evaluate one proposed task against a policy and collect pass/warn/fail checks.
 *
 * Checks performed, in order: every REQUIRED_SECTIONS key is an object; the
 * task is an object; requested tools versus tool_policy.denied_tools and
 * tool_policy.allowed_tools; budget_policy.treasury_required; estimated cost
 * versus max_daily_spend_usdc; estimated cost versus
 * approval_required_above_usdc; side-effect gating; and
 * deployment_policy.first_proof_required.
 *
 * Amounts that are not non-negative finite numbers produce a `fail` instead of
 * being compared, because any comparison against NaN is false and would
 * otherwise let an unparseable cost or limit pass silently.
 *
 * @param {object} policy - Parsed policy document. A non-object is treated as
 *   an empty policy, so every section check fails.
 * @param {object} task - Parsed task document. A non-object is treated as an
 *   empty task and reported as a failed `task.shape` check.
 * @returns {{ ok: boolean, schema: string, summary: object, next_step: string, checks: object[] }}
 *   The report; `ok` is true only when no check has level `fail`.
 */
function simulate(policy, task) {
  const checks = [];
  const policyDoc = isPlainRecord(policy) ? policy : {};
  const taskDoc = isPlainRecord(task) ? task : {};
  const toolPolicy = policyDoc.tool_policy || {};
  const budgetPolicy = policyDoc.budget_policy || {};
  const approvalPolicy = policyDoc.approval_policy || {};
  const deploymentPolicy = policyDoc.deployment_policy || {};

  for (const section of REQUIRED_SECTIONS) {
    if (isPlainRecord(policyDoc[section])) {
      add(checks, 'pass', `section.${section}`, `${section} is present`);
    } else {
      add(checks, 'fail', `section.${section}`, `${section} must be an object`);
    }
  }

  // Only reported on failure so the check list for well-formed input is unchanged.
  if (!isPlainRecord(task)) {
    add(checks, 'fail', 'task.shape', 'Task must be a JSON object');
  }

  const requestedTools = asList(taskDoc.requested_tools);
  const allowedTools = new Set(asList(toolPolicy.allowed_tools));
  const deniedTools = new Set(asList(toolPolicy.denied_tools));
  const deniedRequestedTools = requestedTools.filter((tool) => deniedTools.has(tool));
  const unknownRequestedTools = requestedTools.filter(
    (tool) => !allowedTools.has(tool) && !deniedTools.has(tool),
  );

  if (deniedRequestedTools.length > 0) {
    add(checks, 'fail', 'tools.denied_requested', 'Task requests tools explicitly denied by policy', deniedRequestedTools);
  } else {
    add(checks, 'pass', 'tools.denied_requested', 'Task does not request denied tools');
  }

  if (unknownRequestedTools.length > 0) {
    add(checks, 'warn', 'tools.unknown_requested', 'Task requests tools not listed in allowed_tools', unknownRequestedTools);
  } else {
    add(checks, 'pass', 'tools.unknown_requested', 'Requested tools are covered by allowed_tools or denied_tools');
  }

  const cost = readAmount(taskDoc.estimated_cost_usdc);
  const dailyCap = readAmount(budgetPolicy.max_daily_spend_usdc);
  const approvalFloor = readAmount(budgetPolicy.approval_required_above_usdc);
  const estimatedCost = cost.value;
  const maxDailySpend = dailyCap.value;
  const approvalAbove = approvalFloor.value;

  if (budgetPolicy.treasury_required !== true) {
    add(checks, 'fail', 'budget.treasury_required', 'treasury_required must be true before Agent OS autonomous spend');
  } else {
    add(checks, 'pass', 'budget.treasury_required', 'Treasury funding is required before autonomous spend');
  }

  if (!cost.valid || !dailyCap.valid) {
    // Report the raw values so the reader can see what failed to parse.
    add(checks, 'fail', 'budget.estimated_cost', 'estimated_cost_usdc and max_daily_spend_usdc must be non-negative finite numbers', {
      estimated_cost_usdc: taskDoc.estimated_cost_usdc,
      max_daily_spend_usdc: budgetPolicy.max_daily_spend_usdc,
    });
  } else if (estimatedCost > maxDailySpend) {
    add(checks, 'fail', 'budget.estimated_cost', 'Estimated task cost exceeds max_daily_spend_usdc', { estimatedCost, maxDailySpend });
  } else {
    add(checks, 'pass', 'budget.estimated_cost', 'Estimated task cost is inside the daily budget', { estimatedCost, maxDailySpend });
  }

  if (!cost.valid || !approvalFloor.valid) {
    add(checks, 'fail', 'approval.threshold', 'estimated_cost_usdc and approval_required_above_usdc must be non-negative finite numbers', {
      estimated_cost_usdc: taskDoc.estimated_cost_usdc,
      approval_required_above_usdc: budgetPolicy.approval_required_above_usdc,
    });
  } else if (approvalAbove > 0 && estimatedCost > approvalAbove) {
    // A threshold of 0 (or an absent one) means no approval threshold is configured.
    add(checks, 'warn', 'approval.threshold', 'Task exceeds approval_required_above_usdc and should stop for owner approval', { estimatedCost, approvalAbove });
  } else {
    add(checks, 'pass', 'approval.threshold', 'Task does not exceed the approval threshold', { estimatedCost, approvalAbove });
  }

  const sideEffects = String(taskDoc.side_effects || 'none');
  const humanGated = new Set(asList(approvalPolicy.human_gated));
  if (sideEffects !== 'none' && toolPolicy.side_effects !== 'approval_required') {
    add(checks, 'fail', 'approval.side_effects', 'Side-effecting tasks must require approval');
  } else if (sideEffects !== 'none' && !humanGated.has(sideEffects)) {
    add(checks, 'warn', 'approval.side_effects', 'Side effect is not listed in approval_policy.human_gated', sideEffects);
  } else {
    add(checks, 'pass', 'approval.side_effects', 'Side-effect policy is explicit for this task');
  }

  // Only an explicit `false` fails; an absent first_proof_required passes.
  if (deploymentPolicy.first_proof_required === false) {
    add(checks, 'fail', 'deployment.first_proof', 'first_proof_required should stay true for Agent OS handoff');
  } else {
    add(checks, 'pass', 'deployment.first_proof', 'First proof is required before public exposure');
  }

  const tally = { pass: 0, warn: 0, fail: 0 };
  for (const entry of checks) {
    tally[entry.level] += 1;
  }

  return {
    ok: tally.fail === 0,
    schema: 'agoragentic.micro-ecf.simulation.v1',
    summary: {
      pass: tally.pass,
      warn: tally.warn,
      fail: tally.fail,
      no_spend: true,
    },
    next_step: tally.fail === 0
      ? 'Export an Agent OS Harness packet with micro-ecf/export-agent-os-harness.mjs'
      : 'Fix failed policy checks before exporting the Agent OS Harness packet',
    checks,
  };
}

/** True when `value` is a non-null, non-array object. */
function isPlainRecord(value) {
  return Boolean(value) && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Coerce a raw amount to a number (falsy input counts as 0) and flag whether
 * the result is a non-negative finite number.
 */
function readAmount(raw) {
  const value = Number(raw || 0);
  return { value, valid: Number.isFinite(value) && value >= 0 };
}
| 155 | + |
/**
 * CLI entry point: load the policy and task files, run the simulation, print
 * the report as pretty-printed JSON on stdout, and set the process exit status
 * (0 when the report is ok, 1 otherwise).
 *
 * @throws {Error} Propagates errors from argument parsing and file loading.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  const policy = readJson(args.policy);
  const task = readJson(args.task);
  const report = simulate(policy.value, task.value);

  // Report paths relative to the working directory, with forward slashes on
  // every platform.
  const toPortablePath = (absolutePath) =>
    path.relative(process.cwd(), absolutePath).replace(/\\/g, '/');

  report.generated_from = {
    source: 'micro-ecf/simulator/run.mjs',
    policy_path: toPortablePath(policy.resolved),
    task_path: toPortablePath(task.resolved),
  };
  process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);

  // Set the status instead of calling process.exit(): exit() ends the process
  // immediately and can drop stdout data that has not been flushed yet.
  process.exitCode = report.ok ? 0 : 1;
}
| 169 | + |
// Run the CLI. Any thrown error is reported on stderr as a JSON object and
// the process finishes with status 1.
try {
  main();
} catch (err) {
  // Non-Error throwables have no `.message`; fall back to their string form.
  const message = err instanceof Error ? err.message : String(err);
  console.error(JSON.stringify({ ok: false, error: message }, null, 2));
  // Set the status rather than calling process.exit(), so pending stderr
  // output is not cut off by an immediate exit.
  process.exitCode = 1;
}
0 commit comments