// AgentTests — capability test runner panel. // // Calls POST /api/agents/test to run the test suite (backend/agent_tests.py) // against the live editor agent. Each test fires a fixed question, validates // the response satisfies a contract (right tool called, required keywords // present, citation discipline, length budget), and reports pass/fail with // an approximate token cost. Use as the SET STANDARD any token-efficiency // change must preserve. const { useState: atUseState, useEffect: atUseEffect, useCallback: atUseCallback } = React; function AgentTests({ open, onClose }) { if (!open) return null; const [manifest, setManifest] = atUseState([]); const [running, setRunning] = atUseState(false); const [report, setReport] = atUseState(null); const [err, setErr] = atUseState(''); const [selected, setSelected] = atUseState(new Set()); // Load manifest on open atUseEffect(() => { fetch('/api/agents/test/manifest') .then(r => r.ok ? r.json() : null) .then(j => setManifest(j?.tests || [])) .catch(() => {}); }, []); const run = atUseCallback(async (idsArg = null) => { if (running) return; setRunning(true); setReport(null); setErr(''); try { const body = {}; if (idsArg && idsArg.length > 0) body.ids = idsArg; const r = await fetch('/api/agents/test', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body), }); if (!r.ok) { throw new Error(`HTTP ${r.status}: ${(await r.text()).slice(0, 100)}`); } setReport(await r.json()); } catch (e) { setErr(String(e.message || e)); } finally { setRunning(false); } }, [running]); const toggle = (id) => { setSelected(s => { const n = new Set(s); n.has(id) ? n.delete(id) : n.add(id); return n; }); }; return (