<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Navi — Eval</title>
<style>
/* Global reset: border-box sizing and zero margin/padding on every element and pseudo-element. */
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
/* Theme palette, exposed as custom properties and read through var() by the rules below. */
:root {
/* Background shades; --bg is the page background set on body. */
--bg: #0e0e0e;
--bg2: #161616;
--bg3: #1e1e1e;
--bg4: #222;
/* Border colours. */
--border: #252525;
--border2: #2e2e2e;
/* Text colours, from primary (--text) to most muted (--text3). */
--text: #d4d4d4;
--text2: #888;
--text3: #555;
/* Highlight colour (logo, active tab underline, primary button). */
--accent: #4ec9b0;
/* Status colours used by the .pill-* classes; --c-tool is not referenced elsewhere in this stylesheet. */
--c-good: #6a9955;
--c-warn: #dcdcaa;
--c-bad: #f48771;
--c-stale: #c586c0;
--c-user: #569cd6;
--c-tool: #ce9178;
}
html, body { height: 100%; }
/* Page shell: a column flex container exactly one viewport tall. overflow is hidden
   here because <main> (overflow-y: auto) is the element that scrolls. */
body {
font-family: ui-monospace, "Cascadia Code", "Fira Code", "JetBrains Mono", monospace;
font-size: 13px;
background: var(--bg);
color: var(--text);
display: flex;
flex-direction: column;
height: 100vh;
overflow: hidden;
}
/* ── Header ── */
/* Top bar: a non-shrinking flex row. align-items: stretch makes the tab buttons
   fill the bar's height so their bottom border sits on the bar's bottom edge. */
header {
flex-shrink: 0;
background: var(--bg2);
border-bottom: 1px solid var(--border);
padding: 0 14px;
display: flex;
align-items: stretch;
gap: 10px;
}
/* Product name; its vertical padding is what gives the header its height. */
.logo {
font-size: 11px;
color: var(--text3);
letter-spacing: .1em;
text-transform: uppercase;
white-space: nowrap;
align-self: center;
padding: 10px 0;
}
.logo b { color: var(--accent); }
.tabs { display: flex; align-items: stretch; gap: 2px; margin-left: 10px; }
/* Tab buttons: borderless apart from a 2px bottom border that is transparent
   until the tab carries .active. */
.tab {
padding: 0 14px;
font-family: inherit; font-size: 12px;
color: var(--text3);
background: none; border: none;
border-bottom: 2px solid transparent;
cursor: pointer;
transition: color .15s, border-color .15s;
}
.tab:hover { color: var(--text2); }
.tab.active { color: var(--text); border-bottom-color: var(--accent); }
/* Right-hand cluster (version badge and links); margin-left: auto pushes it to the far edge. */
.header-right {
margin-left: auto;
display: flex; align-items: center; gap: 10px;
font-size: 11px; color: var(--text3);
white-space: nowrap;
}
.header-right a { color: var(--text2); text-decoration: none; }
.header-right a:hover { color: var(--text); }
/* ── Form controls ── */
/* Shared look for all native controls. The default focus outline is removed here
   and replaced by the :focus-visible rule below. */
input, button, select {
  font-family: inherit;
  font-size: 12px;
  background: var(--bg3);
  border: 1px solid var(--border2);
  color: var(--text);
  border-radius: 4px;
  outline: none;
}
input, select { padding: 4px 8px; }
input:focus, select:focus { border-color: #555; }
/* Keyboard focus indicator. Without this, buttons (including the header tabs)
   show no focus state at all because the outline is removed above. The negative
   offset draws the ring inside the control so it is never clipped by the
   header or by body's overflow: hidden. */
input:focus-visible, select:focus-visible, button:focus-visible {
  outline: 2px solid var(--accent);
  outline-offset: -2px;
}
button { padding: 4px 10px; cursor: pointer; }
button:hover { background: var(--bg4); border-color: #555; }
button:disabled { opacity: 0.4; cursor: not-allowed; }
/* Primary call-to-action (e.g. "start run"). */
button.primary { background: #1e3a2f; border-color: var(--accent); color: var(--accent); }
button.primary:hover { background: #28503f; }
/* ── Layout ── */
/* <main> takes the space left under the header and is the page's scroll container. */
main { flex: 1; overflow-y: auto; padding: 14px 16px; }
/* Tab panes: hidden unless they carry .active (toggled by activateTab in the script). */
.pane { display: none; }
.pane.active { display: block; }
h2 { font-size: 13px; color: var(--text2); margin-bottom: 10px; font-weight: 600;
text-transform: uppercase; letter-spacing: .08em; }
h3 { font-size: 12px; color: var(--text3); margin: 12px 0 6px; font-weight: 600;
text-transform: uppercase; letter-spacing: .06em; }
/* Row of filter controls at the top of a pane; wraps on narrow viewports. */
.filters {
display: flex; gap: 8px; align-items: center;
margin-bottom: 12px;
padding-bottom: 12px;
border-bottom: 1px solid var(--border);
flex-wrap: wrap;
}
.filters label { font-size: 11px; color: var(--text3); margin-right: 4px; }
/* ── Table ── */
table {
  width: 100%;
  border-collapse: collapse;
  font-size: 12px;
}
th, td {
  padding: 6px 10px;
  text-align: left;
  border-bottom: 1px solid var(--border);
  vertical-align: middle;
}
/* Header cells stick to the top of the scrolling area while rows scroll beneath. */
th {
  color: var(--text3); font-weight: 600; font-size: 10px;
  text-transform: uppercase; letter-spacing: .06em;
  background: var(--bg2); position: sticky; top: 0; z-index: 1;
}
tr:hover td { background: var(--bg2); }
tr.clickable { cursor: pointer; }
/* Muted text. The script also puts .dim on <span> elements (run status panel),
   so the rule covers spans as well as cells. */
td.dim, span.dim { color: var(--text3); }
/* Right alignment for numeric columns. The markup uses class="right" on <th>
   too, so headers must match or they sit left-aligned above right-aligned data. */
td.right, th.right { text-align: right; }
/* Status pills: .pill sets the shape, .pill-<state> sets the colours. */
.pill {
  font-size: 10px; padding: 2px 6px; border-radius: 3px;
  text-transform: uppercase; letter-spacing: .04em;
}
.pill-evaluated { background: #1e3a2f; color: var(--c-good); }
.pill-pending { background: #3a3a1e; color: var(--c-warn); }
.pill-stale { background: #2d1e3a; color: var(--c-stale); }
.pill-running { background: #1e2d40; color: var(--c-user); }
.pill-failed { background: #3a1e2d; color: var(--c-bad); }
.pill-ok { background: #1e3a2f; color: var(--c-good); }
.session-id-mono { font-size: 11px; color: var(--text2); }
/* ── Detail tab ── */
/* Session metadata as a two-column grid: 140px term column, value column fills the rest. */
.meta-grid {
display: grid;
grid-template-columns: 140px 1fr;
gap: 4px 16px;
font-size: 12px;
margin-bottom: 18px;
}
.meta-grid dt { color: var(--text3); }
.meta-grid dd { color: var(--text); word-break: break-all; }
/* Card wrapping one evaluation run (header strip, score grid, comments). */
.eval-run {
border: 1px solid var(--border);
border-radius: 6px;
margin-bottom: 14px;
background: var(--bg2);
overflow: hidden;
}
.eval-run-head {
padding: 8px 12px;
background: var(--bg3);
border-bottom: 1px solid var(--border);
font-size: 11px;
color: var(--text2);
display: flex; gap: 12px; flex-wrap: wrap;
}
.eval-run-head b { color: var(--text); }
/* One row of the score grid. The template is fixed at five columns: a 200px
   axis name followed by four 80px score columns. */
.axis-row {
display: grid;
grid-template-columns: 200px 80px 80px 80px 80px;
gap: 8px;
padding: 4px 12px;
font-size: 11px;
align-items: center;
}
.axis-row.head { color: var(--text3); border-bottom: 1px solid var(--border); padding-bottom: 6px; padding-top: 6px; }
.axis-row .axis-name { color: var(--text2); }
.axis-row .score { text-align: right; }
.axis-row .score.avg { color: var(--accent); font-weight: 600; }
/* NOTE(review): nothing in this file currently adds the `spread` class. */
.axis-row.spread { background: var(--bg3); border-top: 1px solid var(--border); }
/* Free-text comments shown under the score grid. */
.comments {
padding: 6px 12px 10px;
border-top: 1px solid var(--border);
}
.comments .comment {
font-size: 11px;
color: var(--text);
margin: 6px 0;
padding-left: 12px;
border-left: 2px solid var(--border2);
}
.comments .comment-by { color: var(--text3); font-size: 10px; text-transform: uppercase; letter-spacing: .06em; margin-right: 6px; }
/* ── Stats tab ── */
/* Stats headers are right-aligned, except the first column, which stays left-aligned
   in both header and body. */
.stats-table th { text-align: right; }
.stats-table th:first-child, .stats-table td:first-child { text-align: left; }
.stats-table td.bucket-cell { color: var(--text2); }
/* ── Run tab ── */
/* Run form as a two-column grid (label, control), capped at 580px wide. */
.run-form {
display: grid;
grid-template-columns: 140px 1fr;
gap: 8px 12px;
margin-bottom: 14px;
max-width: 580px;
}
.run-form label { font-size: 11px; color: var(--text3); align-self: center; }
.run-form input, .run-form select { width: 100%; }
/* Panel showing the attached run's status. */
.run-status {
border: 1px solid var(--border);
border-radius: 6px;
padding: 10px 12px;
background: var(--bg2);
margin-top: 12px;
font-size: 11px;
}
/* Centered empty-state / loading message, used in every tab. */
.placeholder {
text-align: center; color: var(--text3); padding: 60px 0; font-size: 12px;
}
/* Inline error message rendered when an API call fails. */
.error-msg { color: var(--c-bad); padding: 12px; font-size: 12px; }
</style>
</head>
<body>
<header>
  <div class="logo"><b>NAVI</b> EVAL</div>
  <!-- View switcher. The script toggles .active on these buttons and on the
       pane whose id is "tab-" + data-tab. -->
  <nav class="tabs" aria-label="Views">
    <button type="button" class="tab active" data-tab="sessions">Sessions</button>
    <button type="button" class="tab" data-tab="detail">Detail</button>
    <button type="button" class="tab" data-tab="stats">Stats</button>
    <button type="button" class="tab" data-tab="run">Run</button>
  </nav>
  <div class="header-right">
    <!-- Placeholder text; replaced by loadVersionBadge() at startup. -->
    <span id="version-badge">judge=… rubric=…</span>
    <a href="/#/admin">Admin</a>
    <a href="/debug">/debug</a>
    <a href="/">app</a>
  </div>
</header>
<main>
<!-- ── Sessions tab ────────────────────────────────────────────────── -->
<div id="tab-sessions" class="pane active">
  <div class="filters">
    <!-- Each label is tied to its control with for/id so clicking the label
         focuses the control and assistive tech announces the name. -->
    <label for="filter-profile">profile</label>
    <select id="filter-profile">
      <option value="">all</option>
    </select>
    <label for="filter-status">status</label>
    <select id="filter-status">
      <option value="">all</option>
      <option value="evaluated">evaluated</option>
      <option value="pending">pending</option>
      <option value="stale">stale</option>
    </select>
    <label for="filter-limit">limit</label>
    <select id="filter-limit">
      <option>50</option>
      <option>100</option>
      <option>200</option>
    </select>
    <button type="button" id="btn-refresh">refresh</button>
    <span id="sessions-count" style="margin-left:auto; color: var(--text3); font-size: 11px;"></span>
  </div>
  <div id="sessions-table-wrap">
    <table>
      <thead>
        <tr>
          <th scope="col">started</th>
          <th scope="col">profile</th>
          <th scope="col">session</th>
          <th scope="col">name</th>
          <th scope="col" class="right">msgs</th>
          <!-- Emoji-only headers get an explicit accessible name. -->
          <th scope="col" class="right" aria-label="likes">👍</th>
          <th scope="col" class="right" aria-label="dislikes">👎</th>
          <th scope="col">status</th>
          <th scope="col" class="right">avg goal</th>
          <th scope="col" class="right">avg tools</th>
          <th scope="col" class="right">avg comm</th>
        </tr>
      </thead>
      <!-- Rows are rendered by loadSessions(). -->
      <tbody id="sessions-tbody"></tbody>
    </table>
  </div>
</div>
<!-- ── Detail tab ──────────────────────────────────────────────────── -->
<div id="tab-detail" class="pane">
  <div class="filters">
    <label for="detail-session-id">session id</label>
    <input id="detail-session-id" placeholder="paste session id" style="flex: 1; max-width: 380px;">
    <button type="button" id="btn-load-detail">load</button>
    <button type="button" id="btn-detail-evaluate">evaluate this session</button>
  </div>
  <!-- Replaced by loadDetail() with the session metadata and evaluation runs. -->
  <div id="detail-body"><div class="placeholder">Pick a session from the Sessions tab or paste an id.</div></div>
</div>
<!-- ── Stats tab ───────────────────────────────────────────────────── -->
<div id="tab-stats" class="pane">
  <div class="filters">
    <label for="stats-days">days</label>
    <select id="stats-days">
      <option>7</option>
      <option>14</option>
      <option selected>30</option>
      <option>60</option>
      <option>90</option>
    </select>
    <!-- Wrapping label: already associated with the checkbox it contains. -->
    <label>
      <input type="checkbox" id="stats-by-bucket"> split by complexity bucket
    </label>
    <button type="button" id="btn-load-stats">load</button>
  </div>
  <!-- Replaced by loadStats() with the weekly stats table. -->
  <div id="stats-body"><div class="placeholder">Press load to fetch.</div></div>
</div>
<!-- ── Run tab ─────────────────────────────────────────────────────── -->
<div id="tab-run" class="pane">
  <h2>Trigger eval run</h2>
  <!-- Two-column grid of label/control pairs; every label targets its control via for/id. -->
  <div class="run-form">
    <label for="run-scope">scope</label>
    <select id="run-scope">
      <option value="unevaluated">unevaluated (current rubric)</option>
      <option value="session">single session</option>
      <option value="all">all sessions (re-eval)</option>
    </select>
    <label for="run-session-id">session id</label>
    <input id="run-session-id" placeholder="only for scope=session">
    <label for="run-since">since</label>
    <input id="run-since" type="datetime-local">
    <label for="run-limit">limit</label>
    <input id="run-limit" type="number" min="1" placeholder="(optional)">
    <label for="run-model">model</label>
    <input id="run-model" value="gemma4:31b-cloud">
    <label for="run-backend">backend</label>
    <input id="run-backend" value="ollama">
    <!-- Empty cell so the button lines up with the control column. -->
    <span></span>
    <div>
      <button type="button" id="btn-trigger-run" class="primary">start run</button>
    </div>
  </div>
  <h3>Active / latest run</h3>
  <!-- Filled by pollActiveRun() once a run is attached. -->
  <div id="run-active"><div class="placeholder">No active run.</div></div>
  <h3>Run history</h3>
  <!-- Filled by loadRunHistory(). -->
  <div id="run-history"><div class="placeholder">No runs yet.</div></div>
</div>
</main>
<script>
'use strict';
// ── tiny utils ─────────────────────────────────────────────────────────
// querySelector shorthand: the first element under `root` (the whole document
// when `root` is omitted) that matches `sel`, or null when nothing matches.
const $ = (sel, root = document) => {
  return root.querySelector(sel);
};
// querySelectorAll shorthand that hands back a plain Array instead of a
// NodeList, so callers can chain forEach/map/filter directly.
const $$ = (sel, root = document) => {
  return [...root.querySelectorAll(sel)];
};
// Tiny element factory: e(tag, attrs, ...children) -> Element.
//   attrs.class   -> assigned to className
//   attrs.dataset -> object merged into element.dataset
//   attrs.onXxx   -> when the value is a function, registered as an "xxx" listener
//   anything else -> setAttribute, unless the value is null/undefined
// Children may be nested arrays (flattened one level). null, undefined and false
// are skipped; Nodes are appended as-is; everything else becomes a text node.
const e = (tag, attrs = {}, ...children) => {
  const node = document.createElement(tag);
  Object.entries(attrs).forEach(([name, value]) => {
    if (name === 'class') {
      node.className = value;
      return;
    }
    if (name === 'dataset') {
      Object.assign(node.dataset, value);
      return;
    }
    if (name.startsWith('on') && typeof value === 'function') {
      // 'onclick' -> 'click'
      node.addEventListener(name.slice(2), value);
      return;
    }
    if (value != null) node.setAttribute(name, value);
  });
  children
    .flat()
    .filter(child => !(child == null || child === false))
    .forEach(child => {
      const piece = child instanceof Node ? child : document.createTextNode(String(child));
      node.append(piece);
    });
  return node;
};
// Format a timestamp as "YYYY-MM-DD HH:MM" in the browser's local time zone
// (the sv-SE locale is used only because it produces that ISO-like layout).
// Returns '—' for empty input and for values Date cannot parse, instead of
// leaking "Invalid Date" into the UI.
const fmtDate = iso => {
  if (!iso) return '—';
  const d = new Date(iso);
  if (Number.isNaN(d.getTime())) return '—';
  return d.toLocaleString('sv-SE').replace('T', ' ').slice(0, 16);
};
// Same contract as fmtDate, assembled from the separate date and time parts
// (time truncated to HH:MM). The Date is built once rather than twice.
const fmtDateShort = iso => {
  if (!iso) return '—';
  const d = new Date(iso);
  if (Number.isNaN(d.getTime())) return '—';
  return d.toLocaleDateString('sv-SE') + ' ' + d.toLocaleTimeString('sv-SE').slice(0, 5);
};
// First 8 characters of an id for compact display, or '—' when the id is
// missing/empty. The value is stringified first so a non-string id (e.g. a
// number) is shortened instead of throwing on a missing .slice method.
const shortId = id => (id ? String(id).slice(0, 8) : '—');
// Pass a value through unchanged, substituting '—' only for null/undefined
// (so 0 is still shown as 0).
const numOrDash = v => (v == null ? '—' : v);
// Thin client for the backend HTTP API used by this page.
// get/post resolve with the parsed JSON body and reject with an Error of the
// form "<status> <statusText>: <response text>" on any non-2xx response.
const api = {
  async get(path) {
    const r = await fetch(path);
    if (!r.ok) throw new Error(`${r.status} ${r.statusText}: ${await r.text()}`);
    return r.json();
  },
  async post(path, body) {
    const r = await fetch(path, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    if (!r.ok) throw new Error(`${r.status} ${r.statusText}: ${await r.text()}`);
    return r.json();
  },
  // Query parameters are encoded by URLSearchParams.
  sessions(params) { return api.get(`/eval/sessions?${new URLSearchParams(params)}`); },
  // Ids can be pasted by the user, so they are percent-encoded before being
  // placed in the path; otherwise a '/', '?' or '#' in the id would change
  // which URL is requested.
  session(id) { return api.get(`/eval/sessions/${encodeURIComponent(id)}`); },
  stats(params) { return api.get(`/eval/stats?${new URLSearchParams(params)}`); },
  startRun(body) { return api.post('/eval/run', body); },
  run(id) { return api.get(`/eval/run/${encodeURIComponent(id)}`); },
  runs() { return api.get('/eval/runs'); },
  // Best effort: the profile list only feeds an optional filter, so any
  // failure resolves to an empty list instead of rejecting.
  profiles() { return api.get('/agents/profiles').catch(() => []); },
};
// ── tab routing ────────────────────────────────────────────────────────
// Show the pane with id "tab-<name>", mark the matching header tab active,
// and refresh the data of tabs that load on entry (sessions, stats, run).
// A name with no matching pane (e.g. a mistyped URL hash) falls back to
// 'sessions'; previously it deactivated every pane and left a blank page.
function activateTab(name) {
  if (!document.getElementById(`tab-${name}`)) name = 'sessions';
  $$('.tab').forEach(t => t.classList.toggle('active', t.dataset.tab === name));
  $$('.pane').forEach(p => p.classList.toggle('active', p.id === `tab-${name}`));
  if (name === 'sessions') loadSessions();
  if (name === 'stats') loadStats();
  if (name === 'run') loadRunHistory();
}
// Header tab clicks: record the tab in the URL hash, then switch to it.
// replaceState does not fire `hashchange`, so activateTab is called directly.
for (const tabButton of $$('.tab')) {
  tabButton.addEventListener('click', () => {
    const { tab } = tabButton.dataset;
    history.replaceState(null, '', `#${tab}`);
    activateTab(tab);
  });
}
// hash routing: "#<tab>" or "#detail/<id>"
// Reads location.hash and activates the tab it names. For "#detail/<id>" the
// id is also copied into the session-id input and the session is loaded.
// With no usable hash the Sessions tab is shown.
function applyHash() {
  const m = location.hash.match(/^#(\w+)(?:\/(.+))?/);
  if (!m) { activateTab('sessions'); return; }
  const tab = m[1];
  const rest = m[2];
  if (tab === 'detail' && rest) {
    let id;
    try {
      id = decodeURIComponent(rest);
    } catch (_) {
      // A hand-edited hash with a malformed %-sequence makes decodeURIComponent
      // throw URIError; use the raw text rather than aborting routing.
      id = rest;
    }
    activateTab('detail');
    $('#detail-session-id').value = id;
    loadDetail();
  } else {
    activateTab(tab);
  }
}
window.addEventListener('hashchange', applyHash);
// ── version badge ─────────────────────────────────────────────────────
// Fill the header badge with the judge/rubric versions of one evaluated
// session (limit=1, status=evaluated). When that request fails or returns no
// rows, the badge shows the hard-coded fallback "judge=v1 rubric=v1".
async function loadVersionBadge() {
  let label = 'judge=v1 rubric=v1';
  try {
    const rows = await api.sessions({ limit: 1, status: 'evaluated' });
    const latest = rows[0];
    if (latest) {
      const judge = latest.latest_judge_version || '?';
      const rubric = latest.latest_rubric_version || '?';
      label = `judge=${judge} rubric=${rubric}`;
    }
  } catch (_) {
    // Best effort: keep the fallback label.
  }
  $('#version-badge').textContent = label;
}
// ── populate profile filter ───────────────────────────────────────────
// Append one <option> per agent profile to the profile filter, using the
// profile id as both value and label. Failures are ignored, which leaves only
// the static "all" option.
async function loadProfileFilter() {
  try {
    const profiles = await api.profiles();
    const select = $('#filter-profile');
    for (const profile of profiles) {
      const option = e('option', { value: profile.id }, profile.id);
      select.appendChild(option);
    }
  } catch (_) {}
}
// ── Sessions tab ──────────────────────────────────────────────────────
// Sequence number of the most recent loadSessions() call. A response compares
// its own number against this to detect that a newer request has superseded it.
let sessionsRequestSeq = 0;
// Fetch sessions for the current filter controls and render them into
// #sessions-tbody, updating the row count next to the filters.
//
// Several calls can be in flight at once (tab activation, every filter change,
// the refresh button). Only the latest call renders; previously each response
// appended its rows, producing duplicated or stale rows.
async function loadSessions() {
  const seq = ++sessionsRequestSeq;
  const tbody = $('#sessions-tbody');
  const count = $('#sessions-count');
  tbody.innerHTML = '';
  const params = {
    limit: $('#filter-limit').value,
    offset: 0,
  };
  const profile = $('#filter-profile').value;
  const status = $('#filter-status').value;
  if (profile) params.profile = profile;
  if (status) params.status = status;
  try {
    const rows = await api.sessions(params);
    if (seq !== sessionsRequestSeq) return; // superseded while waiting
    tbody.innerHTML = '';
    count.textContent = `${rows.length} row${rows.length === 1 ? '' : 's'}`;
    if (rows.length === 0) {
      tbody.appendChild(e('tr', {}, e('td', { colspan: 11, class: 'dim' }, 'No sessions matched.')));
      return;
    }
    for (const r of rows) {
      const avg = r.latest_avg || {};
      // Encoded because applyHash() runs decodeURIComponent on this part of the hash.
      const href = `#detail/${encodeURIComponent(r.session_id)}`;
      const tr = e('tr', {
          class: 'clickable',
          onclick: () => { location.hash = href; },
        },
        e('td', { class: 'dim' }, fmtDateShort(r.created_at)),
        e('td', {}, r.profile_id),
        // A real link makes the row reachable by keyboard; a click on it lands
        // on the same hash as the row handler, so the detail loads only once.
        e('td', { class: 'session-id-mono' },
          e('a', { href, style: 'color: inherit; text-decoration: none;' }, shortId(r.session_id))),
        e('td', {}, r.name || ''),
        e('td', { class: 'right' }, r.msg_count),
        e('td', { class: 'right' }, r.likes || ''),
        e('td', { class: 'right' }, r.dislikes || ''),
        e('td', {}, e('span', { class: `pill pill-${r.eval_status}` }, r.eval_status)),
        e('td', { class: 'right' }, numOrDash(avg.goal_completion)),
        e('td', { class: 'right' }, numOrDash(avg.tool_usage_quality)),
        e('td', { class: 'right' }, numOrDash(avg.communication)),
      );
      tbody.appendChild(tr);
    }
  } catch (err) {
    if (seq !== sessionsRequestSeq) return;
    // Drop partially rendered rows and the now-stale count before showing the error.
    tbody.innerHTML = '';
    count.textContent = '';
    tbody.appendChild(e('tr', {}, e('td', { colspan: 11, class: 'error-msg' }, `Error: ${err.message}`)));
  }
}
// Reload the sessions table on an explicit refresh and whenever a filter changes.
$('#btn-refresh').addEventListener('click', loadSessions);
for (const filterId of ['filter-profile', 'filter-status', 'filter-limit']) {
  $(`#${filterId}`).addEventListener('change', loadSessions);
}
// ── Detail tab ────────────────────────────────────────────────────────
// Sequence number of the most recent loadDetail() call, used to discard
// responses that a newer request has superseded.
let detailRequestSeq = 0;
// Load the session whose id is in #detail-session-id and render its metadata
// and evaluation runs into #detail-body.
//
// - An empty id shows a prompt instead of calling the API.
// - If another loadDetail() starts while this one is waiting, this one renders
//   nothing, so a slow response for an earlier id cannot overwrite a later one.
// - A missing `feedback` or `evaluations` array in the response is treated as empty.
async function loadDetail() {
  const seq = ++detailRequestSeq;
  const id = $('#detail-session-id').value.trim();
  const body = $('#detail-body');
  if (!id) { body.innerHTML = '<div class="placeholder">Enter a session id.</div>'; return; }
  body.innerHTML = '<div class="placeholder">Loading…</div>';
  try {
    const d = await api.session(id);
    if (seq !== detailRequestSeq) return; // superseded while waiting
    const feedback = Array.isArray(d.feedback) ? d.feedback : [];
    const evaluations = Array.isArray(d.evaluations) ? d.evaluations : [];
    const likes = feedback.filter(f => f.rating === 1).length;
    const dislikes = feedback.filter(f => f.rating === -1).length;
    const meta = e('dl', { class: 'meta-grid' },
      e('dt', {}, 'session id'), e('dd', { class: 'session-id-mono' }, d.session_id),
      e('dt', {}, 'profile'), e('dd', {}, d.profile_id),
      e('dt', {}, 'name'), e('dd', {}, d.name || '—'),
      e('dt', {}, 'started'), e('dd', {}, fmtDate(d.created_at)),
      e('dt', {}, 'last active'), e('dd', {}, fmtDate(d.last_active)),
      e('dt', {}, 'messages'), e('dd', {}, d.msg_count),
      e('dt', {}, 'feedback'), e('dd', {}, `${likes} 👍 / ${dislikes} 👎`),
      e('dt', {}, 'evaluations'), e('dd', {}, evaluations.length),
    );
    body.innerHTML = '';
    body.appendChild(meta);
    if (evaluations.length === 0) {
      body.appendChild(e('div', { class: 'placeholder' }, 'No evaluations stored.'));
      return;
    }
    body.appendChild(renderEvalRuns(evaluations));
  } catch (err) {
    if (seq !== detailRequestSeq) return;
    body.innerHTML = '';
    body.appendChild(e('div', { class: 'error-msg' }, `Error: ${err.message}`));
  }
}
// Build the DOM for a session's evaluations: one ".eval-run" card per
// eval_run_id, each with a header strip, a score grid (one column per expert
// plus an average) and the experts' comments.
//
// rows: evaluation records. Fields read here: eval_run_id, eval_date,
//       judge_model, judge_version, rubric_version, expert_id, scores, comment.
// Returns a <div> wrapping the cards. Cards appear in the order their run id
// is first seen in `rows` (Map insertion order), so the caller's ordering is kept.
function renderEvalRuns(rows) {
  const axes = ['task_complexity', 'goal_completion', 'tool_usage_quality',
    'efficiency', 'communication', 'subagent_orchestration', 'self_extension'];
  // Group by run id.
  const byRun = new Map();
  for (const r of rows) {
    const k = r.eval_run_id;
    if (!byRun.has(k)) byRun.set(k, []);
    byRun.get(k).push(r);
  }
  const wrap = e('div');
  for (const [runId, group] of byRun) {
    const head = group[0];
    // The stylesheet fixes .axis-row at name + 4 columns, i.e. exactly three
    // experts. Size the grid from the actual expert count (+1 for "avg") so
    // other counts stay on one row. With three experts this equals the CSS rule.
    const gridStyle = `grid-template-columns: 200px repeat(${group.length + 1}, 80px);`;
    const head_el = e('div', { class: 'eval-run-head' },
      e('span', {}, e('b', {}, fmtDate(head.eval_date))),
      e('span', {}, `judge=${head.judge_model} (${head.judge_version})`),
      e('span', {}, `rubric=${head.rubric_version}`),
      // A record without a run id must not throw.
      e('span', { class: 'session-id-mono' }, `run=${runId ? String(runId).slice(0, 8) : '—'}`),
    );
    // Column label: the part of expert_id after the last underscore.
    const expertLabels = group.map(g => String(g.expert_id == null ? '?' : g.expert_id).split('_').pop());
    const headerRow = e('div', { class: 'axis-row head', style: gridStyle },
      e('span', { class: 'axis-name' }, 'axis'),
      ...expertLabels.map(label => e('span', { class: 'score' }, label)),
      e('span', { class: 'score' }, 'avg'),
    );
    const rowsEl = [headerRow];
    for (const axis of axes) {
      // A record with no `scores` object contributes a dash for every axis.
      const vals = group.map(g => (g.scores || {})[axis]);
      const nonNull = vals.filter(v => v != null);
      // Average over the experts that scored this axis, rounded to an integer.
      const avg = nonNull.length ? Math.round(nonNull.reduce((s, v) => s + v, 0) / nonNull.length) : null;
      rowsEl.push(e('div', { class: 'axis-row', style: gridStyle },
        e('span', { class: 'axis-name' }, axis),
        ...vals.map(v => e('span', { class: 'score' }, numOrDash(v))),
        e('span', { class: 'score avg' }, numOrDash(avg)),
      ));
    }
    const comments = e('div', { class: 'comments' },
      ...group.map(g => e('div', { class: 'comment' },
        e('span', { class: 'comment-by' }, g.expert_id),
        g.comment,
      )),
    );
    const block = e('div', { class: 'eval-run' }, head_el, ...rowsEl, comments);
    wrap.appendChild(block);
  }
  return wrap;
}
// "load" button: route to #detail/<id>, which makes the hashchange handler
// load the session. Two cases the hash alone does not cover:
//  - empty input: call loadDetail() so the "Enter a session id." prompt shows;
//  - the hash already names this id: assigning the same hash fires no
//    hashchange, so load directly. Otherwise "load" could never refresh the
//    session currently on screen.
$('#btn-load-detail').addEventListener('click', () => {
  const id = $('#detail-session-id').value.trim();
  if (!id) { loadDetail(); return; }
  const before = location.hash;
  // Encoded because the hash router decodes this segment.
  location.hash = `#detail/${encodeURIComponent(id)}`;
  if (location.hash === before) loadDetail();
});
// "evaluate this session" button: start a single-session eval run using the
// model/backend currently entered on the Run tab (or the defaults).
// A failed request is reported with the same alert the Run tab's start button
// uses, instead of surfacing as an unhandled promise rejection. On success the
// new run is attached so the Run tab shows the progress the alert refers to.
$('#btn-detail-evaluate').addEventListener('click', async () => {
  const id = $('#detail-session-id').value.trim();
  if (!id) return;
  try {
    const status = await api.startRun({
      scope: 'session', session_id: id,
      model: $('#run-model').value || 'gemma4:31b-cloud',
      backend: $('#run-backend').value || 'ollama',
    });
    if (status && status.run_id) attachActiveRun(status.run_id);
    alert('Run started — check the Run tab for progress.');
  } catch (err) {
    alert(`Failed to start run: ${err.message}`);
  }
});
// ── Stats tab ─────────────────────────────────────────────────────────
// Sequence number of the most recent loadStats() call, used to discard
// responses that a newer request has superseded.
let statsRequestSeq = 0;
// Fetch weekly stats for the selected window (optionally split by complexity
// bucket) and render them as a table in #stats-body.
//
// Only the most recent call renders, so a slow response for an earlier
// window cannot overwrite a newer one. Rows with a missing `axis_means` or
// `week_start` render dashes instead of throwing.
async function loadStats() {
  const seq = ++statsRequestSeq;
  const body = $('#stats-body');
  body.innerHTML = '<div class="placeholder">Loading…</div>';
  try {
    const params = {
      days: $('#stats-days').value,
      by_complexity_bucket: $('#stats-by-bucket').checked ? 'true' : 'false',
    };
    const data = await api.stats(params);
    if (seq !== statsRequestSeq) return; // superseded while waiting
    body.innerHTML = '';
    if (data.weekly.length === 0) {
      body.appendChild(e('div', { class: 'placeholder' }, 'No evaluations in this window.'));
      return;
    }
    const axes = ['task_complexity', 'goal_completion', 'tool_usage_quality',
      'efficiency', 'communication', 'subagent_orchestration', 'self_extension'];
    const tbl = e('table', { class: 'stats-table' });
    const thead = e('thead', {}, e('tr', {},
      e('th', {}, 'week'),
      e('th', {}, 'bucket'),
      e('th', {}, 'n'),
      // Axis keys become headers with underscores turned into spaces.
      ...axes.map(a => e('th', {}, a.replace(/_/g, ' '))),
    ));
    const tbody = e('tbody');
    for (const w of data.weekly) {
      const means = w.axis_means || {};
      tbody.appendChild(e('tr', {},
        // Keep only the first 10 characters of week_start.
        e('td', {}, w.week_start ? String(w.week_start).slice(0, 10) : '—'),
        e('td', { class: 'bucket-cell' }, w.bucket),
        e('td', { class: 'right dim' }, w.sample_count),
        ...axes.map(a => e('td', { class: 'right' }, numOrDash(means[a]))),
      ));
    }
    tbl.appendChild(thead);
    tbl.appendChild(tbody);
    body.appendChild(tbl);
  } catch (err) {
    if (seq !== statsRequestSeq) return;
    body.innerHTML = '';
    body.appendChild(e('div', { class: 'error-msg' }, `Error: ${err.message}`));
  }
}
$('#btn-load-stats').addEventListener('click', loadStats);
// ── Run tab ───────────────────────────────────────────────────────────
// Id of the run currently attached to the #run-active panel; null until
// attachActiveRun() is called.
let activeRunId = null;
// Handle returned by setInterval for the pollActiveRun timer; null while no
// polling is scheduled.
let runPollTimer = null;
// Fetch the list of eval runs and render it as a table in #run-history.
// Clicking a row attaches that run to the "Active / latest run" panel.
// An empty list shows the "No runs yet." placeholder; a failed request shows
// the error message in place of the table.
async function loadRunHistory() {
  const container = $('#run-history');
  try {
    const runs = await api.runs();
    if (runs.length === 0) {
      container.innerHTML = '<div class="placeholder">No runs yet.</div>';
      return;
    }
    container.innerHTML = '';
    const headings = ['started', 'state', 'judge_model', 'sessions', 'ok / fail', 'run_id'];
    const table = e('table', {});
    table.appendChild(e('thead', {}, e('tr', {}, ...headings.map(text => e('th', {}, text)))));
    const bodyEl = e('tbody');
    for (const run of runs) {
      // A run without a sessions array counts as zero sessions.
      const sessions = run.sessions || [];
      const okCount = sessions.filter(s => s.state === 'ok').length;
      const failCount = sessions.filter(s => s.state === 'failed').length;
      const cells = [
        e('td', { class: 'dim' }, fmtDateShort(run.started_at)),
        e('td', {}, e('span', { class: `pill pill-${run.state}` }, run.state)),
        e('td', {}, run.judge_model),
        e('td', { class: 'right' }, sessions.length),
        e('td', {}, `${okCount} / ${failCount}`),
        e('td', { class: 'session-id-mono' }, shortId(run.run_id)),
      ];
      const rowAttrs = {
        class: 'clickable',
        onclick: () => attachActiveRun(run.run_id),
      };
      bodyEl.appendChild(e('tr', rowAttrs, ...cells));
    }
    table.appendChild(bodyEl);
    container.appendChild(table);
  } catch (err) {
    container.innerHTML = '';
    container.appendChild(e('div', { class: 'error-msg' }, `Error: ${err.message}`));
  }
}
// Make `runId` the run shown in the active-run panel: cancel any polling
// already scheduled, poll once immediately, then keep polling every 2.5 s.
function attachActiveRun(runId) {
  const pollEveryMs = 2500;
  if (runPollTimer) {
    clearInterval(runPollTimer);
    runPollTimer = null;
  }
  activeRunId = runId;
  pollActiveRun();
  runPollTimer = setInterval(pollActiveRun, pollEveryMs);
}
// Fetch the status of the attached run and render it into #run-active.
// Once the run is no longer 'running', polling stops and the history table
// is refreshed.
//
// The run id is captured before the request. If a different run is attached
// while the request is in flight, the late response is dropped: previously it
// overwrote the panel and, when the old run had finished, cancelled the new
// run's poll timer.
// A failed request only shows the error; the interval keeps running, so a
// transient failure recovers on the next tick.
async function pollActiveRun() {
  const runId = activeRunId;
  if (!runId) return;
  const wrap = $('#run-active');
  try {
    const s = await api.run(runId);
    if (runId !== activeRunId) return; // another run was attached meanwhile
    // Tolerate a missing sessions array, as loadRunHistory does.
    const sessions = s.sessions || [];
    wrap.innerHTML = '';
    const head = e('div', { class: 'run-status' },
      e('div', {},
        e('b', {}, `run ${shortId(s.run_id)}`),
        ' ',
        e('span', { class: `pill pill-${s.state}` }, s.state),
        ' ',
        e('span', { class: 'dim' }, `started ${fmtDate(s.started_at)}`),
        s.finished_at ? e('span', { class: 'dim' }, ` finished ${fmtDate(s.finished_at)}`) : null,
      ),
      e('div', { style: 'margin-top:6px;' },
        `judge=${s.judge_model} (${s.judge_version}) rubric=${s.rubric_version}`,
      ),
      e('table', { style: 'margin-top: 10px;' },
        e('thead', {}, e('tr', {},
          e('th', {}, 'session'),
          e('th', {}, 'state'),
          e('th', {}, 'avg'),
          e('th', {}, 'note'),
        )),
        e('tbody', {},
          ...sessions.map(ss => e('tr', {},
            e('td', { class: 'session-id-mono' }, shortId(ss.session_id)),
            e('td', {}, e('span', { class: `pill pill-${ss.state}` }, ss.state)),
            e('td', {}, ss.avg ? `goal=${ss.avg.goal_completion} tools=${ss.avg.tool_usage_quality} comm=${ss.avg.communication}` : ''),
            e('td', { class: 'dim' }, ss.error || ''),
          )),
        ),
      ),
    );
    wrap.appendChild(head);
    if (s.state !== 'running') {
      clearInterval(runPollTimer);
      runPollTimer = null;
      loadRunHistory();
    }
  } catch (err) {
    if (runId !== activeRunId) return;
    wrap.innerHTML = '';
    wrap.appendChild(e('div', { class: 'error-msg' }, `Error: ${err.message}`));
  }
}
// "start run" button: build the request from the run form, start the run, and
// attach it to the active-run panel.
//  - scope "single session" without a session id is rejected locally rather
//    than being sent to the server.
//  - The button is disabled while the request is in flight, so a double click
//    cannot start two runs.
$('#btn-trigger-run').addEventListener('click', async () => {
  const btn = $('#btn-trigger-run');
  const body = {
    scope: $('#run-scope').value,
    session_id: $('#run-session-id').value.trim() || null,
    since: $('#run-since').value ? new Date($('#run-since').value).toISOString() : null,
    limit: $('#run-limit').value ? Number($('#run-limit').value) : null,
    model: $('#run-model').value || 'gemma4:31b-cloud',
    backend: $('#run-backend').value || 'ollama',
  };
  // Strip nulls — RunRequest fields are optional but must serialise cleanly.
  for (const k of Object.keys(body)) if (body[k] == null || body[k] === '') delete body[k];
  if (body.scope === 'session' && !body.session_id) {
    alert('Failed to start run: scope "single session" needs a session id.');
    return;
  }
  btn.disabled = true;
  try {
    const status = await api.startRun(body);
    attachActiveRun(status.run_id);
  } catch (err) {
    alert(`Failed to start run: ${err.message}`);
  } finally {
    btn.disabled = false;
  }
});
// ── boot ──────────────────────────────────────────────────────────────
// The header badge and the profile filter load in the background; both are
// async and neither is awaited.
loadVersionBadge();
loadProfileFilter();
// Activate the tab named by the current URL hash (Sessions when there is none).
applyHash();
</script>
</body>
</html>