Newer
Older
navi-1 / debug / eval / index.html
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Navi — Eval</title>
  <style>
    *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

    /* Colour tokens shared by the rules below; everything else references
       these through var(). */
    :root {
      /* Surfaces: page background, then progressively lighter panels. */
      --bg:       #0e0e0e;
      --bg2:      #161616;
      --bg3:      #1e1e1e;
      --bg4:      #222;
      /* Hairlines: --border for separators, --border2 for control outlines. */
      --border:   #252525;
      --border2:  #2e2e2e;
      /* Text: primary, secondary, and dimmed. */
      --text:     #d4d4d4;
      --text2:    #888;
      --text3:    #555;
      /* Highlight colour: logo, active tab underline, primary button, avg scores. */
      --accent:   #4ec9b0;

      /* Status colours used as pill foregrounds and for error text.
         --c-tool is not referenced by any rule visible in this stylesheet. */
      --c-good:   #6a9955;
      --c-warn:   #dcdcaa;
      --c-bad:    #f48771;
      --c-stale:  #c586c0;
      --c-user:   #569cd6;
      --c-tool:   #ce9178;
    }

    html, body { height: 100%; }
    body {
      font-family: ui-monospace, "Cascadia Code", "Fira Code", "JetBrains Mono", monospace;
      font-size: 13px;
      background: var(--bg);
      color: var(--text);
      display: flex;
      flex-direction: column;
      height: 100vh;
      overflow: hidden;
    }

    /* ── Header ── */
    header {
      flex-shrink: 0;
      background: var(--bg2);
      border-bottom: 1px solid var(--border);
      padding: 0 14px;
      display: flex;
      align-items: stretch;
      gap: 10px;
    }
    .logo {
      font-size: 11px;
      color: var(--text3);
      letter-spacing: .1em;
      text-transform: uppercase;
      white-space: nowrap;
      align-self: center;
      padding: 10px 0;
    }
    .logo b { color: var(--accent); }

    .tabs { display: flex; align-items: stretch; gap: 2px; margin-left: 10px; }
    .tab {
      padding: 0 14px;
      font-family: inherit; font-size: 12px;
      color: var(--text3);
      background: none; border: none;
      border-bottom: 2px solid transparent;
      cursor: pointer;
      transition: color .15s, border-color .15s;
    }
    .tab:hover { color: var(--text2); }
    .tab.active { color: var(--text); border-bottom-color: var(--accent); }

    .header-right {
      margin-left: auto;
      display: flex; align-items: center; gap: 10px;
      font-size: 11px; color: var(--text3);
      white-space: nowrap;
    }
    .header-right a { color: var(--text2); text-decoration: none; }
    .header-right a:hover { color: var(--text); }

    /* ── Form controls ── */
    input, button, select {
      font-family: inherit;
      font-size: 12px;
      background: var(--bg3);
      border: 1px solid var(--border2);
      color: var(--text);
      border-radius: 4px;
      outline: none;
    }
    /* The default outline is removed above, so keyboard users need an explicit
       replacement. :focus-visible keeps it from appearing on mouse clicks. */
    input:focus-visible, button:focus-visible, select:focus-visible {
      outline: 1px solid var(--accent);
      outline-offset: 1px;
    }
    input, select { padding: 4px 8px; }
    input:focus, select:focus { border-color: #555; }
    button { padding: 4px 10px; cursor: pointer; }
    button:hover { background: var(--bg4); border-color: #555; }
    button:disabled { opacity: 0.4; cursor: not-allowed; }
    button.primary { background: #1e3a2f; border-color: var(--accent); color: var(--accent); }
    button.primary:hover { background: #28503f; }

    /* ── Layout ── */
    main { flex: 1; overflow-y: auto; padding: 14px 16px; }
    .pane { display: none; }
    .pane.active { display: block; }

    h2 { font-size: 13px; color: var(--text2); margin-bottom: 10px; font-weight: 600;
         text-transform: uppercase; letter-spacing: .08em; }
    h3 { font-size: 12px; color: var(--text3); margin: 12px 0 6px; font-weight: 600;
         text-transform: uppercase; letter-spacing: .06em; }

    .filters {
      display: flex; gap: 8px; align-items: center;
      margin-bottom: 12px;
      padding-bottom: 12px;
      border-bottom: 1px solid var(--border);
      flex-wrap: wrap;
    }
    .filters label { font-size: 11px; color: var(--text3); margin-right: 4px; }

    /* ── Table ── */
    table {
      width: 100%;
      border-collapse: collapse;
      font-size: 12px;
    }
    th, td {
      padding: 6px 10px;
      text-align: left;
      border-bottom: 1px solid var(--border);
      vertical-align: middle;
    }
    th { color: var(--text3); font-weight: 600; font-size: 10px;
         text-transform: uppercase; letter-spacing: .06em;
         background: var(--bg2); position: sticky; top: 0; z-index: 1; }
    tr:hover td { background: var(--bg2); }
    tr.clickable { cursor: pointer; }
    /* .dim is also put on <span>s by the run-status renderer, so it must not
       be limited to table cells. */
    .dim { color: var(--text3); }
    /* Header cells carry class="right" too; align them with their column. */
    th.right, td.right { text-align: right; }

    .pill { font-size: 10px; padding: 2px 6px; border-radius: 3px;
            text-transform: uppercase; letter-spacing: .04em; }
    .pill-evaluated { background: #1e3a2f; color: var(--c-good); }
    .pill-pending   { background: #3a3a1e; color: var(--c-warn); }
    .pill-stale     { background: #2d1e3a; color: var(--c-stale); }
    .pill-running   { background: #1e2d40; color: var(--c-user); }
    .pill-failed    { background: #3a1e2d; color: var(--c-bad); }
    .pill-ok        { background: #1e3a2f; color: var(--c-good); }

    .session-id-mono { font-size: 11px; color: var(--text2); }

    /* ── Detail tab ── */
    .meta-grid {
      display: grid;
      grid-template-columns: 140px 1fr;
      gap: 4px 16px;
      font-size: 12px;
      margin-bottom: 18px;
    }
    .meta-grid dt { color: var(--text3); }
    .meta-grid dd { color: var(--text); word-break: break-all; }

    .eval-run {
      border: 1px solid var(--border);
      border-radius: 6px;
      margin-bottom: 14px;
      background: var(--bg2);
      overflow: hidden;
    }
    .eval-run-head {
      padding: 8px 12px;
      background: var(--bg3);
      border-bottom: 1px solid var(--border);
      font-size: 11px;
      color: var(--text2);
      display: flex; gap: 12px; flex-wrap: wrap;
    }
    .eval-run-head b { color: var(--text); }

    /* One line of the per-run score grid: an axis-name column followed by
       score columns. The track list is fixed at 200px + four 80px columns. */
    .axis-row {
      display: grid;
      grid-template-columns: 200px 80px 80px 80px 80px;
      gap: 8px;
      padding: 4px 12px;
      font-size: 11px;
      align-items: center;
    }
    /* Column-title line at the top of the grid. */
    .axis-row.head { color: var(--text3); border-bottom: 1px solid var(--border); padding-bottom: 6px; padding-top: 6px; }
    .axis-row .axis-name { color: var(--text2); }
    .axis-row .score { text-align: right; }
    /* The averaged column is highlighted with the accent colour. */
    .axis-row .score.avg { color: var(--accent); font-weight: 600; }
    /* NOTE(review): no script visible in this file emits a .spread row — confirm it is still needed. */
    .axis-row.spread { background: var(--bg3); border-top: 1px solid var(--border); }

    .comments {
      padding: 6px 12px 10px;
      border-top: 1px solid var(--border);
    }
    .comments .comment {
      font-size: 11px;
      color: var(--text);
      margin: 6px 0;
      padding-left: 12px;
      border-left: 2px solid var(--border2);
    }
    .comments .comment-by { color: var(--text3); font-size: 10px; text-transform: uppercase; letter-spacing: .06em; margin-right: 6px; }

    /* ── Stats tab ── */
    .stats-table th { text-align: right; }
    .stats-table th:first-child, .stats-table td:first-child { text-align: left; }
    .stats-table td.bucket-cell { color: var(--text2); }

    /* ── Run tab ── */
    .run-form {
      display: grid;
      grid-template-columns: 140px 1fr;
      gap: 8px 12px;
      margin-bottom: 14px;
      max-width: 580px;
    }
    .run-form label { font-size: 11px; color: var(--text3); align-self: center; }
    .run-form input, .run-form select { width: 100%; }

    .run-status {
      border: 1px solid var(--border);
      border-radius: 6px;
      padding: 10px 12px;
      background: var(--bg2);
      margin-top: 12px;
      font-size: 11px;
    }

    .placeholder {
      text-align: center; color: var(--text3); padding: 60px 0; font-size: 12px;
    }
    .error-msg { color: var(--c-bad); padding: 12px; font-size: 12px; }
  </style>
</head>
<body>

<!-- Page header: logo, tab switcher, version badge and links to sibling pages. -->
<header>
  <div class="logo"><b>NAVI</b> EVAL</div>
  <!-- Each button's data-tab value selects the pane whose id is "tab-" plus that value. -->
  <div class="tabs">
    <button class="tab active" data-tab="sessions">Sessions</button>
    <button class="tab" data-tab="detail">Detail</button>
    <button class="tab" data-tab="stats">Stats</button>
    <button class="tab" data-tab="run">Run</button>
  </div>
  <div class="header-right">
    <!-- Placeholder text; replaced by the script once it has loaded. -->
    <span id="version-badge">judge=… rubric=…</span>
    <a href="/#/admin">Admin</a>
    <a href="/debug">/debug</a>
    <a href="/">app</a>
  </div>
</header>

<main>

  <!-- ── Sessions tab ────────────────────────────────────────────────── -->
  <!-- Filter bar plus the session list; the script fills #sessions-tbody
       and #sessions-count. Labels are tied to their controls with for/id. -->
  <div id="tab-sessions" class="pane active">
    <div class="filters">
      <label for="filter-profile">profile</label>
      <select id="filter-profile">
        <option value="">all</option>
      </select>
      <label for="filter-status">status</label>
      <select id="filter-status">
        <option value="">all</option>
        <option value="evaluated">evaluated</option>
        <option value="pending">pending</option>
        <option value="stale">stale</option>
      </select>
      <label for="filter-limit">limit</label>
      <select id="filter-limit">
        <option>50</option>
        <option>100</option>
        <option>200</option>
      </select>
      <button type="button" id="btn-refresh">refresh</button>
      <span id="sessions-count" style="margin-left:auto; color: var(--text3); font-size: 11px;"></span>
    </div>
    <div id="sessions-table-wrap">
      <table>
        <thead>
          <tr>
            <th scope="col">started</th>
            <th scope="col">profile</th>
            <th scope="col">session</th>
            <th scope="col">name</th>
            <th scope="col" class="right">msgs</th>
            <!-- Emoji-only headers get a text name for assistive technology. -->
            <th scope="col" class="right" aria-label="likes">👍</th>
            <th scope="col" class="right" aria-label="dislikes">👎</th>
            <th scope="col">status</th>
            <th scope="col" class="right">avg goal</th>
            <th scope="col" class="right">avg tools</th>
            <th scope="col" class="right">avg comm</th>
          </tr>
        </thead>
        <tbody id="sessions-tbody"></tbody>
      </table>
    </div>
  </div>

  <!-- ── Detail tab ──────────────────────────────────────────────────── -->
  <!-- Session id entry plus the container (#detail-body) that the script
       replaces with the session's metadata and evaluation runs. -->
  <div id="tab-detail" class="pane">
    <div class="filters">
      <label for="detail-session-id">session id</label>
      <input id="detail-session-id" placeholder="paste session id" style="flex: 1; max-width: 380px;">
      <button type="button" id="btn-load-detail">load</button>
      <button type="button" id="btn-detail-evaluate">evaluate this session</button>
    </div>
    <div id="detail-body"><div class="placeholder">Pick a session from the Sessions tab or paste an id.</div></div>
  </div>

  <!-- ── Stats tab ───────────────────────────────────────────────────── -->
  <!-- Window/bucket controls plus the container (#stats-body) that the
       script fills with the weekly table. -->
  <div id="tab-stats" class="pane">
    <div class="filters">
      <label for="stats-days">days</label>
      <select id="stats-days">
        <option>7</option>
        <option>14</option>
        <option selected>30</option>
        <option>60</option>
        <option>90</option>
      </select>
      <label>
        <input type="checkbox" id="stats-by-bucket"> split by complexity bucket
      </label>
      <button type="button" id="btn-load-stats">load</button>
    </div>
    <div id="stats-body"><div class="placeholder">Press load to fetch.</div></div>
  </div>

  <!-- ── Run tab ─────────────────────────────────────────────────────── -->
  <!-- Form for starting an eval run, plus containers the script fills with
       the followed run (#run-active) and the list of runs (#run-history). -->
  <div id="tab-run" class="pane">
    <h2>Trigger eval run</h2>
    <div class="run-form">
      <label for="run-scope">scope</label>
      <select id="run-scope">
        <option value="unevaluated">unevaluated (current rubric)</option>
        <option value="session">single session</option>
        <option value="all">all sessions (re-eval)</option>
      </select>
      <label for="run-session-id">session id</label>
      <input id="run-session-id" placeholder="only for scope=session">
      <label for="run-since">since</label>
      <input id="run-since" type="datetime-local">
      <label for="run-limit">limit</label>
      <input id="run-limit" type="number" min="1" placeholder="(optional)">
      <label for="run-model">model</label>
      <input id="run-model" value="gemma4:31b-cloud">
      <label for="run-backend">backend</label>
      <input id="run-backend" value="ollama">
      <!-- Empty cell so the button sits in the second grid column. -->
      <span></span>
      <div>
        <button type="button" id="btn-trigger-run" class="primary">start run</button>
      </div>
    </div>

    <h3>Active / latest run</h3>
    <div id="run-active"><div class="placeholder">No active run.</div></div>

    <h3>Run history</h3>
    <div id="run-history"><div class="placeholder">No runs yet.</div></div>
  </div>

</main>

<script>
'use strict';

// ── tiny utils ─────────────────────────────────────────────────────────
// $  → first element matching `sel` under `root` (or null).
// $$ → every element matching `sel` under `root`, as a real Array.
const $ = (sel, root = document) => root.querySelector(sel);
const $$ = (sel, root = document) => [...root.querySelectorAll(sel)];

// Element factory: e(tag, attrs, ...children).
//   attrs.class        → assigned to className
//   attrs.dataset      → copied onto el.dataset
//   attrs.on<event>    → registered as a listener when the value is a function
//   anything else      → setAttribute, skipped when the value is null/undefined
// Children are flattened one level; null, undefined and false are skipped,
// Nodes are appended as-is and every other value becomes a text node.
const e = (tag, attrs = {}, ...children) => {
  const el = document.createElement(tag);

  Object.keys(attrs).forEach(key => {
    const value = attrs[key];
    if (key === 'class') {
      el.className = value;
      return;
    }
    if (key === 'dataset') {
      Object.assign(el.dataset, value);
      return;
    }
    const isListener = key.startsWith('on') && typeof value === 'function';
    if (isListener) {
      el.addEventListener(key.slice(2), value);
      return;
    }
    if (value != null) el.setAttribute(key, value);
  });

  children.flat().forEach(child => {
    if (child == null || child === false) return;
    const node = child instanceof Node ? child : document.createTextNode(String(child));
    el.append(node);
  });

  return el;
};
// Parses a timestamp; returns null when it is missing or unparseable so the
// formatters below can show a dash instead of the text "Invalid Date".
const parseDate = iso => {
  if (!iso) return null;
  const d = new Date(iso);
  return Number.isNaN(d.getTime()) ? null : d;
};
// Local date and time in the sv-SE layout, cut to the first 16 characters
// (date plus hours and minutes); '—' for missing or invalid input.
const fmtDate = iso => {
  const d = parseDate(iso);
  return d ? d.toLocaleString('sv-SE').replace('T', ' ').slice(0, 16) : '—';
};
// Local date plus the first five characters of the local time, both in
// sv-SE; '—' for missing or invalid input.
const fmtDateShort = iso => {
  const d = parseDate(iso);
  return d ? d.toLocaleDateString('sv-SE') + ' ' + d.toLocaleTimeString('sv-SE').slice(0, 5) : '—';
};
// First eight characters of an id, or '—' when the id is empty/missing.
const shortId = id => id ? id.slice(0, 8) : '—';
// Passes a value through unchanged, replacing null/undefined with '—' (0 is kept).
const numOrDash = v => (v == null ? '—' : v);

// Thin JSON client for the endpoints this page calls.
// get/post resolve with the parsed JSON body and reject with an Error whose
// message is "<status> <statusText>: <response text>" on a non-2xx response.
const api = {
  async get(path) {
    const r = await fetch(path);
    if (!r.ok) throw new Error(`${r.status} ${r.statusText}: ${await r.text()}`);
    return r.json();
  },
  async post(path, body) {
    const r = await fetch(path, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    if (!r.ok) throw new Error(`${r.status} ${r.statusText}: ${await r.text()}`);
    return r.json();
  },
  sessions(params)  { return api.get(`/eval/sessions?${new URLSearchParams(params)}`); },
  // Ids can come from a free-text input, so they are percent-encoded before
  // being placed in a path segment ('/', '?', '#' would otherwise alter the URL).
  session(id)       { return api.get(`/eval/sessions/${encodeURIComponent(id)}`); },
  stats(params)     { return api.get(`/eval/stats?${new URLSearchParams(params)}`); },
  startRun(body)    { return api.post('/eval/run', body); },
  run(id)           { return api.get(`/eval/run/${encodeURIComponent(id)}`); },
  runs()            { return api.get('/eval/runs'); },
  // Best-effort: a failed profile lookup resolves to an empty list.
  profiles()        { return api.get('/agents/profiles').catch(() => []); },
};

// ── tab routing ────────────────────────────────────────────────────────
// Shows the pane with id `tab-<name>`, marks the matching tab button active
// and kicks off that tab's loader. A name with no matching pane (for example
// a mistyped URL hash) falls back to the Sessions tab; without the fallback
// every tab and pane would lose its `active` class and the page would be blank.
function activateTab(name) {
  if (!document.getElementById(`tab-${name}`)) name = 'sessions';
  $$('.tab').forEach(t => t.classList.toggle('active', t.dataset.tab === name));
  $$('.pane').forEach(p => p.classList.toggle('active', p.id === `tab-${name}`));
  if (name === 'sessions') loadSessions();
  if (name === 'stats')    loadStats();
  if (name === 'run')      loadRunHistory();
}

// Clicking a tab records it in the URL hash and switches to it.
// replaceState does not fire `hashchange`, so the switch is made directly.
for (const tabButton of $$('.tab')) {
  tabButton.addEventListener('click', () => {
    const { tab } = tabButton.dataset;
    history.replaceState(null, '', '#' + tab);
    activateTab(tab);
  });
}

// hash routing: #<tab> or #detail/<id>
// Reads location.hash and activates the named tab; with no usable hash the
// Sessions tab is shown. For `#detail/<id>` the id is copied into the
// session-id input and the detail view is loaded.
function applyHash() {
  const m = location.hash.match(/^#(\w+)(?:\/(.+))?/);
  if (!m) { activateTab('sessions'); return; }
  const tab = m[1];
  const rawId = m[2];
  if (tab === 'detail' && rawId) {
    // decodeURIComponent throws URIError on a malformed escape such as a
    // lone '%'; fall back to the raw text instead of aborting the routing.
    let id = rawId;
    try { id = decodeURIComponent(rawId); } catch (_) { /* keep rawId */ }
    activateTab('detail');
    $('#detail-session-id').value = id;
    loadDetail();
  } else {
    activateTab(tab);
  }
}
window.addEventListener('hashchange', applyHash);

// ── version badge ─────────────────────────────────────────────────────
// Fills the header badge with the judge/rubric versions taken from one
// evaluated session row. When no row comes back, or the request fails,
// the badge shows the hard-coded fallback text instead.
async function loadVersionBadge() {
  let label = 'judge=v1 rubric=v1';
  try {
    const latest = (await api.sessions({ limit: 1, status: 'evaluated' }))[0];
    if (latest) {
      label = `judge=${latest.latest_judge_version || '?'} rubric=${latest.latest_rubric_version || '?'}`;
    }
  } catch (_) {}
  $('#version-badge').textContent = label;
}

// ── populate profile filter ───────────────────────────────────────────
// Adds one <option> per profile id to the profile dropdown. Best-effort:
// any failure leaves the dropdown with whatever options it already has.
async function loadProfileFilter() {
  try {
    const list = await api.profiles();
    const dropdown = $('#filter-profile');
    for (const { id } of list) {
      const option = e('option', { value: id }, id);
      dropdown.appendChild(option);
    }
  } catch (_) {}
}

// ── Sessions tab ──────────────────────────────────────────────────────
// Sequence number of the most recent loadSessions() call. A response is
// rendered only if no newer call has started since it was requested.
let sessionsRequestSeq = 0;

// Fetches sessions for the current filter values and renders them into
// #sessions-tbody, updating #sessions-count. Clicking a row navigates to
// `#detail/<session id>`.
async function loadSessions() {
  const tbody = $('#sessions-tbody');
  const countEl = $('#sessions-count');
  const seq = ++sessionsRequestSeq;
  const isStale = () => seq !== sessionsRequestSeq;
  tbody.innerHTML = '';
  const params = {
    limit: $('#filter-limit').value,
    offset: 0,
  };
  const profile = $('#filter-profile').value;
  const status  = $('#filter-status').value;
  if (profile) params.profile = profile;
  if (status)  params.status  = status;
  try {
    const rows = await api.sessions(params);
    // A newer request owns the table now. Appending here would duplicate
    // rows or show results for filters that are no longer selected.
    if (isStale()) return;
    countEl.textContent = `${rows.length} row${rows.length === 1 ? '' : 's'}`;
    if (rows.length === 0) {
      tbody.appendChild(e('tr', {}, e('td', { colspan: 11, class: 'dim' }, 'No sessions matched.')));
      return;
    }
    for (const r of rows) {
      const avg = r.latest_avg || {};
      const tr = e('tr', {
        class: 'clickable',
        // applyHash() decodes this segment, so encode it on the way in.
        onclick: () => { location.hash = `#detail/${encodeURIComponent(r.session_id)}`; },
      },
        e('td', { class: 'dim' }, fmtDateShort(r.created_at)),
        e('td', {}, r.profile_id),
        e('td', { class: 'session-id-mono' }, shortId(r.session_id)),
        e('td', {}, r.name || ''),
        e('td', { class: 'right' }, r.msg_count),
        // Zero likes/dislikes are shown as an empty cell.
        e('td', { class: 'right' }, r.likes || ''),
        e('td', { class: 'right' }, r.dislikes || ''),
        e('td', {}, e('span', { class: `pill pill-${r.eval_status}` }, r.eval_status)),
        e('td', { class: 'right' }, numOrDash(avg.goal_completion)),
        e('td', { class: 'right' }, numOrDash(avg.tool_usage_quality)),
        e('td', { class: 'right' }, numOrDash(avg.communication)),
      );
      tbody.appendChild(tr);
    }
  } catch (err) {
    if (isStale()) return;
    // Do not leave a previous count, or partially rendered rows, next to the error.
    countEl.textContent = '';
    tbody.innerHTML = '';
    tbody.appendChild(e('tr', {}, e('td', { colspan: 11, class: 'error-msg' }, `Error: ${err.message}`)));
  }
}

// Reload on an explicit refresh and whenever a filter changes.
$('#btn-refresh').addEventListener('click', loadSessions);
$('#filter-profile').addEventListener('change', loadSessions);
$('#filter-status').addEventListener('change', loadSessions);
$('#filter-limit').addEventListener('change', loadSessions);

// ── Detail tab ────────────────────────────────────────────────────────
// Sequence number of the most recent loadDetail() call; used to ignore
// responses that arrive after a newer session has been requested.
let detailRequestSeq = 0;

// Loads the session whose id is in #detail-session-id and renders its
// metadata and evaluation runs into #detail-body.
async function loadDetail() {
  const id = $('#detail-session-id').value.trim();
  const body = $('#detail-body');
  const seq = ++detailRequestSeq;
  if (!id) { body.innerHTML = '<div class="placeholder">Enter a session id.</div>'; return; }
  body.innerHTML = '<div class="placeholder">Loading…</div>';
  try {
    const d = await api.session(id);
    // Another session was requested meanwhile; do not overwrite its view.
    if (seq !== detailRequestSeq) return;
    // Treat a missing list as empty rather than failing the whole view.
    const feedback = Array.isArray(d.feedback) ? d.feedback : [];
    const evaluations = Array.isArray(d.evaluations) ? d.evaluations : [];
    const likes = feedback.filter(f => f.rating === 1).length;
    const dislikes = feedback.filter(f => f.rating === -1).length;
    const meta = e('dl', { class: 'meta-grid' },
      e('dt', {}, 'session id'), e('dd', { class: 'session-id-mono' }, d.session_id),
      e('dt', {}, 'profile'),    e('dd', {}, d.profile_id),
      e('dt', {}, 'name'),       e('dd', {}, d.name || '—'),
      e('dt', {}, 'started'),    e('dd', {}, fmtDate(d.created_at)),
      e('dt', {}, 'last active'),e('dd', {}, fmtDate(d.last_active)),
      e('dt', {}, 'messages'),   e('dd', {}, d.msg_count),
      e('dt', {}, 'feedback'),   e('dd', {}, `${likes} 👍 / ${dislikes} 👎`),
      e('dt', {}, 'evaluations'),e('dd', {}, evaluations.length),
    );
    body.innerHTML = '';
    body.appendChild(meta);
    if (evaluations.length === 0) {
      body.appendChild(e('div', { class: 'placeholder' }, 'No evaluations stored.'));
      return;
    }
    body.appendChild(renderEvalRuns(evaluations));
  } catch (err) {
    if (seq !== detailRequestSeq) return;
    body.innerHTML = '';
    body.appendChild(e('div', { class: 'error-msg' }, `Error: ${err.message}`));
  }
}

// Builds the evaluation view for one session.
// `rows` holds one entry per (eval run, expert). Entries are grouped by
// eval_run_id in the order they arrive, and each group becomes a card with
// a header, a score grid (one column per expert plus an average) and the
// experts' comments. Returns the wrapping <div>.
function renderEvalRuns(rows) {
  const axes = ['task_complexity','goal_completion','tool_usage_quality',
                'efficiency','communication','subagent_orchestration','self_extension'];

  // A Map keeps insertion order, so runs appear in the order of `rows`.
  const byRun = new Map();
  for (const r of rows) {
    const k = r.eval_run_id;
    if (!byRun.has(k)) byRun.set(k, []);
    byRun.get(k).push(r);
  }

  const wrap = e('div');
  for (const [runId, group] of byRun) {
    const head = group[0];
    // The stylesheet's .axis-row grid has a fixed number of score columns.
    // Size it from the actual expert count so a run with more or fewer
    // experts still lines up.
    const gridStyle = `grid-template-columns: 200px repeat(${group.length + 1}, 80px);`;

    const head_el = e('div', { class: 'eval-run-head' },
      e('span', {}, e('b', {}, fmtDate(head.eval_date))),
      e('span', {}, `judge=${head.judge_model} (${head.judge_version})`),
      e('span', {}, `rubric=${head.rubric_version}`),
      // shortId() tolerates a missing run id where .slice() would throw.
      e('span', { class: 'session-id-mono' }, `run=${shortId(runId)}`),
    );

    // Column titles use the last '_'-separated part of each expert id.
    const expertLabels = group.map(g =>
      String(g.expert_id == null ? '?' : g.expert_id).split('_').pop());
    const headerRow = e('div', { class: 'axis-row head', style: gridStyle },
      e('span', { class: 'axis-name' }, 'axis'),
      ...expertLabels.map(label => e('span', { class: 'score' }, label)),
      e('span', { class: 'score' }, 'avg'),
    );
    const rowsEl = [headerRow];

    for (const axis of axes) {
      const vals = group.map(g => (g.scores || {})[axis]);
      // The average ignores experts that gave no score for this axis.
      const nonNull = vals.filter(v => v != null);
      const avg = nonNull.length ? Math.round(nonNull.reduce((s,v) => s + v, 0) / nonNull.length) : null;
      rowsEl.push(e('div', { class: 'axis-row', style: gridStyle },
        e('span', { class: 'axis-name' }, axis),
        ...vals.map(v => e('span', { class: 'score' }, numOrDash(v))),
        e('span', { class: 'score avg' }, numOrDash(avg)),
      ));
    }

    const comments = e('div', { class: 'comments' },
      ...group.map(g => e('div', { class: 'comment' },
        e('span', { class: 'comment-by' }, g.expert_id),
        g.comment,
      )),
    );

    const block = e('div', { class: 'eval-run' }, head_el, ...rowsEl, comments);
    wrap.appendChild(block);
  }
  return wrap;
}

// "load": route through the hash so the URL stays shareable. If the hash
// already points at this id no `hashchange` fires, so reload directly —
// otherwise pressing "load" again would do nothing.
$('#btn-load-detail').addEventListener('click', () => {
  const id = $('#detail-session-id').value.trim();
  if (!id) return;
  // applyHash() decodes this segment, so encode it on the way in.
  const target = `#detail/${encodeURIComponent(id)}`;
  if (location.hash === target) loadDetail();
  else location.hash = target;
});

// "evaluate this session": start a single-session run using the model and
// backend currently entered on the Run tab.
$('#btn-detail-evaluate').addEventListener('click', async () => {
  const id = $('#detail-session-id').value.trim();
  if (!id) return;
  try {
    const status = await api.startRun({
      scope: 'session', session_id: id,
      model: $('#run-model').value || 'gemma4:31b-cloud',
      backend: $('#run-backend').value || 'ollama',
    });
    // Follow the new run so the Run tab shows the progress the alert promises.
    if (status && status.run_id) attachActiveRun(status.run_id);
    alert('Run started — check the Run tab for progress.');
  } catch (err) {
    // Same failure reporting as the Run tab's start button.
    alert(`Failed to start run: ${err.message}`);
  }
});

// ── Stats tab ─────────────────────────────────────────────────────────
// Fetches weekly aggregates for the selected window and renders them as a
// table in #stats-body: one row per entry of `weekly`, one column per axis.
async function loadStats() {
  const container = $('#stats-body');
  container.innerHTML = '<div class="placeholder">Loading…</div>';
  try {
    const query = {
      days: $('#stats-days').value,
      by_complexity_bucket: $('#stats-by-bucket').checked ? 'true' : 'false',
    };
    const { weekly } = await api.stats(query);
    container.innerHTML = '';

    if (weekly.length === 0) {
      container.appendChild(e('div', { class: 'placeholder' }, 'No evaluations in this window.'));
      return;
    }

    const axisKeys = ['task_complexity','goal_completion','tool_usage_quality',
                      'efficiency','communication','subagent_orchestration','self_extension'];

    // Header: three fixed columns, then the axis names with '_' shown as spaces.
    const headings = ['week', 'bucket', 'n'].concat(axisKeys.map(k => k.replace(/_/g, ' ')));
    const headRow = e('tr', {}, headings.map(text => e('th', {}, text)));

    const bodyRows = [];
    for (const week of weekly) {
      const axisCells = axisKeys.map(k => e('td', { class: 'right' }, numOrDash(week.axis_means[k])));
      bodyRows.push(e('tr', {},
        e('td', {}, week.week_start.slice(0, 10)),
        e('td', { class: 'bucket-cell' }, week.bucket),
        e('td', { class: 'right dim' }, week.sample_count),
        axisCells,
      ));
    }

    container.appendChild(
      e('table', { class: 'stats-table' },
        e('thead', {}, headRow),
        e('tbody', {}, bodyRows),
      ),
    );
  } catch (err) {
    container.innerHTML = '';
    container.appendChild(e('div', { class: 'error-msg' }, `Error: ${err.message}`));
  }
}

$('#btn-load-stats').addEventListener('click', loadStats);

// ── Run tab ───────────────────────────────────────────────────────────
// Id of the run shown under "Active / latest run", and the interval handle
// of the poller that refreshes it (null while nothing is being polled).
let activeRunId = null;
let runPollTimer = null;

// Renders the list of runs into #run-history. Clicking a row makes that
// run the one shown (and polled) in the active-run panel.
async function loadRunHistory() {
  const container = $('#run-history');
  const countInState = (sessions, state) => sessions.filter(s => s.state === state).length;
  try {
    const runs = await api.runs();
    if (runs.length === 0) {
      container.innerHTML = '<div class="placeholder">No runs yet.</div>';
      return;
    }
    container.innerHTML = '';

    const headings = ['started', 'state', 'judge_model', 'sessions', 'ok / fail', 'run_id'];
    const bodyRows = [];
    for (const run of runs) {
      // A run without a `sessions` list is treated as having none.
      const sessions = run.sessions || [];
      const okCount = countInState(sessions, 'ok');
      const failCount = countInState(sessions, 'failed');
      bodyRows.push(e('tr', {
        class: 'clickable',
        onclick: () => attachActiveRun(run.run_id),
      },
        e('td', { class: 'dim' }, fmtDateShort(run.started_at)),
        e('td', {}, e('span', { class: `pill pill-${run.state}` }, run.state)),
        e('td', {}, run.judge_model),
        e('td', { class: 'right' }, sessions.length),
        e('td', {}, `${okCount} / ${failCount}`),
        e('td', { class: 'session-id-mono' }, shortId(run.run_id)),
      ));
    }

    container.appendChild(
      e('table', {},
        e('thead', {}, e('tr', {}, headings.map(text => e('th', {}, text)))),
        e('tbody', {}, bodyRows),
      ),
    );
  } catch (err) {
    container.innerHTML = '';
    container.appendChild(e('div', { class: 'error-msg' }, `Error: ${err.message}`));
  }
}

// Makes `runId` the run shown in the active-run panel: stops any poller
// left over from a previous run, refreshes the panel once immediately and
// then keeps refreshing it on a fixed interval.
function attachActiveRun(runId) {
  const POLL_INTERVAL_MS = 2500;
  if (runPollTimer) {
    clearInterval(runPollTimer);
    runPollTimer = null;
  }
  activeRunId = runId;
  pollActiveRun();
  runPollTimer = setInterval(pollActiveRun, POLL_INTERVAL_MS);
}

// Fetches the status of the currently followed run and re-renders
// #run-active. Polling stops, and the history list is refreshed, once the
// run reports a state other than 'running'. A failed request shows the
// error but leaves the poller running so a later tick can recover.
async function pollActiveRun() {
  if (!activeRunId) return;
  // Remember which run this request is for: the user may switch to another
  // run while the response is in flight.
  const runId = activeRunId;
  const wrap = $('#run-active');
  try {
    const s = await api.run(runId);
    // Stale response for a run that is no longer selected. Rendering it
    // would show the wrong run, and the clearInterval below would cancel
    // the poller that now belongs to the newly selected run.
    if (runId !== activeRunId) return;
    // Tolerate a missing list, matching loadRunHistory().
    const sessions = s.sessions || [];
    wrap.innerHTML = '';
    const head = e('div', { class: 'run-status' },
      e('div', {},
        e('b', {}, `run ${shortId(s.run_id)}`),
        '  ',
        e('span', { class: `pill pill-${s.state}` }, s.state),
        '  ',
        e('span', { class: 'dim' }, `started ${fmtDate(s.started_at)}`),
        s.finished_at ? e('span', { class: 'dim' }, `  finished ${fmtDate(s.finished_at)}`) : null,
      ),
      e('div', { style: 'margin-top:6px;' },
        `judge=${s.judge_model} (${s.judge_version}) rubric=${s.rubric_version}`,
      ),
      e('table', { style: 'margin-top: 10px;' },
        e('thead', {}, e('tr', {},
          e('th', {}, 'session'),
          e('th', {}, 'state'),
          e('th', {}, 'avg'),
          e('th', {}, 'note'),
        )),
        e('tbody', {},
          ...sessions.map(ss => e('tr', {},
            e('td', { class: 'session-id-mono' }, shortId(ss.session_id)),
            e('td', {}, e('span', { class: `pill pill-${ss.state}` }, ss.state)),
            e('td', {}, ss.avg ? `goal=${ss.avg.goal_completion} tools=${ss.avg.tool_usage_quality} comm=${ss.avg.communication}` : ''),
            e('td', { class: 'dim' }, ss.error || ''),
          )),
        ),
      ),
    );
    wrap.appendChild(head);

    if (s.state !== 'running') {
      clearInterval(runPollTimer);
      runPollTimer = null;
      loadRunHistory();
    }
  } catch (err) {
    if (runId !== activeRunId) return;
    wrap.innerHTML = '';
    wrap.appendChild(e('div', { class: 'error-msg' }, `Error: ${err.message}`));
  }
}

// "start run": build the request from the form, start the run and follow it
// in the active-run panel.
$('#btn-trigger-run').addEventListener('click', async ev => {
  // Captured before the first await; currentTarget is reset after dispatch.
  const btn = ev.currentTarget;
  const body = {
    scope: $('#run-scope').value,
    session_id: $('#run-session-id').value.trim() || null,
    // datetime-local has no zone; it is read as local time and sent as UTC ISO.
    since: $('#run-since').value ? new Date($('#run-since').value).toISOString() : null,
    limit: $('#run-limit').value ? Number($('#run-limit').value) : null,
    model: $('#run-model').value || 'gemma4:31b-cloud',
    backend: $('#run-backend').value || 'ollama',
  };
  // Strip nulls — RunRequest fields are optional but must serialise cleanly.
  for (const k of Object.keys(body)) if (body[k] == null || body[k] === '') delete body[k];

  // A single-session run without a session id cannot target anything.
  if (body.scope === 'session' && !body.session_id) {
    alert('Enter a session id to run with scope "single session".');
    return;
  }

  // Block repeat clicks while the request is in flight so one click starts one run.
  btn.disabled = true;
  try {
    const status = await api.startRun(body);
    attachActiveRun(status.run_id);
  } catch (err) {
    alert(`Failed to start run: ${err.message}`);
  } finally {
    btn.disabled = false;
  }
});

// ── boot ──────────────────────────────────────────────────────────────
// The three calls are started without awaiting one another. The first two
// are async and swallow their own request failures; applyHash() picks the
// initial tab from location.hash (Sessions when there is no hash), and
// activating a tab starts that tab's loader.
loadVersionBadge();
loadProfileFilter();
applyHash();

</script>
</body>
</html>