<!-- navi-1 / docs / visual.html (pager residue "Newer" / "Older" kept here as a comment so no text precedes the doctype) -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Navi — Architecture & Reference</title>
<style>
/* Design tokens: the colour palette every rule below reads through var(). */
:root {
  /* Surfaces: body uses --bg; panels, cards and chips use --bg2 / --bg3 */
  --bg: #0d1117;
  --bg2: #161b22;
  --bg3: #21262d;
  --border: #30363d;
  /* Foreground text: primary and de-emphasised */
  --text: #e6edf3;
  --muted: #8b949e;
  /* Accent hues for chips, badges, method tags and sequence lines */
  --accent: #58a6ff;
  --green: #3fb950;
  --orange: #d29922;
  --purple: #bc8cff;
  --red: #f85149;
  /* NOTE(review): #39d353 is a green-dominant hex despite the "cyan" name — confirm intended */
  --cyan: #39d353;
  --yellow: #e3b341;
}

/* Global reset: border-box sizing and no default margins or padding. */
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* Page shell: a flex row that holds the sidebar and the main column. */
body {
  display: flex;
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
  font-size: 14px;
  line-height: 1.6;
  color: var(--text);
  background: var(--bg);
}

/* ── Sidebar ── */
/* Fixed-width column, pinned to the top of the viewport, scrolling on its own. */
nav {
  position: sticky;
  top: 0;
  height: 100vh;
  width: 240px;
  min-width: 240px;
  padding: 20px 0;
  overflow-y: auto;
  background: var(--bg2);
  border-right: 1px solid var(--border);
}

/* Branding header at the top of the sidebar. */
nav .logo {
  margin-bottom: 12px;
  padding: 0 20px 16px;
  border-bottom: 1px solid var(--border);
}

nav .logo h1 {
  color: var(--accent);
  font-size: 20px;
  font-weight: 700;
  letter-spacing: -0.5px;
}

nav .logo p {
  margin-top: 2px;
  color: var(--muted);
  font-size: 11px;
}

/* Links: the transparent left border reserves room for the active marker. */
nav a {
  display: block;
  padding: 6px 20px;
  border-left: 2px solid transparent;
  color: var(--muted);
  font-size: 13px;
  text-decoration: none;
  transition: all 0.15s;
}

nav a:hover {
  background: var(--bg3);
  color: var(--text);
}

/* Declared after :hover so the active look wins while hovering. */
nav a.active {
  background: rgba(88,166,255,0.06);
  border-left-color: var(--accent);
  color: var(--accent);
}

/* Small uppercase caption above each group of links. */
nav .section-label {
  padding: 14px 20px 4px;
  color: var(--muted);
  font-size: 10px;
  font-weight: 600;
  letter-spacing: 0.8px;
  text-transform: uppercase;
}

/* ── Main ── */
/* Content column: takes the remaining width and is its own scroll container. */
main {
  flex: 1;
  max-width: 1100px;
  height: 100vh;
  padding: 32px 48px;
  overflow-y: auto;
}

/* scroll-margin-top leaves a gap above a section reached via an anchor link. */
section {
  margin-bottom: 64px;
  scroll-margin-top: 24px;
}

/* Section title: flex row so the icon and the text line up; underlined by a rule. */
h2 {
  display: flex;
  align-items: center;
  gap: 10px;
  margin-bottom: 16px;
  padding-bottom: 10px;
  border-bottom: 1px solid var(--border);
  color: var(--text);
  font-size: 22px;
  font-weight: 700;
}

h3 {
  margin: 24px 0 10px;
  color: var(--text);
  font-size: 15px;
  font-weight: 600;
}

h4 {
  margin: 16px 0 8px;
  color: var(--muted);
  font-size: 13px;
  font-weight: 600;
  letter-spacing: 0.5px;
  text-transform: uppercase;
}

/* Body copy is muted; <strong> inside it returns to the primary text colour. */
p {
  margin-bottom: 10px;
  color: var(--muted);
}

p strong {
  color: var(--text);
}

/* ── Cards ── */
/* Auto-filling grid: as many columns of at least 200px as fit. */
.cards {
  display: grid;
  grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
  gap: 12px;
  margin: 16px 0;
}

.card {
  padding: 14px 16px;
  background: var(--bg2);
  border: 1px solid var(--border);
  border-radius: 8px;
}

/* Card text slots: caption, main value, secondary line. */
.card .label {
  margin-bottom: 4px;
  color: var(--muted);
  font-size: 11px;
  letter-spacing: 0.5px;
  text-transform: uppercase;
}

.card .value {
  color: var(--text);
  font-size: 14px;
  font-weight: 600;
}

.card .sub {
  margin-top: 2px;
  color: var(--muted);
  font-size: 11px;
}

/* ── Tables ── */
/* Wrapper lets a wide table scroll horizontally instead of stretching the page. */
.table-wrap {
  margin: 12px 0;
  overflow-x: auto;
}

table {
  width: 100%;
  font-size: 13px;
  border-collapse: collapse;
}

/* Header cells: small uppercase captions on the raised surface. */
th {
  padding: 8px 12px;
  background: var(--bg3);
  border-bottom: 1px solid var(--border);
  color: var(--muted);
  font-size: 11px;
  font-weight: 600;
  letter-spacing: 0.5px;
  text-align: left;
  text-transform: uppercase;
}

td {
  padding: 8px 12px;
  border-bottom: 1px solid var(--border);
  color: var(--text);
  vertical-align: top;
}

/* The last row drops its divider; hovered rows get a faint highlight. */
tr:last-child td {
  border-bottom: none;
}

tr:hover td {
  background: rgba(255,255,255,0.02);
}

td code,
th code {
  font-family: 'SF Mono', Consolas, monospace;
  font-size: 12px;
}

/* ── Code ── */
/* Inline code: a bordered pill on the raised surface. */
code {
  padding: 1px 5px;
  background: var(--bg3);
  border: 1px solid var(--border);
  border-radius: 4px;
  color: var(--cyan);
  font-family: 'SF Mono', Consolas, 'Courier New', monospace;
  font-size: 12px;
}

/* Block code: a panel that scrolls horizontally when lines are long. */
pre {
  margin: 12px 0;
  padding: 16px;
  overflow-x: auto;
  background: var(--bg2);
  border: 1px solid var(--border);
  border-radius: 8px;
  color: var(--text);
  font-family: 'SF Mono', Consolas, monospace;
  font-size: 12px;
  line-height: 1.7;
}

/* Inside <pre> the inline-code pill styling is stripped. */
pre code {
  padding: 0;
  background: none;
  border: none;
  color: inherit;
}

/* ── Architecture diagram ── */
/* Outer panel: a centred vertical stack of layers and arrows. */
.arch-diagram {
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 0;
  margin: 16px 0;
  padding: 24px;
  background: var(--bg2);
  border: 1px solid var(--border);
  border-radius: 8px;
}

.arch-layer {
  width: 100%;
  max-width: 760px;
}

/* One titled box inside a layer. */
.arch-box {
  margin: 0 0 4px;
  padding: 12px 16px;
  background: var(--bg3);
  border: 1px solid var(--border);
  border-radius: 8px;
}

.arch-box .arch-title {
  margin-bottom: 8px;
  color: var(--muted);
  font-size: 11px;
  font-weight: 600;
  letter-spacing: 0.6px;
  text-transform: uppercase;
}

/* Wrapping row of chips (also used to lay boxes side by side). */
.arch-row {
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
}

.arch-chip {
  padding: 5px 10px;
  background: var(--bg);
  border: 1px solid var(--border);
  border-radius: 6px;
  color: var(--text);
  font-size: 12px;
  font-weight: 500;
  white-space: nowrap;
}

/* Colour variants tint both the border and the label. */
.arch-chip.blue {
  border-color: var(--accent);
  color: var(--accent);
}

.arch-chip.green {
  border-color: var(--green);
  color: var(--green);
}

.arch-chip.purple {
  border-color: var(--purple);
  color: var(--purple);
}

.arch-chip.orange {
  border-color: var(--orange);
  color: var(--orange);
}

.arch-chip.red {
  border-color: var(--red);
  color: var(--red);
}

.arch-chip.cyan {
  border-color: var(--cyan);
  color: var(--cyan);
}

/* Arrow glyph between two layers. */
.arch-arrow {
  padding: 2px 0;
  color: var(--muted);
  font-size: 18px;
  line-height: 1;
  text-align: center;
}

/* ── Flow diagram ── */
/* Vertical list of numbered steps. */
.flow {
  display: flex;
  flex-direction: column;
  gap: 0;
  max-width: 620px;
  margin: 16px 0;
}

/* position: relative anchors the connector line drawn by ::after. */
.flow-step {
  position: relative;
  display: flex;
  align-items: flex-start;
  gap: 12px;
  padding: 10px 0;
}

/* Connector: a 2px line below the number circle, on every step but the last. */
.flow-step:not(:last-child)::after {
  content: '';
  position: absolute;
  top: 38px;
  bottom: -8px;
  left: 15px;
  width: 2px;
  background: var(--border);
}

/* Number circle; z-index keeps it above the connector line. */
.flow-num {
  z-index: 1;
  display: flex;
  align-items: center;
  justify-content: center;
  width: 30px;
  min-width: 30px;
  height: 30px;
  background: var(--bg3);
  border: 1px solid var(--border);
  border-radius: 50%;
  color: var(--accent);
  font-size: 12px;
  font-weight: 700;
}

.flow-content {
  flex: 1;
}

/* Step title sits on its own line above the description. */
.flow-content strong {
  display: block;
  margin-bottom: 2px;
  color: var(--text);
  font-size: 13px;
}

.flow-content span {
  color: var(--muted);
  font-size: 12px;
}

/* ── Events table ── */
/* Monospace tag for an event name. */
.event-type {
  display: inline-block;
  padding: 2px 7px;
  border-radius: 4px;
  font-family: 'SF Mono', Consolas, monospace;
  font-size: 11px;
  font-weight: 600;
}

/* Tints: translucent fill paired with the matching solid text colour. */
.et-blue {
  background: rgba(88,166,255,0.12);
  color: var(--accent);
}

.et-green {
  background: rgba(63,185,80,0.12);
  color: var(--green);
}

.et-orange {
  background: rgba(210,153,34,0.12);
  color: var(--orange);
}

.et-purple {
  background: rgba(188,140,255,0.12);
  color: var(--purple);
}

.et-muted {
  background: var(--bg3);
  color: var(--muted);
}

/* ── Badge ── */
/* Small uppercase pill. */
.badge {
  display: inline-block;
  padding: 2px 7px;
  border-radius: 12px;
  font-size: 10px;
  font-weight: 600;
  letter-spacing: 0.3px;
  text-transform: uppercase;
}

/* Tints: translucent fill paired with the matching solid text colour. */
.badge-green {
  background: rgba(63,185,80,0.12);
  color: var(--green);
}

.badge-blue {
  background: rgba(88,166,255,0.12);
  color: var(--accent);
}

.badge-orange {
  background: rgba(210,153,34,0.12);
  color: var(--orange);
}

.badge-muted {
  background: var(--bg3);
  color: var(--muted);
}

/* ── Method badge ── */
/* Fixed-minimum-width tag so method labels line up in a column. */
.method {
  display: inline-block;
  font-size: 10px;
  font-weight: 700;
  padding: 2px 7px;
  border-radius: 4px;
  /* Same stack as the other monospace rules in this sheet: without Consolas,
     systems lacking SF Mono fell back to a different face here than elsewhere. */
  font-family: 'SF Mono', Consolas, monospace;
  letter-spacing: 0.3px;
  min-width: 46px;
  text-align: center;
}

/* One tint per method: translucent fill plus the matching solid text colour. */
.get { background: rgba(63,185,80,0.15); color: var(--green); }
.post { background: rgba(88,166,255,0.15); color: var(--accent); }
.del { background: rgba(248,81,73,0.15); color: var(--red); }
.patch { background: rgba(210,153,34,0.15); color: var(--orange); }
.ws { background: rgba(188,140,255,0.15); color: var(--purple); }

/* ── Alert boxes ── */
/* Default callout: blue-tinted panel. */
.note {
  margin: 12px 0;
  padding: 12px 16px;
  background: rgba(88,166,255,0.07);
  border: 1px solid rgba(88,166,255,0.25);
  border-radius: 8px;
  color: var(--text);
  font-size: 13px;
}

/* Warning variant: same box with the orange tint swapped in. */
.note.warn {
  background: rgba(210,153,34,0.07);
  border-color: rgba(210,153,34,0.25);
}

/* ── Context var table colors ── */
/* Purple monospace label for a context-variable name. */
.cv {
  color: var(--purple);
  font-family: 'SF Mono', Consolas, monospace;
  font-size: 12px;
}

/* ── Sequence diagram ── */
/* Monospace panel; each line is coloured by the kind of event it shows. */
.seq {
  background: var(--bg2);
  border: 1px solid var(--border);
  border-radius: 8px;
  padding: 16px 20px;
  margin: 12px 0;
  font-family: 'SF Mono', Consolas, monospace;
  font-size: 12px;
  line-height: 2;
}

.seq .s-start { color: var(--muted); }
.seq .s-thinking { color: var(--purple); }
.seq .s-tool { color: var(--orange); }
.seq .s-text { color: var(--green); }
.seq .s-end { color: var(--accent); }
.seq .s-sub { color: var(--yellow); padding-left: 20px; }
/* Was var(--border): #30363d on the #161b22 panel is roughly 1.4:1 contrast,
   far below the 4.5:1 WCAG minimum for text. --muted is legible; italics keep
   comments distinguishable from .s-start, which shares the colour. */
.seq .s-comment { color: var(--muted); font-style: italic; }

/* ── Section icon ── */
/* Rounded square that centres its glyph and never shrinks inside the flex <h2>. */
h2 .icon {
  display: flex;
  align-items: center;
  justify-content: center;
  flex-shrink: 0;
  width: 28px;
  height: 28px;
  border-radius: 6px;
  font-size: 16px;
}

/* ── Scrollbar ── */
/* WebKit/Blink: slim scrollbar with a thumb that brightens on hover. */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
::-webkit-scrollbar-thumb:hover { background: var(--muted); }

/* Engines without ::-webkit-scrollbar (e.g. Firefox) get the standard properties.
   The @supports guard keeps these away from Blink, where setting them would
   switch off the ::-webkit-scrollbar rules above. */
@supports not selector(::-webkit-scrollbar) {
  * {
    scrollbar-width: thin;
    scrollbar-color: var(--border) transparent;
  }
}

/* ── Two columns ── */
/* Two equal columns that collapse to one at 800px and below. */
.two-col {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 16px;
  margin: 16px 0;
}

@media (max-width: 800px) {
  .two-col {
    grid-template-columns: 1fr;
  }
}
</style>
</head>
<body>

<!-- Sidebar: in-page table of contents. Each href is a fragment link to a
     <section id="…"> inside <main>. NOTE(review): the targets #ws-sequences,
     #rest-api, #profiles, #memory and #config are not in the portion reviewed —
     confirm they exist. The stylesheet defines an a.active state that no link
     carries in the markup; presumably a script toggles it — verify. -->
<nav id="nav">
  <!-- Branding header -->
  <div class="logo">
    <h1>Navi</h1>
    <p>Backend Architecture Reference</p>
  </div>

  <!-- Link groups: a .section-label caption followed by its links -->
  <div class="section-label">Overview</div>
  <a href="#overview">Project Overview</a>
  <a href="#stack">Stack</a>

  <div class="section-label">Architecture</div>
  <a href="#components">Component Map</a>
  <a href="#lifecycle">Request Lifecycle</a>
  <a href="#contextvars">Context Vars</a>

  <div class="section-label">Core</div>
  <a href="#agent">Agent Loop</a>
  <a href="#planning">Planning Phase</a>
  <a href="#sessions">Sessions</a>
  <a href="#compression">Context Compression</a>

  <div class="section-label">Tools</div>
  <a href="#builtin-tools">Built-in Tools</a>
  <a href="#user-tools">User Tools</a>
  <a href="#tool-format">Tool Format</a>

  <div class="section-label">Communication</div>
  <a href="#ws-protocol">WebSocket Protocol</a>
  <a href="#ws-events">Events Reference</a>
  <a href="#ws-sequences">Typical Sequences</a>
  <a href="#rest-api">REST API</a>

  <div class="section-label">Other</div>
  <a href="#profiles">Profiles</a>
  <a href="#memory">Memory System</a>
  <a href="#config">Configuration</a>
</nav>

<!-- Main -->
<main id="main">

<!-- ─── OVERVIEW ─── -->
<section id="overview">
  <!-- Summary paragraph plus a grid of fact cards (label / value / sub-line).
       The heading emoji is decorative, so it is hidden from assistive tech. -->
  <h2><span class="icon" style="background:rgba(88,166,255,0.12)" aria-hidden="true">🧭</span> Project Overview</h2>
  <p>Navi is a personal modular AI agent system. FastAPI backend + vanilla JS client. The agent is named Navi — female personal assistant. Runs locally via Ollama.</p>

  <div class="cards">
    <div class="card">
      <div class="label">Entry point</div>
      <div class="value">navi/main.py</div>
      <div class="sub">FastAPI app</div>
    </div>
    <div class="card">
      <div class="label">Run command</div>
      <div class="value" style="font-size:11px">uvicorn navi.main:app</div>
      <div class="sub">--reload --port 8000</div>
    </div>
    <div class="card">
      <div class="label">Default model</div>
      <div class="value">gemma4:31b-cloud</div>
      <div class="sub">Ollama, 2B active params</div>
    </div>
    <div class="card">
      <div class="label">Context window</div>
      <div class="value">65 536 tokens</div>
      <div class="sub">OLLAMA_NUM_CTX</div>
    </div>
    <div class="card">
      <div class="label">Database</div>
      <div class="value">SQLite</div>
      <div class="sub">navi.db via aiosqlite</div>
    </div>
    <div class="card">
      <div class="label">Thinking</div>
      <div class="value">Enabled</div>
      <div class="sub">OLLAMA_THINK=true</div>
    </div>
  </div>
</section>

<!-- ─── STACK ─── -->
<section id="stack">
  <!-- Technology inventory. The header row sits in <thead> with column-scoped
       <th> cells so assistive tech can associate data cells with their column;
       the heading emoji is decorative and hidden from assistive tech. -->
  <h2><span class="icon" style="background:rgba(63,185,80,0.12)" aria-hidden="true">📦</span> Stack</h2>
  <div class="table-wrap">
    <table>
      <thead>
        <tr><th scope="col">Layer</th><th scope="col">Technology</th><th scope="col">Notes</th></tr>
      </thead>
      <tbody>
        <tr><td>Web framework</td><td><code>FastAPI</code> + <code>uvicorn</code></td><td>ASGI, async throughout</td></tr>
        <tr><td>LLM backend (primary)</td><td><code>Ollama</code></td><td>Local, <code>OllamaBackend</code> in <code>navi/llm/ollama.py</code></td></tr>
        <tr><td>LLM backend (alt)</td><td>OpenAI-compatible</td><td><code>navi/llm/openai_backend.py</code></td></tr>
        <tr><td>Database</td><td><code>aiosqlite</code></td><td>Sessions + memory facts in <code>navi.db</code></td></tr>
        <tr><td>Config</td><td><code>pydantic-settings</code></td><td>Reads <code>.env</code>, typed <code>Settings</code> object</td></tr>
        <tr><td>Logging</td><td><code>structlog</code></td><td>Structured JSON-friendly logs</td></tr>
        <tr><td>Client</td><td>Vanilla JS ES modules</td><td>marked.js + highlight.js via esm.sh CDN</td></tr>
        <tr><td>Markdown rendering</td><td><code>marked.js</code></td><td>In browser, assistant messages</td></tr>
      </tbody>
    </table>
  </div>
</section>

<!-- ─── COMPONENTS ─── -->
<section id="components">
  <!-- Layered diagram, top to bottom: client, FastAPI, agent, registries, then
       the three backing stores side by side. The heading emoji and the arrow
       glyphs between layers are decorative and hidden from assistive tech. -->
  <h2><span class="icon" style="background:rgba(188,140,255,0.12)" aria-hidden="true">🗂️</span> Component Map</h2>

  <div class="arch-diagram">
    <div class="arch-layer">
      <div class="arch-box">
        <div class="arch-title">Client (browser)</div>
        <div class="arch-row">
          <span class="arch-chip blue">WebSocket /ws/sessions/{id}</span>
          <span class="arch-chip">REST /sessions/*</span>
          <span class="arch-chip">REST /agents/*</span>
        </div>
      </div>
    </div>
    <div class="arch-arrow" aria-hidden="true">↓</div>
    <div class="arch-layer">
      <div class="arch-box">
        <div class="arch-title">FastAPI — navi/main.py</div>
        <div class="arch-row">
          <span class="arch-chip blue">api/websocket.py · _AgentRun · stop endpoint</span>
          <span class="arch-chip">routes/sessions.py</span>
          <span class="arch-chip">routes/agents.py</span>
          <span class="arch-chip">routes/messages.py</span>
        </div>
      </div>
    </div>
    <div class="arch-arrow" aria-hidden="true">↓</div>
    <div class="arch-layer">
      <div class="arch-box">
        <div class="arch-title">Agent — navi/core/agent.py</div>
        <div class="arch-row">
          <span class="arch-chip green">run_stream() → AsyncGenerator[AgentEvent]</span>
          <span class="arch-chip">run() → str</span>
          <span class="arch-chip purple">run_ephemeral() → str (subagent)</span>
          <span class="arch-chip orange">_run_planning()</span>
          <span class="arch-chip">_run_workers()</span>
        </div>
      </div>
    </div>
    <div class="arch-arrow" aria-hidden="true">↓</div>
    <div class="arch-layer">
      <div class="arch-box">
        <div class="arch-title">Registries — navi/core/registry.py · build_default_registries()</div>
        <div class="arch-row">
          <span class="arch-chip blue">ToolRegistry</span>
          <span class="arch-chip green">ProfileRegistry</span>
          <span class="arch-chip purple">BackendRegistry</span>
        </div>
      </div>
    </div>
    <div class="arch-arrow" aria-hidden="true">↓</div>
    <div class="arch-layer">
      <div class="arch-row" style="justify-content:space-between; gap:8px">
        <div class="arch-box" style="flex:1">
          <div class="arch-title">LLM Backend</div>
          <div class="arch-row">
            <span class="arch-chip orange">OllamaBackend</span>
            <span class="arch-chip">complete()</span>
            <span class="arch-chip">stream_complete()</span>
          </div>
        </div>
        <div class="arch-box" style="flex:1">
          <div class="arch-title">SessionStore (SQLite)</div>
          <div class="arch-row">
            <span class="arch-chip green">messages[]</span>
            <span class="arch-chip blue">context[]</span>
          </div>
        </div>
        <div class="arch-box" style="flex:1">
          <div class="arch-title">MemoryStore (SQLite)</div>
          <div class="arch-row">
            <span class="arch-chip purple">memory_facts</span>
            <span class="arch-chip">summary</span>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>

<!-- ─── LIFECYCLE ─── -->
<section id="lifecycle">
  <!-- Ordered walkthrough of one streamed request. The steps are an <ol> so the
       sequence is exposed as a list; .flow-step sets display:flex on each <li>,
       so no list marker is drawn and the visual layout is unchanged. The
       number circles are visual only and hidden from assistive tech, as is the
       decorative heading emoji. -->
  <h2><span class="icon" style="background:rgba(210,153,34,0.12)" aria-hidden="true">🔄</span> Request Lifecycle</h2>
  <p>Streaming flow from WebSocket message to final response.</p>
  <ol class="flow">
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">1</div>
      <div class="flow-content">
        <strong>Client sends message</strong>
        <span><code>{type:"message", content:"...", images:[...]}</code> over WebSocket</span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">2</div>
      <div class="flow-content">
        <strong>websocket_session() creates _AgentRun</strong>
        <span>Subscribes a queue, launches <code>_run_agent()</code> as asyncio task, sends <code>stream_start</code></span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">3</div>
      <div class="flow-content">
        <strong>Pre-turn compression check</strong>
        <span>If <code>context_token_count ≥ num_ctx × threshold</code> → compress context before LLM call</span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">4</div>
      <div class="flow-content">
        <strong>Planning phase</strong>
        <span>If <code>profile.planning_enabled</code>: fast non-streaming LLM call → yields <code>plan_ready</code> event if plan generated</span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">5</div>
      <div class="flow-content">
        <strong>Tool-calling loop (max_iterations)</strong>
        <span>Calls <code>llm.stream_complete()</code> → yields thinking/text/tool events. Loops until <code>finish_reason=stop</code></span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">6</div>
      <div class="flow-content">
        <strong>StreamEnd + workers</strong>
        <span>Saves session to DB. Runs post-turn workers (compression). Yields <code>context_compressed</code> if triggered</span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" style="background:rgba(88,166,255,0.12); border-color:var(--accent)" aria-hidden="true">✓</div>
      <div class="flow-content">
        <strong>Done</strong>
        <span>Events broadcast from <code>_AgentRun</code> to all subscriber queues → sent as JSON to WebSocket</span>
      </div>
    </li>
  </ol>
</section>

<!-- ─── CONTEXT VARS ─── -->
<section id="contextvars">
  <!-- Reference table of the context variables, followed by a warning callout.
       The header row sits in <thead> with column-scoped <th> cells; the heading
       emoji is decorative and hidden from assistive tech. -->
  <h2><span class="icon" style="background:rgba(188,140,255,0.12)" aria-hidden="true">🔗</span> Context Vars</h2>
  <p>Thread-safe, async-safe state shared between Agent and tools. Defined in <code>navi/tools/base.py</code>.</p>
  <div class="table-wrap">
    <table>
      <thead>
        <tr><th scope="col">ContextVar</th><th scope="col">Type</th><th scope="col">Set by</th><th scope="col">Used by</th></tr>
      </thead>
      <tbody>
        <tr>
          <td><span class="cv">current_session_id</span></td>
          <td><code>str | None</code></td>
          <td>Agent before each run</td>
          <td>SSH pool, scratchpad, todo — per-session state</td>
        </tr>
        <tr>
          <td><span class="cv">current_event_sink</span></td>
          <td><code>Queue | None</code></td>
          <td>run_stream() per tool task</td>
          <td>run_ephemeral() forwards sub-agent events to parent stream</td>
        </tr>
        <tr>
          <td><span class="cv">current_stop_event</span></td>
          <td><code>Event | None</code></td>
          <td>_run_agent() before run_stream()</td>
          <td>Agent loop checks before each LLM call and mid-stream</td>
        </tr>
      </tbody>
    </table>
  </div>
  <div class="note warn">
    <strong>Never use task.cancel() for stopping generation.</strong> It corrupts Starlette's WebSocket receive state. Use <code>current_stop_event.set()</code> via <code>POST /sessions/{id}/stop</code>.
  </div>
</section>

<!-- ─── AGENT LOOP ─── -->
<section id="agent">
  <!-- Entry-point comparison table, the system-prompt layering diagram, and a
       note on sub-agent isolation. The header row sits in <thead> with
       column-scoped <th> cells; the heading emoji is decorative and hidden
       from assistive tech. -->
  <h2><span class="icon" style="background:rgba(63,185,80,0.12)" aria-hidden="true">⚙️</span> Agent Loop</h2>
  <p>Three entry points in <code>navi/core/agent.py</code>:</p>
  <div class="table-wrap">
    <table>
      <thead>
        <tr><th scope="col">Method</th><th scope="col">Returns</th><th scope="col">Persistence</th><th scope="col">Planning</th></tr>
      </thead>
      <tbody>
        <tr>
          <td><code>run(session_id, msg)</code></td>
          <td><code>str</code></td>
          <td>SQLite session</td>
          <td>No</td>
        </tr>
        <tr>
          <td><code>run_stream(session_id, msg)</code></td>
          <td><code>AsyncGenerator[AgentEvent]</code></td>
          <td>SQLite session</td>
          <td>Yes (if profile.planning_enabled)</td>
        </tr>
        <tr>
          <td><code>run_ephemeral(msg, profile_id)</code></td>
          <td><code>str</code></td>
          <td>In-memory only</td>
          <td>No</td>
        </tr>
      </tbody>
    </table>
  </div>

  <h3>System prompt construction</h3>
  <p>Built fresh on every LLM call — never stored in session.context.</p>
  <pre><code>NAVI_PERSONA (global personality)
───────────────────────────────────────
profile.system_prompt (domain rules)
───────────────────────────────────────
[memory injection: "## What I remember about the user"]
───────────────────────────────────────
session.context messages (history, no system msgs)</code></pre>

  <h3>Sub-agent isolation</h3>
  <p><code>run_ephemeral()</code> sets <code>current_session_id = "subagent_&lt;uuid12&gt;"</code> so each subagent has its own isolated scratchpad and SSH connection pool entry.</p>
</section>

<!-- ─── PLANNING ─── -->
<section id="planning">
  <!-- Ordered steps of the planning phase. The steps are an <ol> so the
       sequence is exposed as a list; .flow-step sets display:flex on each <li>,
       so no list marker is drawn and the visual layout is unchanged. The
       number circles and the heading emoji are hidden from assistive tech. -->
  <h2><span class="icon" style="background:rgba(210,153,34,0.12)" aria-hidden="true">🗺️</span> Planning Phase</h2>
  <p>Runs before the tool-calling loop when <code>profile.planning_enabled = true</code>.</p>

  <ol class="flow">
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">1</div>
      <div class="flow-content">
        <strong>LLM call: decide or plan</strong>
        <span>Fast non-streaming call: <code>think=False</code>, <code>temperature=0.3</code>, no tools</span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">2</div>
      <div class="flow-content">
        <strong>Response classification</strong>
        <span>Starts with <code>DIRECT</code> → skip planning. No numbered steps found → skip. Otherwise → real plan.</span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">3</div>
      <div class="flow-content">
        <strong>Plan injection</strong>
        <span>Appended to <code>session.context</code> as assistant message — model continues from it naturally</span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">4</div>
      <div class="flow-content">
        <strong>PlanReady event emitted</strong>
        <span>Rendered as collapsible 🗺️ card in UI before execution begins</span>
      </div>
    </li>
  </ol>
</section>

<!-- ─── SESSIONS ─── -->
<section id="sessions">
  <!-- Session model fields, the messages/context dual-buffer note, and the
       per-message field table. Each table's header row sits in <thead> with
       column-scoped <th> cells; the heading emoji is decorative and hidden
       from assistive tech. -->
  <h2><span class="icon" style="background:rgba(88,166,255,0.12)" aria-hidden="true">💾</span> Sessions</h2>

  <h3>Session model (<code>navi/core/session.py</code>)</h3>
  <div class="table-wrap">
    <table>
      <thead>
        <tr><th scope="col">Field</th><th scope="col">Type</th><th scope="col">Description</th></tr>
      </thead>
      <tbody>
        <tr><td><code>id</code></td><td>UUID str</td><td>Unique session identifier</td></tr>
        <tr><td><code>profile_id</code></td><td>str</td><td>Active profile</td></tr>
        <tr><td><code>messages</code></td><td>list[Message]</td><td><span class="badge badge-green">Full history</span> Never compressed. Used for UI display.</td></tr>
        <tr><td><code>context</code></td><td>list[Message]</td><td><span class="badge badge-orange">LLM context</span> May be replaced by compression summary.</td></tr>
        <tr><td><code>context_token_count</code></td><td>int</td><td>Accumulated tokens; reset to 0 after compression</td></tr>
        <tr><td><code>pinned</code></td><td>bool</td><td>Pinned sessions appear first in sidebar</td></tr>
      </tbody>
    </table>
  </div>

  <h3>Dual-buffer design</h3>
  <div class="note">
    <strong>Key invariant:</strong> <code>session.messages</code> is the full, unmodified conversation history — always available for display. <code>session.context</code> is what the LLM actually sees — may contain a compression summary instead of old messages.
  </div>

  <h3>Message format</h3>
  <div class="table-wrap">
    <table>
      <thead>
        <tr><th scope="col">Field</th><th scope="col">Present on</th><th scope="col">Type</th></tr>
      </thead>
      <tbody>
        <tr><td><code>role</code></td><td>all</td><td><code>user | assistant | tool | system</code></td></tr>
        <tr><td><code>content</code></td><td>most</td><td><code>str | None</code></td></tr>
        <tr><td><code>images</code></td><td>user, assistant</td><td><code>list[str]</code> — base64</td></tr>
        <tr><td><code>tool_calls</code></td><td>assistant (when calling tools)</td><td><code>list[ToolCallRequest]</code></td></tr>
        <tr><td><code>tool_call_id</code></td><td>tool results</td><td><code>str</code></td></tr>
        <tr><td><code>name</code></td><td>tool results</td><td>tool name</td></tr>
        <tr><td><code>is_summary</code></td><td>compressed blocks</td><td><code>bool</code></td></tr>
        <tr><td><code>created_at</code></td><td>user/assistant</td><td>ISO 8601 datetime</td></tr>
      </tbody>
    </table>
  </div>
</section>

<!-- ─── COMPRESSION ─── -->
<section id="compression">
  <!-- Trigger points, the four-step algorithm, and the config table. The
       algorithm is an <ol> so the sequence is exposed as a list; .flow-step
       sets display:flex on each <li>, so no list marker is drawn and the
       visual layout is unchanged. The table header row sits in <thead> with
       column-scoped <th> cells. Number circles and the heading emoji are
       hidden from assistive tech. -->
  <h2><span class="icon" style="background:rgba(210,153,34,0.12)" aria-hidden="true">🗜️</span> Context Compression</h2>
  <p>Keeps the LLM context within the token budget. Only <code>session.context</code> is modified — <code>session.messages</code> is never touched.</p>

  <h3>Trigger points</h3>
  <div class="two-col">
    <div class="card">
      <div class="label">Pre-turn</div>
      <div class="value" style="font-size:13px">Before LLM call in run_stream()</div>
      <div class="sub">Checks context_token_count against threshold</div>
    </div>
    <div class="card">
      <div class="label">Post-turn (worker)</div>
      <div class="value" style="font-size:13px">After StreamEnd via CompressionWorker</div>
      <div class="sub">Re-checks and compresses if still needed</div>
    </div>
  </div>

  <h3>Algorithm</h3>
  <ol class="flow">
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">1</div>
      <div class="flow-content">
        <strong>Partition into turns</strong>
        <span>Keep last <code>context_keep_recent</code> turns verbatim. Tool call groups never split.</span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">2</div>
      <div class="flow-content">
        <strong>Format old turns as text</strong>
        <span>Tool args truncated to 120 chars, results to 300 chars. Total input capped at 12 000 chars.</span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">3</div>
      <div class="flow-content">
        <strong>Summarize with LLM</strong>
        <span><code>think=False</code>, bullet-point output. Same model — no model swap or extra loading.</span>
      </div>
    </li>
    <li class="flow-step">
      <div class="flow-num" aria-hidden="true">4</div>
      <div class="flow-content">
        <strong>Replace with summary message</strong>
        <span><code>role=user, is_summary=True</code>. Result: <code>system_msgs + [summary] + recent_turns</code></span>
      </div>
    </li>
  </ol>

  <h3>Config</h3>
  <div class="table-wrap">
    <table>
      <thead>
        <tr><th scope="col">Setting</th><th scope="col">Default</th><th scope="col">Description</th></tr>
      </thead>
      <tbody>
        <tr><td><code>CONTEXT_COMPRESSION_ENABLED</code></td><td><code>true</code></td><td>Enable/disable</td></tr>
        <tr><td><code>CONTEXT_COMPRESSION_THRESHOLD</code></td><td><code>0.80</code></td><td>Trigger at 80% of context window</td></tr>
        <tr><td><code>CONTEXT_KEEP_RECENT</code></td><td><code>10</code></td><td>Turns kept verbatim</td></tr>
        <tr><td><code>CONTEXT_SUMMARY_TEMPERATURE</code></td><td><code>0.3</code></td><td>Summarization temperature</td></tr>
      </tbody>
    </table>
  </div>
</section>

<!-- ─── BUILT-IN TOOLS ─── -->
<section id="builtin-tools">
  <!-- Catalogue of built-in tools: registered name, implementing class, purpose.
       The header row sits in <thead> with column-scoped <th> cells; the heading
       emoji is decorative and hidden from assistive tech. -->
  <h2><span class="icon" style="background:rgba(63,185,80,0.12)" aria-hidden="true">🔧</span> Built-in Tools</h2>
  <p>Registered in <code>build_default_registries()</code> as builtins. Never removed on hot-reload.</p>
  <div class="table-wrap">
    <table>
      <thead>
        <tr><th scope="col">Name</th><th scope="col">Class</th><th scope="col">Description</th></tr>
      </thead>
      <tbody>
        <tr><td><code>mcp:navi-web:web_search</code></td><td>McpTool</td><td>Web search (SearXNG primary, DDG fallback, Brave tertiary)</td></tr>
        <tr><td><code>mcp:navi-web:web_view</code></td><td>McpTool</td><td>Open a URL in a headless browser and return clean text</td></tr>
        <tr><td><code>filesystem</code></td><td>FilesystemTool</td><td>Read/write/list local files (path allowlist via config)</td></tr>
        <tr><td><code>mcp:navi-web:http_request</code></td><td>McpTool</td><td>Raw HTTP request — GET/POST/PUT/PATCH/DELETE</td></tr>
        <tr><td><code>code_exec</code></td><td>CodeExecTool</td><td>Execute Python in a subprocess sandbox</td></tr>
        <tr><td><code>terminal</code></td><td>TerminalTool</td><td>Run shell commands (command allowlist via config)</td></tr>
        <tr><td><code>ssh_exec</code></td><td>SshExecTool</td><td>SSH into remote hosts; connection pool keyed by session ID</td></tr>
        <tr><td><code>image_view</code></td><td>ImageViewTool</td><td>Load image from path/URL → base64 for multimodal LLM</td></tr>
        <tr><td><code>todo</code></td><td>TodoTool</td><td>Per-session task checklist (set/update/read)</td></tr>
        <tr><td><code>scratchpad</code></td><td>ScratchpadTool</td><td>Per-session named working notes (write/append/read/clear)</td></tr>
        <tr><td><code>reload_tools</code></td><td>ReloadToolsTool</td><td>Hot-reload user tools without server restart</td></tr>
        <tr><td><code>write_tool</code></td><td>WriteToolTool</td><td>Write a new user tool file and reload immediately</td></tr>
        <tr><td><code>list_tools</code></td><td>ListToolsTool</td><td>Return the live tool list from registry</td></tr>
        <tr><td><code>tool_manual</code></td><td>ToolManualTool</td><td>Return manuals/{name}.md or auto-generate from schema</td></tr>
        <tr><td><code>memory_search</code></td><td>MemorySearchTool</td><td>Search long-term memory facts by keyword</td></tr>
        <tr><td><code>memory_forget</code></td><td>MemoryForgetTool</td><td>Delete a fact from long-term memory</td></tr>
        <tr><td><code>spawn_agent</code></td><td>SpawnAgentTool</td><td>Spawn an isolated subagent (blocking, synchronous)</td></tr>
        <tr><td><code>switch_profile</code></td><td>SwitchProfileTool</td><td>Switch the active profile for the session</td></tr>
      </tbody>
    </table>
  </div>
</section>

<!-- ─── USER TOOLS ─── -->
<section id="user-tools">
  <!-- Two-column layout: discovery rules on the left, the currently listed user
       tools on the right, then a note on image-tool handling. The heading
       emoji is decorative and hidden from assistive tech. -->
  <h2><span class="icon" style="background:rgba(88,166,255,0.12)" aria-hidden="true">🔌</span> User Tools</h2>

  <div class="two-col">
    <div>
      <h3>Discovery</h3>
      <ul style="padding-left:18px; color:var(--muted); font-size:13px; line-height:2">
        <li>Loaded from <code>tools/*.py</code> at startup</li>
        <li>Files starting with <code>_</code> are ignored</li>
        <li><code>tools/enabled.json</code> — names to include in all profiles</li>
        <li>Errors are isolated per file (one bad file ≠ failure)</li>
        <li>Hot-reload via <code>reload_tools</code> or after <code>write_tool</code></li>
      </ul>
    </div>
    <div>
      <h3>Current user tools</h3>
      <div style="margin-top:8px">
        <div class="card" style="margin-bottom:8px">
          <div class="label">get_current_datetime</div>
          <div class="value" style="font-size:13px">Returns current date/time</div>
        </div>
        <div class="card">
          <div class="label">user_notes</div>
          <div class="value" style="font-size:13px">Persistent personal notes store</div>
        </div>
      </div>
    </div>
  </div>

  <h3>Image tool → multimodal injection</h3>
  <p>When <code>image_view</code> succeeds, it returns <code>metadata={is_image: true, base64: "..."}</code>. The agent appends a synthetic user message with the image to <code>session.context</code> (not <code>messages</code>) — making it visible to the next LLM call without polluting display history.</p>
</section>

<!-- ─── TOOL FORMAT ─── -->
<section id="tool-format">
  <h2><span class="icon" style="background:rgba(188,140,255,0.12)">📝</span> Tool Format</h2>

  <h3>Module-level format (preferred for user tools)</h3>
  <pre><code>name = "my_tool"
description = "What it does and when to use it — be specific."
parameters = {
    "type": "object",
    "properties": {
        "param": {"type": "string", "description": "..."}
    },
    "required": ["param"]
}

async def execute(params: dict) -> str:
    # Return a plain string on success.
    # Raise an exception to signal failure.
    return "result"</code></pre>
  <div class="note">No classes, no module-level <code>print()</code>. The loader wraps <code>execute</code> in a <code>Tool</code> subclass automatically.</div>

  <h3>ToolResult (class-based format)</h3>
  <div class="table-wrap">
    <table>
      <tr><th>Field</th><th>Type</th><th>Description</th></tr>
      <tr><td><code>success</code></td><td>bool</td><td>Whether the tool succeeded</td></tr>
      <tr><td><code>output</code></td><td>str</td><td>Always a string — LLM sees this</td></tr>
      <tr><td><code>error</code></td><td>str | None</td><td>Included in LLM output on failure</td></tr>
      <tr><td><code>metadata</code></td><td>dict</td><td>Internal hints, e.g. <code>is_image: True</code></td></tr>
    </table>
  </div>

  <h3>Self-extension via write_tool</h3>
  <p>The agent can install new tools permanently at runtime. <code>WriteToolTool</code> validates, writes to <code>tools/{name}.py</code>, adds to <code>tools/enabled.json</code>, then hot-reloads. The new tool is available from the <strong>next</strong> user message.</p>
</section>

<!-- ─── WS PROTOCOL ─── -->
<section id="ws-protocol">
  <h2><span class="icon" style="background:rgba(188,140,255,0.12)">📡</span> WebSocket Protocol</h2>

  <p>Endpoint: <code>ws://host/ws/sessions/{session_id}</code><br>
  Closes with code <code>4004</code> if session not found.</p>

  <h3>Client → Server</h3>
  <pre><code>{
  "type": "message",         // required, always "message"
  "content": "user text",    // required, non-empty
  "images": ["base64..."],   // optional; data: URI prefix stripped server-side
  "files": [                 // optional; from POST /sessions/{id}/files
    {"name": "file.pdf", "path": "/abs/path/..."}
  ]
}</code></pre>
</section>

<!-- ─── WS EVENTS ─── -->
<section id="ws-events">
  <h2><span class="icon" style="background:rgba(88,166,255,0.12)">📬</span> Events Reference</h2>
  <div class="table-wrap">
    <table>
      <tr><th>Type</th><th>Direction</th><th>Fields</th><th>Description</th></tr>
      <tr>
        <td><span class="event-type et-muted">stream_start</span></td>
        <td>S→C</td><td>—</td>
        <td>Agent processing began. Block user input.</td>
      </tr>
      <tr>
        <td><span class="event-type et-purple">thinking_delta</span></td>
        <td>S→C</td><td><code>delta</code></td>
        <td>Reasoning chunk (streaming). Accumulate until <code>thinking_end</code>.</td>
      </tr>
      <tr>
        <td><span class="event-type et-purple">thinking_end</span></td>
        <td>S→C</td><td>—</td>
        <td>Reasoning phase complete. Auto-collapsed in UI.</td>
      </tr>
      <tr>
        <td><span class="event-type et-purple">turn_thinking</span></td>
        <td>S→C</td><td><code>thinking</code>, <code>is_subagent</code></td>
        <td>Full reasoning block from tool-calling turn (non-streaming).</td>
      </tr>
      <tr>
        <td><span class="event-type et-blue">plan_ready</span></td>
        <td>S→C</td><td><code>plan</code></td>
        <td>Step-by-step plan before execution. Rendered as 🗺️ card.</td>
      </tr>
      <tr>
        <td><span class="event-type et-orange">tool_started</span></td>
        <td>S→C</td><td><code>tool</code>, <code>args</code>, <code>is_subagent</code></td>
        <td>Tool call began. Shows pending spinner in UI immediately.</td>
      </tr>
      <tr>
        <td><span class="event-type et-orange">tool_call</span></td>
        <td>S→C</td><td><code>tool</code>, <code>args</code>, <code>result</code>, <code>success</code>, <code>is_subagent</code></td>
        <td>Tool finished. Pairs with preceding <code>tool_started</code>.</td>
      </tr>
      <tr>
        <td><span class="event-type et-green">stream_delta</span></td>
        <td>S→C</td><td><code>delta</code></td>
        <td>Final response text chunk. Accumulate to build full content.</td>
      </tr>
      <tr>
        <td><span class="event-type et-green">stream_end</span></td>
        <td>S→C</td><td><code>content</code>, <code>context_tokens</code>, <code>max_context_tokens</code></td>
        <td>Final response complete. Unlock user input.</td>
      </tr>
      <tr>
        <td><span class="event-type et-muted">stream_stopped</span></td>
        <td>S→C</td><td>—</td>
        <td>User stopped generation via <code>POST /sessions/{id}/stop</code>.</td>
      </tr>
      <tr>
        <td><span class="event-type et-blue">context_compressed</span></td>
        <td>S→C</td><td><code>messages_before</code>, <code>messages_after</code></td>
        <td>Context compression ran after this turn.</td>
      </tr>
      <tr>
        <td><span class="event-type et-blue">profile_switched</span></td>
        <td>S→C</td><td><code>profile_id</code>, <code>profile_name</code></td>
        <td>Active profile changed mid-stream by the <code>switch_profile</code> tool.</td>
      </tr>
      <tr>
        <td><span class="event-type" style="background:rgba(248,81,73,0.12);color:var(--red)">error</span></td>
        <td>S→C</td><td><code>message</code></td>
        <td>Unhandled error. Some are recoverable, some terminate the stream.</td>
      </tr>
    </table>
  </div>
</section>

<!-- ─── WS SEQUENCES ─── -->
<section id="ws-sequences">
  <h2><span class="icon" style="background:rgba(63,185,80,0.12)">🎬</span> Typical Event Sequences</h2>

  <h3>Simple question (no tools)</h3>
  <div class="seq">
    <div class="s-start">stream_start</div>
    <div class="s-thinking">thinking_delta × N  <span style="color:var(--border)">// if model reasons</span></div>
    <div class="s-thinking">thinking_end</div>
    <div class="s-text">stream_delta × N</div>
    <div class="s-end">stream_end</div>
  </div>

  <h3>With planning + tools</h3>
  <div class="seq">
    <div class="s-start">stream_start</div>
    <div class="s-blue" style="color:var(--accent)">plan_ready              <span style="color:var(--border)">// if planning_enabled</span></div>
    <div class="s-tool">turn_thinking           <span style="color:var(--border)">// reasoning before tool selection</span></div>
    <div class="s-tool">tool_started</div>
    <div class="s-tool">tool_call</div>
    <div class="s-tool">tool_started</div>
    <div class="s-tool">tool_call</div>
    <div class="s-thinking">thinking_delta × N</div>
    <div class="s-thinking">thinking_end</div>
    <div class="s-text">stream_delta × N</div>
    <div class="s-end">stream_end</div>
    <div style="color:var(--muted)">context_compressed      <span style="color:var(--border)">// optional, if threshold hit</span></div>
  </div>

  <h3>Subagent (spawn_agent)</h3>
  <div class="seq">
    <div class="s-start">stream_start</div>
    <div class="s-tool">tool_started  spawn_agent  is_subagent=false</div>
    <div class="s-sub">turn_thinking              is_subagent=true</div>
    <div class="s-sub">tool_started  mcp:navi-web:web_search   is_subagent=true</div>
    <div class="s-sub">tool_call     mcp:navi-web:web_search   is_subagent=true</div>
    <div class="s-sub">tool_started  filesystem   is_subagent=true</div>
    <div class="s-sub">tool_call     filesystem   is_subagent=true</div>
    <div class="s-tool">tool_call     spawn_agent  is_subagent=false</div>
    <div class="s-text">stream_delta × N</div>
    <div class="s-end">stream_end</div>
  </div>

  <h3>Profile switch</h3>
  <div class="seq">
    <div class="s-start">stream_start</div>
    <div class="s-tool">tool_started  switch_profile</div>
    <div class="s-blue" style="color:var(--accent)">profile_switched        <span style="color:var(--border)">// update UI here</span></div>
    <div class="s-tool">tool_call     switch_profile</div>
    <div class="s-text">stream_delta × N</div>
    <div class="s-end">stream_end</div>
  </div>
</section>

<!-- ─── REST API ─── -->
<section id="rest-api">
  <h2><span class="icon" style="background:rgba(88,166,255,0.12)">🌐</span> REST API</h2>
  <div class="table-wrap">
    <table>
      <tr><th>Method</th><th>Path</th><th>Description</th></tr>
      <tr>
        <td><span class="method get">GET</span></td>
        <td><code>/health</code></td>
        <td>Health check → <code>{"status":"ok"}</code></td>
      </tr>
      <tr>
        <td><span class="method get">GET</span></td>
        <td><code>/agents/profiles</code></td>
        <td>List all available profiles</td>
      </tr>
      <tr>
        <td><span class="method get">GET</span></td>
        <td><code>/agents/tools</code></td>
        <td>List all registered tools (builtin + user)</td>
      </tr>
      <tr>
        <td><span class="method post">POST</span></td>
        <td><code>/sessions</code></td>
        <td>Create session → <code>{session_id, profile_id, created_at}</code></td>
      </tr>
      <tr>
        <td><span class="method get">GET</span></td>
        <td><code>/sessions</code></td>
        <td>List all sessions (sorted by pinned+last_active)</td>
      </tr>
      <tr>
        <td><span class="method get">GET</span></td>
        <td><code>/sessions/{id}</code></td>
        <td>Full session with message history (display buffer)</td>
      </tr>
      <tr>
        <td><span class="method get">GET</span></td>
        <td><code>/sessions/{id}/context</code></td>
        <td>LLM context (may differ from messages — for debugging)</td>
      </tr>
      <tr>
        <td><span class="method patch">PATCH</span></td>
        <td><code>/sessions/{id}/pin</code></td>
        <td>Pin or unpin a session</td>
      </tr>
      <tr>
        <td><span class="method del">DEL</span></td>
        <td><code>/sessions/{id}</code></td>
        <td>Delete session and its uploaded files</td>
      </tr>
      <tr>
        <td><span class="method post">POST</span></td>
        <td><code>/sessions/{id}/files</code></td>
        <td>Upload file (multipart/form-data). Max 200 MB. TTL 24h.</td>
      </tr>
      <tr>
        <td><span class="method post">POST</span></td>
        <td><code>/sessions/{id}/messages</code></td>
        <td>Send message, wait for full response (non-streaming)</td>
      </tr>
      <tr>
        <td><span class="method post">POST</span></td>
        <td><code>/sessions/{id}/stop</code></td>
        <td>Signal cooperative stop for running agent</td>
      </tr>
      <tr>
        <td><span class="method ws">WS</span></td>
        <td><code>/ws/sessions/{id}</code></td>
        <td>Streaming agent interface</td>
      </tr>
    </table>
  </div>
</section>

<!-- ─── PROFILES ─── -->
<section id="profiles">
  <h2><span class="icon" style="background:rgba(63,185,80,0.12)">👤</span> Profiles</h2>
  <p>Profiles define tools, system prompt, model, and behaviour per domain. Defined in <code>navi/profiles/</code>.</p>

  <div class="table-wrap">
    <table>
      <tr><th>Profile ID</th><th>Name</th><th>Model</th><th>Temp</th><th>Planning</th></tr>
      <tr>
        <td><code>secretary</code></td><td>Personal Secretary</td>
        <td><code>gemma4:31b-cloud</code></td>
        <td>0.7</td>
        <td><span class="badge badge-green">Yes</span></td>
      </tr>
      <tr>
        <td><code>server_admin</code></td><td>Server Administrator</td>
        <td><code>gemma4:31b-cloud</code></td>
        <td>0.2</td>
        <td><span class="badge badge-green">Yes</span></td>
      </tr>
      <tr>
        <td><code>smart_home</code></td><td>Smart Home Assistant</td>
        <td><code>gemma4:31b-cloud</code></td>
        <td>0.3</td>
        <td><span class="badge badge-green">Yes</span></td>
      </tr>
    </table>
  </div>

  <h3>Per-profile scratchpad sections</h3>
  <div class="table-wrap">
    <table>
      <tr><th>Profile</th><th>Sections</th><th>Domain focus</th></tr>
      <tr><td><code>secretary</code></td><td><code>findings</code>, <code>sources</code>, <code>drafts</code></td><td>Research, writing, analysis</td></tr>
      <tr><td><code>server_admin</code></td><td><code>status</code>, <code>logs</code>, <code>errors</code>, <code>plan</code></td><td>Remote ops, monitoring</td></tr>
      <tr><td><code>smart_home</code></td><td><code>state</code>, <code>config</code>, <code>errors</code></td><td>Home Assistant, IoT, automations</td></tr>
    </table>
  </div>

  <h3>AgentProfile fields</h3>
  <div class="table-wrap">
    <table>
      <tr><th>Field</th><th>Type</th><th>Description</th></tr>
      <tr><td><code>id</code></td><td>str</td><td>Unique identifier used in API and sessions</td></tr>
      <tr><td><code>name</code></td><td>str</td><td>Human-readable name for UI</td></tr>
      <tr><td><code>system_prompt</code></td><td>str</td><td>Domain-specific instructions (appended after persona)</td></tr>
      <tr><td><code>enabled_tools</code></td><td>list[str]</td><td>Tool names available to this profile</td></tr>
      <tr><td><code>model</code></td><td>str</td><td>Ollama model override (falls back to settings default)</td></tr>
      <tr><td><code>temperature</code></td><td>float</td><td>LLM temperature</td></tr>
      <tr><td><code>max_iterations</code></td><td>int</td><td>Tool-calling loop limit (default 50)</td></tr>
      <tr><td><code>planning_enabled</code></td><td>bool</td><td>Run planning phase before tool loop</td></tr>
      <tr><td><code>llm_backend</code></td><td>str</td><td>Backend key in BackendRegistry (default "ollama")</td></tr>
    </table>
  </div>
</section>

<!-- ─── MEMORY ─── -->
<section id="memory">
  <h2><span class="icon" style="background:rgba(88,166,255,0.12)">🧠</span> Memory System</h2>
  <p>Long-term user memory: facts extracted from conversations, stored in SQLite, injected into every session.</p>

  <h3>Database schema</h3>
  <div class="table-wrap">
    <table>
      <tr><th>Table</th><th>Key columns</th><th>Purpose</th></tr>
      <tr>
        <td><code>memory_facts</code></td>
        <td><code>(category, key)</code> unique</td>
        <td>Individual facts about the user — preferences, projects, environment</td>
      </tr>
      <tr>
        <td><code>memory_summary</code></td>
        <td>Single row (id=1)</td>
        <td>Narrative summary generated from all facts; injected into every session</td>
      </tr>
      <tr>
        <td><code>session_memory_state</code></td>
        <td><code>session_id, extracted_at</code></td>
        <td>Tracks which sessions have been processed for extraction</td>
      </tr>
    </table>
  </div>

  <h3>Automatic extraction trigger</h3>
  <p><code>POST /sessions</code> (create new session) fires <code>_process_stale_sessions()</code> as a background task. It processes sessions that have been idle for more than 30 minutes and haven't been extracted yet.</p>

  <h3>Memory injection</h3>
  <p>On every <code>run_stream()</code> / <code>run()</code> call, <code>_memory_msg()</code> fetches the summary and returns a system message: <code>"## What I remember about the user\n\n{summary}"</code>. Injected after main system prompt, before conversation history.</p>

  <h3>Memory tools usage rules</h3>
  <div class="note">
    Call <code>memory_search</code> when the user mentions something personal or before making assumptions about their environment. <strong>Do not call at session start reflexively</strong> — only when context warrants it. Call <code>memory_forget</code> only when explicitly asked.
  </div>
</section>

<!-- ─── CONFIG ─── -->
<section id="config">
  <h2><span class="icon" style="background:rgba(210,153,34,0.12)">⚙️</span> Configuration</h2>
  <p>All settings read from <code>.env</code> via <code>pydantic-settings</code>. Imported as <code>from navi.config import settings</code>.</p>

  <h3>LLM</h3>
  <div class="table-wrap">
    <table>
      <tr><th>Variable</th><th>Default</th><th>Description</th></tr>
      <tr><td><code>OLLAMA_HOST</code></td><td><code>http://localhost:11434</code></td><td>Ollama server URL</td></tr>
      <tr><td><code>OLLAMA_DEFAULT_MODEL</code></td><td><code>gemma4:31b-cloud</code></td><td>Default model (overridable per profile)</td></tr>
      <tr><td><code>OLLAMA_NUM_CTX</code></td><td><code>65536</code></td><td>Context window size in tokens</td></tr>
      <tr><td><code>OLLAMA_THINK</code></td><td><code>true</code></td><td>Enable extended reasoning</td></tr>
    </table>
  </div>

  <h3>Security / Sandboxing</h3>
  <div class="table-wrap">
    <table>
      <tr><th>Variable</th><th>Default</th><th>Description</th></tr>
      <tr><td><code>FS_ALLOWED_PATHS</code></td><td><code>*</code></td><td>Comma-separated paths filesystem tool can access. <code>*</code> = no limit</td></tr>
      <tr><td><code>TERMINAL_ALLOWED_COMMANDS</code></td><td><code>*</code></td><td>Comma-separated allowed executables. <code>*</code> = allow all</td></tr>
      <tr><td><code>SSH_HOSTS_FILE</code></td><td><code>ssh_hosts.json</code></td><td>Named SSH connections config</td></tr>
    </table>
  </div>

  <h3>Persona</h3>
  <div class="table-wrap">
    <table>
      <tr><th>Variable</th><th>Description</th></tr>
      <tr><td><code>NAVI_PERSONA</code></td><td>Inline global personality prompt</td></tr>
      <tr><td><code>NAVI_PERSONA_FILE</code></td><td>Path to .txt file with persona (recommended — inline doesn't parse multiline well)</td></tr>
    </table>
  </div>

  <h3>Other</h3>
  <div class="table-wrap">
    <table>
      <tr><th>Variable</th><th>Default</th><th>Description</th></tr>
      <tr><td><code>DB_PATH</code></td><td><code>navi.db</code></td><td>SQLite file path</td></tr>
      <tr><td><code>LOG_LEVEL</code></td><td><code>INFO</code></td><td>DEBUG / INFO / WARNING / ERROR</td></tr>
      <tr><td><code>TOOLS_DIR</code></td><td><code>tools</code></td><td>User tools directory</td></tr>
      <tr><td><code>SESSION_FILES_DIR</code></td><td><code>session_files</code></td><td>Uploaded files directory</td></tr>
      <tr><td><code>SESSION_FILES_MAX_SIZE_MB</code></td><td><code>200</code></td><td>Max upload size per file</td></tr>
      <tr><td><code>SESSION_FILES_TTL_HOURS</code></td><td><code>24</code></td><td>File retention hours</td></tr>
    </table>
  </div>
</section>

</main>

<script>
// Sidebar behaviour: scroll-spy highlighting and smooth in-page navigation.
const sections = document.querySelectorAll('section[id]');
const links = document.querySelectorAll('nav a[href^="#"]');

// Index sidebar links by their href once, so the observer callback does not
// have to re-query the DOM on every entry. The first link wins on duplicates,
// matching what querySelector would have returned.
const linkByHref = new Map();
links.forEach(link => {
  const href = link.getAttribute('href');
  if (!linkByHref.has(href)) linkByHref.set(href, link);
});

// Highlight active nav link on scroll.
// rootMargin shrinks the observed area to a band between 10% and 30% of the
// viewport height. That band is only 20% of the viewport tall, so a section
// taller than the viewport can never have 20% of its own area inside it;
// a ratio threshold of 0.2 would therefore never be reached for long sections.
// threshold 0 reports a section as soon as any part of it enters the band.
const observer = new IntersectionObserver((entries) => {
  entries.forEach(entry => {
    if (!entry.isIntersecting) return;
    links.forEach(link => link.classList.remove('active'));
    const active = linkByHref.get(`#${entry.target.id}`);
    if (active) active.classList.add('active');
  });
}, { threshold: 0, rootMargin: '-10% 0px -70% 0px' });

sections.forEach(section => observer.observe(section));

// Smooth scroll, unless the user has asked the OS/browser for reduced motion.
const reducedMotion = window.matchMedia('(prefers-reduced-motion: reduce)');

links.forEach(link => {
  link.addEventListener('click', event => {
    // getElementById instead of querySelector: a bare "#" or an id that is
    // not a valid CSS selector would make querySelector throw.
    const target = document.getElementById(link.getAttribute('href').slice(1));
    // No matching element: leave the default link behaviour untouched
    // rather than swallowing the click.
    if (!target) return;
    event.preventDefault();
    target.scrollIntoView({ behavior: reducedMotion.matches ? 'auto' : 'smooth' });
  });
});
</script>
</body>
</html>