diff --git a/client/index.html b/client/index.html index 834febd..da8836f 100644 --- a/client/index.html +++ b/client/index.html @@ -25,7 +25,8 @@
- Select a profile and start a new chat + Select a profile and start a new chat +
diff --git a/client/js/app.js b/client/js/app.js index 0426962..7679cca 100644 --- a/client/js/app.js +++ b/client/js/app.js @@ -12,6 +12,7 @@ const btnNew = document.getElementById('btn-new'); const sessionListEl = document.getElementById('session-list'); const chatHeaderEl = document.getElementById('chat-header'); +const tokenCounterEl = document.getElementById('token-counter'); const messagesEl = document.getElementById('messages'); const textarea = document.getElementById('input'); const btnSend = document.getElementById('btn-send'); @@ -212,6 +213,7 @@ case 'stream_end': finishStream(event.content); + updateTokenCounter(event.context_tokens, event.max_context_tokens); setInputEnabled(true); break; @@ -360,6 +362,15 @@ }); } +function updateTokenCounter(used, max) { + if (!used || !max) return; + const pct = Math.round((used / max) * 100); + tokenCounterEl.textContent = `${used.toLocaleString()}/${max.toLocaleString()} (${pct}%) tokens`; + tokenCounterEl.classList.toggle('warn', pct >= 50 && pct < 80); + tokenCounterEl.classList.toggle('danger', pct >= 80); + tokenCounterEl.hidden = false; +} + // ── Start ───────────────────────────────────────────────────────────────────── init(); diff --git a/client/js/sidebar.js b/client/js/sidebar.js index 4782f35..57915e8 100644 --- a/client/js/sidebar.js +++ b/client/js/sidebar.js @@ -51,7 +51,9 @@ } export function updateChatHeader(headerEl, profileId, profileName) { - headerEl.innerHTML = profileId + const titleEl = headerEl.querySelector('#chat-header-title'); + if (!titleEl) return; + titleEl.innerHTML = profileId ? `${esc(profileId)} ${esc(profileName || profileId)}` : 'Select a profile and start a new chat'; } diff --git a/client/style.css b/client/style.css index e1ab084..8009bfc 100644 --- a/client/style.css +++ b/client/style.css @@ -167,6 +167,17 @@ align-items: center; gap: 8px; } +.token-counter { + margin-left: auto; + font-size: 11px; + font-variant-numeric: tabular-nums; + color: var(--text-muted); + white-space: nowrap; + transition: color 0.3s; +} +.token-counter.warn { color: #b8860b; } +.token-counter.danger { color: #c0392b; } + .chat-header .profile-badge { background: var(--accent); color: #fff; diff --git a/navi/api/websocket.py b/navi/api/websocket.py index 3a5b1cb..321ddbf 100644 --- a/navi/api/websocket.py +++ b/navi/api/websocket.py @@ -89,7 +89,12 @@ "success": event.success, }) elif isinstance(event, StreamEnd): - await websocket.send_json({"type": "stream_end", "content": event.full_content}) + await websocket.send_json({ + "type": "stream_end", + "content": event.full_content, + "context_tokens": event.context_tokens, + "max_context_tokens": event.max_context_tokens, + }) except SessionNotFound: await websocket.send_json({"type": "error", "message": "Session not found"}) diff --git a/navi/config.py b/navi/config.py index 6fc37d2..18a5db8 100644 --- a/navi/config.py +++ b/navi/config.py @@ -6,7 +6,7 @@ ollama_host: str = "http://localhost:11434" ollama_default_model: str = "gemma4:e2b-it-q8_0" - ollama_num_ctx: int = 8192 + ollama_num_ctx: int = 65536 ollama_think: bool = True openai_api_key: str = "" diff --git a/navi/core/agent.py b/navi/core/agent.py index d5af9f1..ec5690b 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -77,6 +77,8 @@ """Marks the end of the streaming response.""" full_content: str + context_tokens: int | None = None # total tokens used in this turn + max_context_tokens: int = 0 # ollama_num_ctx from config AgentEvent = ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd @@ -183,8 +185,11 @@ final_messages = session.messages.copy() accumulated = "" thinking_active = False + context_tokens: int | None = None async for chunk in llm.stream(final_messages, temperature=profile.temperature, model=profile.model): + if chunk.prompt_tokens is not None or chunk.completion_tokens is not None: + context_tokens = (chunk.prompt_tokens or 0) + (chunk.completion_tokens or 0) if chunk.thinking: if not thinking_active: thinking_active = True @@ -201,7 +206,11 @@ session.messages.append(Message(role="assistant", content=accumulated, created_at=datetime.now(timezone.utc))) await self._sessions.save(session) - yield StreamEnd(full_content=accumulated) + yield StreamEnd( + full_content=accumulated, + context_tokens=context_tokens, + max_context_tokens=settings.ollama_num_ctx, + ) return # Tool calls: emit events, execute, continue loop diff --git a/navi/llm/base.py b/navi/llm/base.py index f4d4a71..88e8946 100644 --- a/navi/llm/base.py +++ b/navi/llm/base.py @@ -57,6 +57,9 @@ delta: str | None = None thinking: str | None = None finish_reason: str | None = None # "stop" | "length"; None while streaming + # Token counts — only present on the final chunk (finish_reason == "stop") + prompt_tokens: int | None = None + completion_tokens: int | None = None class LLMBackend(ABC): diff --git a/navi/llm/ollama.py b/navi/llm/ollama.py index ccefd79..8c36103 100644 --- a/navi/llm/ollama.py +++ b/navi/llm/ollama.py @@ -99,6 +99,12 @@ thinking = getattr(chunk.message, "thinking", None) or None delta = chunk.message.content or None finish_reason = "stop" if chunk.done else None - yield LLMChunk(delta=delta, thinking=thinking, finish_reason=finish_reason) + yield LLMChunk( + delta=delta, + thinking=thinking, + finish_reason=finish_reason, + prompt_tokens=chunk.prompt_eval_count if chunk.done else None, + completion_tokens=chunk.eval_count if chunk.done else None, + ) except Exception as e: raise LLMBackendError(str(e)) from e