- Select a profile and start a new chat
+ Select a profile and start a new chat
+
diff --git a/client/js/app.js b/client/js/app.js
index 0426962..7679cca 100644
--- a/client/js/app.js
+++ b/client/js/app.js
@@ -12,6 +12,7 @@
const btnNew = document.getElementById('btn-new');
const sessionListEl = document.getElementById('session-list');
const chatHeaderEl = document.getElementById('chat-header');
+const tokenCounterEl = document.getElementById('token-counter');
const messagesEl = document.getElementById('messages');
const textarea = document.getElementById('input');
const btnSend = document.getElementById('btn-send');
@@ -212,6 +213,7 @@
case 'stream_end':
finishStream(event.content);
+ updateTokenCounter(event.context_tokens, event.max_context_tokens);
setInputEnabled(true);
break;
@@ -360,6 +362,15 @@
});
}
+/**
+ * Show context-window usage ("used/max (pct%) tokens") in the chat header.
+ *
+ * The counter is left untouched when the element is missing or when either
+ * value is not a positive finite number (e.g. a `stream_end` event that
+ * carried no token data). This function is called from the WebSocket event
+ * handler before the input is re-enabled, so it must never throw.
+ *
+ * @param {number|null|undefined} used - Tokens consumed by the turn.
+ * @param {number|null|undefined} max - Context window size in tokens.
+ */
+function updateTokenCounter(used, max) {
+  // getElementById() yields null when the markup has no #token-counter.
+  if (!tokenCounterEl) return;
+  // Rejects null/undefined/NaN/strings as well as 0 and negatives, which
+  // also rules out a division by zero below.
+  if (!Number.isFinite(used) || !Number.isFinite(max) || used <= 0 || max <= 0) return;
+
+  const pct = Math.round((used / max) * 100);
+  tokenCounterEl.textContent = `${used.toLocaleString()}/${max.toLocaleString()} (${pct}%) tokens`;
+  // `warn` and `danger` are mutually exclusive: 50–79% vs. 80% and above.
+  tokenCounterEl.classList.toggle('warn', pct >= 50 && pct < 80);
+  tokenCounterEl.classList.toggle('danger', pct >= 80);
+  tokenCounterEl.hidden = false;
+}
+
// ── Start ─────────────────────────────────────────────────────────────────────
init();
diff --git a/client/js/sidebar.js b/client/js/sidebar.js
index 4782f35..57915e8 100644
--- a/client/js/sidebar.js
+++ b/client/js/sidebar.js
@@ -51,7 +51,9 @@
}
export function updateChatHeader(headerEl, profileId, profileName) {
- headerEl.innerHTML = profileId
+ const titleEl = headerEl.querySelector('#chat-header-title'); // write into the title child only, so the header's other children are no longer overwritten
+ if (!titleEl) return; // header without a #chat-header-title element: nothing to update
+ titleEl.innerHTML = profileId // interpolated values below are passed through esc() before insertion
? `${esc(profileId)} ${esc(profileName || profileId)}`
: 'Select a profile and start a new chat';
}
diff --git a/client/style.css b/client/style.css
index e1ab084..8009bfc 100644
--- a/client/style.css
+++ b/client/style.css
@@ -167,6 +167,17 @@
align-items: center;
gap: 8px;
}
+.token-counter {
+ margin-left: auto; /* takes up the free space on its left; presumably pushes the counter to the far end of a flex header — confirm .chat-header is display:flex */
+ font-size: 11px;
+ font-variant-numeric: tabular-nums; /* equal-width digits so the text width stays steady as the counts change */
+ color: var(--text-muted);
+ white-space: nowrap; /* keep the "used/max (pct%) tokens" text on a single line */
+ transition: color 0.3s; /* fade between the muted, warn and danger colors */
+}
+.token-counter.warn { color: #b8860b; } /* `warn` is toggled by updateTokenCounter() at 50–79% usage */
+.token-counter.danger { color: #c0392b; } /* `danger` is toggled by updateTokenCounter() at 80% usage and above */
+
.chat-header .profile-badge {
background: var(--accent);
color: #fff;
diff --git a/navi/api/websocket.py b/navi/api/websocket.py
index 3a5b1cb..321ddbf 100644
--- a/navi/api/websocket.py
+++ b/navi/api/websocket.py
@@ -89,7 +89,12 @@
"success": event.success,
})
elif isinstance(event, StreamEnd):
- await websocket.send_json({"type": "stream_end", "content": event.full_content})
+ await websocket.send_json({
+ "type": "stream_end",
+ "content": event.full_content,
+ "context_tokens": event.context_tokens,
+ "max_context_tokens": event.max_context_tokens,
+ })
except SessionNotFound:
await websocket.send_json({"type": "error", "message": "Session not found"})
diff --git a/navi/config.py b/navi/config.py
index 6fc37d2..18a5db8 100644
--- a/navi/config.py
+++ b/navi/config.py
@@ -6,7 +6,7 @@
ollama_host: str = "http://localhost:11434"
ollama_default_model: str = "gemma4:e2b-it-q8_0"
- ollama_num_ctx: int = 8192
+ ollama_num_ctx: int = 65536
ollama_think: bool = True
openai_api_key: str = ""
diff --git a/navi/core/agent.py b/navi/core/agent.py
index d5af9f1..ec5690b 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -77,6 +77,8 @@
"""Marks the end of the streaming response."""
full_content: str
+ context_tokens: int | None = None # total tokens used in this turn
+ max_context_tokens: int = 0 # ollama_num_ctx from config
AgentEvent = ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd
@@ -183,8 +185,11 @@
final_messages = session.messages.copy()
accumulated = ""
thinking_active = False
+ context_tokens: int | None = None
async for chunk in llm.stream(final_messages, temperature=profile.temperature, model=profile.model):
+ if chunk.prompt_tokens is not None or chunk.completion_tokens is not None:
+ context_tokens = (chunk.prompt_tokens or 0) + (chunk.completion_tokens or 0)
if chunk.thinking:
if not thinking_active:
thinking_active = True
@@ -201,7 +206,11 @@
session.messages.append(Message(role="assistant", content=accumulated, created_at=datetime.now(timezone.utc)))
await self._sessions.save(session)
- yield StreamEnd(full_content=accumulated)
+ yield StreamEnd(
+ full_content=accumulated,
+ context_tokens=context_tokens,
+ max_context_tokens=settings.ollama_num_ctx,
+ )
return
# Tool calls: emit events, execute, continue loop
diff --git a/navi/llm/base.py b/navi/llm/base.py
index f4d4a71..88e8946 100644
--- a/navi/llm/base.py
+++ b/navi/llm/base.py
@@ -57,6 +57,9 @@
delta: str | None = None
thinking: str | None = None
finish_reason: str | None = None # "stop" | "length"; None while streaming
+ # Token counts — only set on the final chunk (the one with finish_reason set); None while streaming
+ prompt_tokens: int | None = None
+ completion_tokens: int | None = None
class LLMBackend(ABC):
diff --git a/navi/llm/ollama.py b/navi/llm/ollama.py
index ccefd79..8c36103 100644
--- a/navi/llm/ollama.py
+++ b/navi/llm/ollama.py
@@ -99,6 +99,12 @@
thinking = getattr(chunk.message, "thinking", None) or None
delta = chunk.message.content or None
finish_reason = "stop" if chunk.done else None
- yield LLMChunk(delta=delta, thinking=thinking, finish_reason=finish_reason)
+ yield LLMChunk(
+ delta=delta,
+ thinking=thinking,
+ finish_reason=finish_reason,
+ prompt_tokens=chunk.prompt_eval_count if chunk.done else None,
+ completion_tokens=chunk.eval_count if chunk.done else None,
+ )
except Exception as e:
raise LLMBackendError(str(e)) from e