diff --git a/navi/core/agent.py b/navi/core/agent.py
index 3df77af..23f7b80 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -32,6 +32,7 @@
 from .compressor import compress_context, should_compress
 from .events import (
     AgentEvent,
+    AIHelperTokensUsed,
     ContextCompressed,
     PlanReady,
     StreamEnd,
@@ -615,6 +616,8 @@
                     if isinstance(item, SubagentComplete):
                         _subagent_tokens += item.token_count
                         _tool_call_count += item.tool_call_count
+                    elif isinstance(item, AIHelperTokensUsed):
+                        _subagent_tokens += item.total
                     else:
                         yield item
 
diff --git a/navi/core/ai_helper.py b/navi/core/ai_helper.py
index b14b735..8334dd2 100644
--- a/navi/core/ai_helper.py
+++ b/navi/core/ai_helper.py
@@ -49,6 +49,9 @@
     async def ask(self, system: str, prompt: str) -> str:
         """Single non-streaming LLM call. Returns the response text."""
         from navi.llm.base import Message
+        from navi.tools.base import current_event_sink
+        from navi.core.events import AIHelperTokensUsed
+
         messages = [
             Message(role="system", content=system),
             Message(role="user",   content=prompt),
@@ -60,6 +63,16 @@
             model=self._active_model(),
             think=False,
         )
+
+        # Emit token usage so run_stream can account for AIHelper calls in session metrics
+        if response.prompt_tokens or response.completion_tokens:
+            sink = current_event_sink.get()
+            if sink is not None:
+                await sink.put(AIHelperTokensUsed(
+                    prompt_tokens=response.prompt_tokens or 0,
+                    completion_tokens=response.completion_tokens or 0,
+                ))
+
         return (response.content or "").strip()
 
     async def ask_json(self, system: str, prompt: str) -> list | dict | None:
diff --git a/navi/core/events.py b/navi/core/events.py
index 51ced39..47b7384 100644
--- a/navi/core/events.py
+++ b/navi/core/events.py
@@ -109,8 +109,21 @@
     tool_call_count: int = 0
 
 
+@dataclass
+class AIHelperTokensUsed:
+    """Internal: put on the event sink by AIHelper's ask() when an LLM call reports token usage.
+    Consumed by the agent loop for token accounting; never forwarded to WebSocket clients."""
+
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+
+    @property
+    def total(self) -> int:
+        return self.prompt_tokens + self.completion_tokens
+
+
 AgentEvent = (
     ToolStarted | ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd
     | StreamEnd | StreamStopped | ContextCompressed | TurnThinking | ProfileSwitched
-    | PlanReady | SubagentComplete
+    | PlanReady | SubagentComplete | AIHelperTokensUsed
 )
diff --git a/navi/llm/base.py b/navi/llm/base.py
index 2c5b44f..379e3a3 100644
--- a/navi/llm/base.py
+++ b/navi/llm/base.py
@@ -59,6 +59,8 @@
     tool_calls: list[ToolCallRequest] | None
     finish_reason: str  # "stop" | "tool_calls" | "length"
     thinking: str | None = None
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
 
 
 class LLMChunk(BaseModel):
diff --git a/navi/llm/ollama.py b/navi/llm/ollama.py
index 5ca96d7..27f2668 100644
--- a/navi/llm/ollama.py
+++ b/navi/llm/ollama.py
@@ -89,6 +89,8 @@
                 tool_calls=tool_calls,
                 finish_reason=finish_reason,
                 thinking=getattr(msg, "thinking", None) or None,
+                prompt_tokens=getattr(response, "prompt_eval_count", None) or None,
+                completion_tokens=getattr(response, "eval_count", None) or None,
             )
         except Exception as e:
             raise LLMBackendError(str(e)) from e