diff --git a/navi/core/agent.py b/navi/core/agent.py
index 5b40650..7592b36 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -225,7 +225,7 @@
         if _is_first_message or profile.planning_enabled:
             async for _ev in self._planning.run(session.context, profile, llm, mem, tool_schemas, messages=session.messages, force_plan=_force_plan):
                 if isinstance(_ev, AIHelperTokensUsed):
-                    turn_ctx.subagent_tokens += _ev.total
+                    turn_ctx.subagent_tokens += _ev.completion_tokens
                 elif isinstance(_ev, PlanningDebugData):
                     session.planning_logs.append(_ev.log)
                     # Cap to prevent unbounded DB growth on long sessions
@@ -562,10 +562,10 @@
                 if state.thinking_active:
                     yield ThinkingEnd()
                 break
-            if chunk.prompt_tokens is not None or chunk.completion_tokens is not None:
-                _iter_tokens = (chunk.prompt_tokens or 0) + (chunk.completion_tokens or 0)
-                turn_ctx.turn_tokens += _iter_tokens
-                state.context_tokens = _iter_tokens
+            if chunk.prompt_tokens is not None:
+                state.context_tokens = chunk.prompt_tokens
+            if chunk.completion_tokens is not None:
+                turn_ctx.turn_tokens += chunk.completion_tokens
             if chunk.thinking:
                 state.accumulated_thinking += chunk.thinking
                 if not state.thinking_active:
diff --git a/navi/core/events.py b/navi/core/events.py
index a5e579b..1a05d33 100644
--- a/navi/core/events.py
+++ b/navi/core/events.py
@@ -76,11 +76,11 @@
     """Marks the end of the streaming response."""
 
     full_content: str
-    context_tokens: int | None = None   # total tokens used in this turn
+    context_tokens: int | None = None   # prompt tokens for the last LLM call
     max_context_tokens: int = 0         # ollama_num_ctx from config
     elapsed_seconds: float | None = None
     tool_call_count: int = 0
-    token_count: int | None = None      # same as context_tokens; kept separate for clarity
+    token_count: int | None = None      # completion tokens generated in this turn
     message_index: int | None = None    # raw index of the first assistant msg in this turn group
 
     def to_wire(self) -> dict:
diff --git a/navi/core/subagent_runner.py b/navi/core/subagent_runner.py
index e37577b..6da19e2 100644
--- a/navi/core/subagent_runner.py
+++ b/navi/core/subagent_runner.py
@@ -223,7 +223,7 @@
                     is_subagent=True,
                 ):
                     if isinstance(_ev, AIHelperTokensUsed):
-                        _turn_tokens += _ev.total
+                        _turn_tokens += _ev.completion_tokens
                     elif sink is not None:
                         await sink.put(_ev)
 
@@ -276,13 +276,8 @@
                     first_chunk_timeout=settings.llm_stream_first_chunk_timeout,
                     chunk_timeout=settings.llm_stream_chunk_timeout,
                 ):
-                    if (
-                        chunk.prompt_tokens is not None
-                        or chunk.completion_tokens is not None
-                    ):
-                        _turn_tokens += (chunk.prompt_tokens or 0) + (
-                            chunk.completion_tokens or 0
-                        )
+                    if chunk.completion_tokens is not None:
+                        _turn_tokens += chunk.completion_tokens
                     if chunk.thinking:
                         if thinking_started_at is None:
                             thinking_started_at = time.monotonic()
diff --git a/tests/unit/core/test_agent.py b/tests/unit/core/test_agent.py
index 56fee06..e48fa6c 100644
--- a/tests/unit/core/test_agent.py
+++ b/tests/unit/core/test_agent.py
@@ -95,7 +95,7 @@
 
     @pytest.mark.asyncio
     async def test_run_token_accumulation(self, agent, session):
-        """_turn_tokens accumulates across tool-calling iterations."""
+        """_turn_tokens accumulates completion tokens across tool-calling iterations."""
         backend = FakeLLMBackend(
             responses=["", "done"],
             tool_calls=[
@@ -110,8 +110,8 @@
         await agent.run(session.id, "do something")
         saved = await agent._sessions.get(session.id)
         final_msg = saved.messages[-1]
-        # Two iterations × (10 + 5) = 30 tokens
-        assert final_msg.token_count == 30
+        # Two iterations × 5 completion tokens = 10 tokens
+        assert final_msg.token_count == 10
 
     @pytest.mark.asyncio
     async def test_run_max_iterations(self, agent, session):
@@ -209,9 +209,9 @@
             if isinstance(ev, StreamEnd):
                 events.append(ev)
 
-        assert events[0].token_count == 150
+        assert events[0].token_count == 50
         saved = await agent._sessions.get(session.id)
-        assert saved.messages[-1].token_count == 150
+        assert saved.messages[-1].token_count == 50
 
 
 # ─── run_ephemeral() tests ───────────────────────────────────────────────────
@@ -283,10 +283,10 @@
                 if isinstance(item, SubagentComplete):
                     subagent_complete = item
 
-            # Planning tokens: (5+10) + (3+7) = 25
+            # Planning completion tokens: 10 + 7 = 17
             # Final LLM call: 0 (no tokens in FakeLLMBackend default)
             assert subagent_complete is not None
-            assert subagent_complete.token_count == 25
+            assert subagent_complete.token_count == 17
         finally:
             current_event_sink.reset(token)
             agent._planning.run = original_planning_run