diff --git a/navi/core/agent.py b/navi/core/agent.py index 3df77af..23f7b80 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -32,6 +32,7 @@ from .compressor import compress_context, should_compress from .events import ( AgentEvent, + AIHelperTokensUsed, ContextCompressed, PlanReady, StreamEnd, @@ -615,6 +616,8 @@ if isinstance(item, SubagentComplete): _subagent_tokens += item.token_count _tool_call_count += item.tool_call_count + elif isinstance(item, AIHelperTokensUsed): + _subagent_tokens += item.total else: yield item diff --git a/navi/core/ai_helper.py b/navi/core/ai_helper.py index b14b735..8334dd2 100644 --- a/navi/core/ai_helper.py +++ b/navi/core/ai_helper.py @@ -49,6 +49,9 @@ async def ask(self, system: str, prompt: str) -> str: """Single non-streaming LLM call. Returns the response text.""" from navi.llm.base import Message + from navi.tools.base import current_event_sink + from navi.core.events import AIHelperTokensUsed + messages = [ Message(role="system", content=system), Message(role="user", content=prompt), @@ -60,6 +63,16 @@ model=self._active_model(), think=False, ) + + # Emit token usage so run_stream can account for AIHelper calls in session metrics + if response.prompt_tokens or response.completion_tokens: + sink = current_event_sink.get() + if sink is not None: + await sink.put(AIHelperTokensUsed( + prompt_tokens=response.prompt_tokens or 0, + completion_tokens=response.completion_tokens or 0, + )) + return (response.content or "").strip() async def ask_json(self, system: str, prompt: str) -> list | dict | None: diff --git a/navi/core/events.py b/navi/core/events.py index 51ced39..47b7384 100644 --- a/navi/core/events.py +++ b/navi/core/events.py @@ -109,8 +109,21 @@ tool_call_count: int = 0 +@dataclass +class AIHelperTokensUsed: + """Internal: emitted by AIHelper after each LLM call to report token usage. + Never forwarded to WebSocket clients.""" + + prompt_tokens: int = 0 + completion_tokens: int = 0 + + @property + def total(self) -> int: + return self.prompt_tokens + self.completion_tokens + + AgentEvent = ( ToolStarted | ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd | StreamStopped | ContextCompressed | TurnThinking | ProfileSwitched - | PlanReady | SubagentComplete + | PlanReady | SubagentComplete | AIHelperTokensUsed ) diff --git a/navi/llm/base.py b/navi/llm/base.py index 2c5b44f..379e3a3 100644 --- a/navi/llm/base.py +++ b/navi/llm/base.py @@ -59,6 +59,8 @@ tool_calls: list[ToolCallRequest] | None finish_reason: str # "stop" | "tool_calls" | "length" thinking: str | None = None + prompt_tokens: int | None = None + completion_tokens: int | None = None class LLMChunk(BaseModel): diff --git a/navi/llm/ollama.py b/navi/llm/ollama.py index 5ca96d7..27f2668 100644 --- a/navi/llm/ollama.py +++ b/navi/llm/ollama.py @@ -89,6 +89,8 @@ tool_calls=tool_calls, finish_reason=finish_reason, thinking=getattr(msg, "thinking", None) or None, + prompt_tokens=getattr(response, "prompt_eval_count", None) or None, + completion_tokens=getattr(response, "eval_count", None) or None, ) except Exception as e: raise LLMBackendError(str(e)) from e