diff --git a/navi/core/agent.py b/navi/core/agent.py index cccc8d1..f2a2072 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -451,6 +451,7 @@ return accumulated_text = "" + accumulated_thinking = "" turn_tool_calls: list[ToolCallRequest] | None = None thinking_active = False context_tokens: int | None = None @@ -490,6 +491,7 @@ if chunk.prompt_tokens is not None or chunk.completion_tokens is not None: context_tokens = (chunk.prompt_tokens or 0) + (chunk.completion_tokens or 0) if chunk.thinking: + accumulated_thinking += chunk.thinking if not thinking_active: thinking_active = True yield ThinkingDelta(delta=chunk.thinking) @@ -519,6 +521,7 @@ if not turn_tool_calls: # Final response — text already streamed above assistant_msg = Message(role="assistant", content=accumulated_text or None, + thinking=accumulated_thinking or None, created_at=datetime.now(timezone.utc)) session.messages.append(assistant_msg) session.context.append(assistant_msg) @@ -539,6 +542,7 @@ assistant_msg = Message( role="assistant", content=accumulated_text or None, + thinking=accumulated_thinking or None, tool_calls=turn_tool_calls, ) session.messages.append(assistant_msg) @@ -743,8 +747,10 @@ if not _re.search(r"(TOOL:|AGENT:|→\s*SELF)", plan_text): log.warning("agent.planning_no_executors", hint="plan lacks TOOL/AGENT/SELF assignments") - # Inject plan so the main loop continues from it + # Inject plan into context so the main loop continues from it, + # and into messages (with is_plan flag) so the UI can render a plan card after reload. session.context.append(Message(role="assistant", content=plan_text)) + session.messages.append(Message(role="assistant", content=plan_text, is_plan=True)) log.debug("agent.plan_ready", phases=2, length=len(plan_text)) return [PlanReady(plan=plan_text)] diff --git a/navi/llm/base.py b/navi/llm/base.py index 8e13c03..56fe714 100644 --- a/navi/llm/base.py +++ b/navi/llm/base.py @@ -42,6 +42,10 @@ created_at: datetime | None = None # marks a compressed history block injected by the context compressor is_summary: bool = False + # reasoning text produced during this turn (not sent to LLM, display only) + thinking: str | None = None + # marks a planning block — stored in messages for UI rendering, not as LLM context + is_plan: bool = False class LLMResponse(BaseModel):