diff --git a/client/js/app.js b/client/js/app.js
index 432fa35..c9837a3 100644
--- a/client/js/app.js
+++ b/client/js/app.js
@@ -6,6 +6,7 @@
appendSubagentStep, finalizeSubagentStep,
appendTurnThinkingCard, appendSubagentThinking,
appendThinkingCard, finalizeThinkingCard,
+ appendPlanCard,
appendTypingIndicator, removeTypingIndicator,
appendError, showEmptyState, scrollToBottom,
appendSummaryCard, appendCompressionNotice } from './chat.js';
@@ -325,6 +326,13 @@
break;
}
+ case 'plan_ready':
+ removeTypingIndicator(messagesEl);
+ appendPlanCard(messagesEl, event.plan);
+ appendTypingIndicator(messagesEl);
+ scrollToBottom(messagesEl);
+ break;
+
case 'context_compressed':
appendCompressionNotice(messagesEl);
scrollToBottom(messagesEl);
diff --git a/client/js/chat.js b/client/js/chat.js
index 58b2396..6a9a818 100644
--- a/client/js/chat.js
+++ b/client/js/chat.js
@@ -437,6 +437,45 @@
}
/**
+ * Plan card — shown before tool calls when planning_enabled is set on the profile.
+ * Collapsed by default (plan is complete when received, not streaming); clicking
+ * the header toggles the `open` class that the stylesheet uses to reveal the body.
+ *
+ * @param {Element} el - Container the card is appended to.
+ * @param {string} plan - Plan text; rendered through renderMarkdown().
+ * @returns {HTMLDivElement} The newly appended card element.
+ */
+export function appendPlanCard(el, plan) {
+  const card = document.createElement('div');
+  card.className = 'plan-card';
+
+  const header = document.createElement('div');
+  header.className = 'plan-header';
+
+  // Build icon and label as separate elements: the stylesheet targets
+  // .plan-icon and .plan-label (the label's flex: 1 pushes the ::after
+  // chevron to the right edge). textContent avoids HTML parsing entirely.
+  const icon = document.createElement('span');
+  icon.className = 'plan-icon';
+  icon.textContent = '\u{1F5FA}\uFE0F'; // world-map emoji, escaped to survive re-encoding
+
+  const label = document.createElement('span');
+  label.className = 'plan-label';
+  label.textContent = 'Plan';
+
+  header.append(icon, label);
+
+  const body = document.createElement('div');
+  body.className = 'plan-body';
+  body.appendChild(renderMarkdown(plan));
+
+  header.addEventListener('click', () => card.classList.toggle('open'));
+  card.append(header, body);
+  el.appendChild(card);
+  return card;
+}
+
+/**
* Thinking block from a tool-calling turn (complete() β full text, not streaming).
* Rendered collapsed β content is already complete when received.
*/
diff --git a/client/style.css b/client/style.css
index 8e9926a..d99b020 100644
--- a/client/style.css
+++ b/client/style.css
@@ -23,6 +23,9 @@
--thinking-border: #1e3a5f;
--thinking-text: #6b9fd4;
--thinking-pre-text: #5a8ab0;
+ --plan-bg: #111a12;
+ --plan-border: #1e4a20;
+ --plan-text: #6db86f;
--input-bg: #1e1e1e;
--radius: 12px;
--shadow: 0 1px 4px rgba(0,0,0,0.4);
@@ -581,6 +584,49 @@
color: var(--thinking-pre-text);
}
+/* ── Plan card ───────────────────────────────────────── */
+
+.plan-card {
+  align-self: flex-start;
+  max-width: 84%;
+  background: var(--plan-bg);
+  border: 1px solid var(--plan-border);
+  border-radius: var(--radius);
+  font-size: 12px;
+  color: var(--plan-text);
+}
+
+.plan-header {
+  display: flex;
+  align-items: center;
+  gap: 7px;
+  padding: 8px 12px;
+  cursor: pointer;
+  user-select: none;
+  font-weight: 600;
+  border-radius: var(--radius);
+}
+.plan-header:hover { background: rgba(255,255,255,0.03); }
+.plan-icon { font-size: 14px; }
+.plan-label { flex: 1; } /* takes the free width so the chevron sits at the right edge */
+.plan-card:not(.open) .plan-header::after { content: '\203A'; font-size: 16px; opacity: 0.5; } /* › collapsed */
+.plan-card.open .plan-header::after { content: '\2039'; font-size: 16px; opacity: 0.5; } /* ‹ expanded */
+
+.plan-body {
+  border-top: 1px solid var(--plan-border);
+  padding: 10px 14px;
+  display: none; /* hidden until the card carries the .open class */
+}
+.plan-card.open .plan-body {
+  display: block;
+  animation: fadeSlide 0.18s ease;
+}
+.plan-body .prose { color: var(--plan-text); font-size: 12px; }
+.plan-body .prose ol,
+.plan-body .prose ul { padding-left: 1.4em; }
+.plan-body .prose li { margin: 3px 0; }
+.plan-body .prose p { margin: 4px 0; }
+
/* Typing indicator */
.typing {
align-self: flex-start;
@@ -874,6 +920,7 @@
.msg { max-width: 90%; }
.tool-card,
.thinking-card,
+ .plan-card,
.summary-card { max-width: 96%; }
/* Tighter message padding */
diff --git a/navi/api/websocket.py b/navi/api/websocket.py
index 0f837fd..4a2c103 100644
--- a/navi/api/websocket.py
+++ b/navi/api/websocket.py
@@ -25,7 +25,7 @@
from navi.api.deps import get_session_store
from navi.core import Agent, ContextCompressed, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent
-from navi.core.events import ProfileSwitched, StreamStopped, ToolStarted, TurnThinking
+from navi.core.events import PlanReady, ProfileSwitched, StreamStopped, ToolStarted, TurnThinking
from navi.exceptions import MaxIterationsReached, NaviError, SessionNotFound
router = APIRouter(tags=["websocket"])
@@ -105,6 +105,8 @@
return {"type": "profile_switched", "profile_id": event.profile_id, "profile_name": event.profile_name}
if isinstance(event, StreamStopped):
return {"type": "stream_stopped"}
+ if isinstance(event, PlanReady):
+ return {"type": "plan_ready", "plan": event.plan}
return None
diff --git a/navi/core/agent.py b/navi/core/agent.py
index 84e4567..9e9b37b 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -32,6 +32,7 @@
from .events import (
AgentEvent,
ContextCompressed,
+ PlanReady,
StreamEnd,
StreamStopped,
TextDelta,
@@ -334,6 +335,14 @@
stop_event = current_stop_event.get()
+ # Planning phase — runs a fast, non-streaming LLM call to produce a
+ # step-by-step plan BEFORE the tool-calling loop starts. The plan is
+ # injected into session.context as an assistant message so the model
+ # naturally continues from it, and emitted as PlanReady for the UI.
+ if profile.planning_enabled:
+ for _ev in await self._run_planning(session, profile, llm, mem):
+ yield _ev
+
# Tool-calling loop β uses stream_complete() for every turn so thinking
# is captured in real-time via ThinkingDelta/ThinkingEnd events.
for iteration in range(profile.max_iterations):
@@ -491,6 +500,73 @@
# Internal helpers
# ------------------------------------------------------------------
+    async def _run_planning(
+        self,
+        session,
+        profile,
+        llm: LLMBackend,
+        mem: "Message | None",
+    ) -> list[AgentEvent]:
+        """Run the pre-loop planning phase and return the events to emit.
+
+        Makes one non-streaming ``llm.complete()`` call (no tools,
+        ``think=False``, temperature 0.3) asking for a concise numbered plan
+        for the current request.
+
+        On success the plan is appended to ``session.context`` as an
+        assistant message, so the tool-calling loop that follows starts
+        with the plan already in context.
+
+        Args:
+            session: Current session; only ``session.context`` is read and
+                appended to.
+            profile: Active profile; supplies ``system_prompt`` and ``model``.
+            llm: Backend used for the planning call.
+            mem: Optional message placed directly after the planning system
+                prompt; skipped when falsy.
+
+        Returns:
+            ``[PlanReady(plan=...)]`` on success, or ``[]`` when the model
+            returns empty text or the call raises an ``Exception``. Planning
+            is best-effort: such failures are logged, not propagated.
+        """
+        planning_system = Message(
+            role="system",
+            content=(
+                self._build_system_prompt(profile.system_prompt)
+                + "\n\n---\n\n"
+                "[PLANNING] Before taking any actions, outline a concise numbered plan "
+                "for this request. Max 6 steps. Be specific — name which tools you will use "
+                "and what you expect to find. Do not execute anything yet."
+            ),
+        )
+        planning_ctx: list[Message] = [planning_system]
+        if mem:
+            planning_ctx.append(mem)
+        # Replay the conversation so far minus its system messages: the
+        # planning prompt above is the only system message for this call.
+        planning_ctx.extend(m for m in session.context if m.role != "system")
+
+        try:
+            response = await llm.complete(
+                planning_ctx,
+                tools=None,
+                temperature=0.3,
+                model=profile.model,
+                think=False,
+            )
+            plan_text = (response.content or "").strip()
+            if not plan_text:
+                return []
+            # Inject the plan as an assistant message so the main loop starts with it in context.
+            session.context.append(Message(role="assistant", content=plan_text))
+            log.debug("agent.plan_ready", length=len(plan_text))
+            return [PlanReady(plan=plan_text)]
+        except Exception:
+            # Broad on purpose: a failed planning call must not abort the turn.
+            log.warning("agent.planning_failed", exc_info=True)
+            return []
+
async def _run_workers(
self,
session,
diff --git a/navi/core/events.py b/navi/core/events.py
index 75578eb..4f994a1 100644
--- a/navi/core/events.py
+++ b/navi/core/events.py
@@ -73,6 +73,17 @@
@dataclass
+class PlanReady:
+ """Emitted before the main agent loop when profile.planning_enabled is True.
+
+ The plan text has already been injected into session.context as an assistant
+ message so the LLM will see it and follow it during execution.
+ """
+
+ plan: str  # stripped, non-empty text of the planning LLM response
+
+
+@dataclass
class TurnThinking:
"""Full thinking/reasoning block from a tool-calling turn (complete() response).
@@ -88,4 +99,5 @@
AgentEvent = (
ToolStarted | ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd
| StreamEnd | StreamStopped | ContextCompressed | TurnThinking | ProfileSwitched
+ | PlanReady
)
diff --git a/navi/core/registry.py b/navi/core/registry.py
index dc22862..57980ec 100644
--- a/navi/core/registry.py
+++ b/navi/core/registry.py
@@ -15,6 +15,7 @@
MemorySearchTool,
SpawnAgentTool,
SshExecTool,
+ ScratchpadTool,
SwitchProfileTool,
TerminalTool,
TodoTool,
@@ -111,7 +112,7 @@
memory_forget = MemoryForgetTool(memory_store) if memory_store else None
builtins = [WebSearchTool(), FilesystemTool(), HttpRequestTool(), WebViewTool(),
CodeExecTool(), TerminalTool(), SshExecTool(), ImageViewTool(),
- TodoTool(), reload_tool, write_tool, list_tool, manual_tool]
+ TodoTool(), ScratchpadTool(), reload_tool, write_tool, list_tool, manual_tool]
if memory_search:
builtins.extend([memory_search, memory_forget])
for builtin in builtins:
diff --git a/navi/profiles/base.py b/navi/profiles/base.py
index f611006..ebd8c9e 100644
--- a/navi/profiles/base.py
+++ b/navi/profiles/base.py
@@ -15,6 +15,7 @@
system_prompt: str
enabled_tools: list[str] # tool names; resolved by ToolRegistry at runtime
llm_backend: str = "ollama" # backend key, e.g. "ollama", "openai"
- model: str = "llama3.2"
+ model: str = "gemma4:26b-a4b-it-q4_K_M"
max_iterations: int = 10
temperature: float = 0.7
+ planning_enabled: bool = False # if True, run a planning LLM call before the main loop
diff --git a/navi/profiles/secretary.py b/navi/profiles/secretary.py
index e1736ce..15d66d4 100644
--- a/navi/profiles/secretary.py
+++ b/navi/profiles/secretary.py
@@ -6,19 +6,34 @@
description="General-purpose assistant for research, writing, and everyday tasks.",
system_prompt="""Mode: general-purpose assistant β research, writing, analysis, everyday tasks.
-Tool priorities:
-1. web_search β first choice for any current info, facts, or documentation lookup.
-2. code_exec β calculations, data processing, parsing; use when Python is cleaner than prose.
+## Execution discipline
+
+A plan has been outlined before you start. Follow it step by step.
+
+**After each tool call:** check whether the result matches what you expected. If not β adjust the plan.
+
+**Use scratchpad to retain findings between steps:**
+- `scratchpad(op="write", section="findings", content="...")` β capture key results
+- `scratchpad(op="append", section="errors", content="...")` β log problems
+- `scratchpad(op="read")` β recall what you've gathered before writing the final answer
+
+**Use todo to track progress on multi-step tasks:**
+- `todo(op="set", tasks=[...])` β create plan (if not already set by planning phase)
+- `todo(op="update", index=N, status="done"|"failed"|"skipped")` β mark each step
+
+## Tool priorities
+1. web_search β first choice for any current info, facts, or documentation.
+2. code_exec β calculations, data processing, parsing.
3. filesystem β read/write local documents and notes.
-4. terminal β system tasks, scripting, anything shell-native.
+4. terminal β system tasks, scripting, shell-native work.
5. http_request β external APIs, web content not suited for search.
6. image_view β whenever an image path or URL is mentioned.
-For complex multi-part tasks (3+ tool calls): call todo(op="set", tasks=[...]) first, then execute step by step. Mark each step done/failed with todo(op="update") as you go.
-
-Output style: concise, structured. When researching, include sources. Match tone and format to what was asked.""",
- enabled_tools=["todo", "switch_profile", "web_search", "web_view", "http_request", "filesystem", "code_exec", "terminal", "ssh_exec", "image_view", "memory_search", "memory_forget", "reload_tools", "write_tool", "list_tools", "tool_manual", "spawn_agent"],
+## Output style
+Concise, structured. When researching, include sources. Match tone and format to what was asked.""",
+ enabled_tools=["todo", "scratchpad", "switch_profile", "web_search", "web_view", "http_request", "filesystem", "code_exec", "terminal", "ssh_exec", "image_view", "memory_search", "memory_forget", "reload_tools", "write_tool", "list_tools", "tool_manual", "spawn_agent"],
model="gemma4:26b-a4b-it-q4_K_M",
temperature=0.7,
max_iterations=100,
+ planning_enabled=True,
)
diff --git a/navi/profiles/server_admin.py b/navi/profiles/server_admin.py
index d470397..9f48844 100644
--- a/navi/profiles/server_admin.py
+++ b/navi/profiles/server_admin.py
@@ -16,13 +16,15 @@
6. image_view β diagrams, screenshots, topology maps.
Workflow:
-1. For complex tasks (3+ tool calls): call todo(op="set", tasks=[...]) first β which hosts, what to check, in what order.
-2. Gather data (logs, status, metrics), diagnose, then act. Mark each step with todo(op="update") as you go.
-3. Before destructive or irreversible operations, state what you're about to do and why.
+1. A plan is outlined before you start β follow it step by step.
+2. Use `todo(op="set", tasks=[...])` to formalise the plan as a checklist if not already set. Mark each step with `todo(op="update")` as you go.
+3. Use `scratchpad` to capture intermediate findings (logs, metrics, errors) so you don't lose context between tool calls.
+4. Before destructive or irreversible operations, state what you're about to do and why.
When delegating to sub-agents: assign each a single host or a single domain of concern. Include exact connection details and expected output format in every briefing.""",
- enabled_tools=["todo", "switch_profile", "terminal", "filesystem", "http_request", "web_view", "web_search", "ssh_exec", "image_view", "memory_search", "memory_forget", "reload_tools", "write_tool", "list_tools", "tool_manual", "spawn_agent", "code_exec"],
+ enabled_tools=["todo", "scratchpad", "switch_profile", "terminal", "filesystem", "http_request", "web_view", "web_search", "ssh_exec", "image_view", "memory_search", "memory_forget", "reload_tools", "write_tool", "list_tools", "tool_manual", "spawn_agent", "code_exec"],
model="gemma4:26b-a4b-it-q4_K_M",
temperature=0.2,
max_iterations=100,
+ planning_enabled=True,
)
diff --git a/navi/tools/__init__.py b/navi/tools/__init__.py
index 6247474..1c07ae0 100644
--- a/navi/tools/__init__.py
+++ b/navi/tools/__init__.py
@@ -9,6 +9,7 @@
from .memory_forget import MemoryForgetTool
from .memory_search import MemorySearchTool
from .todo import TodoTool
+from .scratchpad import ScratchpadTool
from .switch_profile import SwitchProfileTool
from .web_search import WebSearchTool
from .web_view import WebViewTool
@@ -28,5 +29,6 @@
"MemoryForgetTool",
"SpawnAgentTool",
"TodoTool",
+ "ScratchpadTool",
"SwitchProfileTool",
]
diff --git a/navi/tools/scratchpad.py b/navi/tools/scratchpad.py
new file mode 100644
index 0000000..bbfbec9
--- /dev/null
+++ b/navi/tools/scratchpad.py
@@ -0,0 +1,127 @@
+"""Session-scoped scratchpad for capturing working notes during task execution."""
+from __future__ import annotations
+
+from .base import Tool, ToolResult, current_session_id
+
+# session_id → {section_name: content}
+# Held in process memory only; this module never writes pads to disk.
+_pads: dict[str, dict[str, str]] = {}
+
+_DEFAULT_SECTION = "main"
+
+
+def _error(message: str) -> ToolResult:
+    """Build a failed ToolResult that carries only an error message."""
+    return ToolResult(success=False, output="", error=message)
+
+
+class ScratchpadTool(Tool):
+    """Sectioned notepad keyed by the current session id.
+
+    Supports four operations selected by ``params["op"]``: ``write``,
+    ``append``, ``read`` and ``clear``. Notes live in the module-level
+    ``_pads`` dict, so every instance of the tool shares the same storage.
+    """
+
+    name = "scratchpad"
+    description = (
+        "Working-memory notepad for the current session. "
+        "Use to capture intermediate findings, partial results, and notes while executing a task — "
+        "so you don't lose track of what you've discovered between tool calls. "
+        "Sections let you organise notes by topic (e.g. 'findings', 'urls', 'errors')."
+    )
+    parameters = {
+        "type": "object",
+        "properties": {
+            "op": {
+                "type": "string",
+                "enum": ["write", "append", "read", "clear"],
+                "description": (
+                    "write — create/replace a section; "
+                    "append — add text to an existing section; "
+                    "read — read one section (if 'section' given) or all sections; "
+                    "clear — erase one section (if 'section' given) or the whole pad"
+                ),
+            },
+            "section": {
+                "type": "string",
+                "description": (
+                    "Named section key (e.g. 'findings', 'plan', 'errors'). "
+                    "Defaults to 'main' for write/append. "
+                    "Omit for read/clear to target all sections."
+                ),
+            },
+            "content": {
+                "type": "string",
+                "description": "Text to write or append (required for 'write' and 'append').",
+            },
+        },
+        "required": ["op"],
+    }
+
+    async def execute(self, params: dict) -> ToolResult:
+        """Apply one scratchpad operation for the current session.
+
+        Args:
+            params: Tool arguments with ``op`` (required) and optional
+                ``section`` / ``content``, as described by ``parameters``.
+
+        Returns:
+            A ToolResult. ``success`` is False for an unknown ``op``, for a
+            missing ``content`` on write/append, or for a non-string
+            ``section`` / ``content``; True otherwise (including reads of
+            empty or missing sections).
+        """
+        sid = current_session_id.get() or "__default__"
+        op = params.get("op")
+        section = params.get("section") or None
+        content = params.get("content")
+
+        # Arguments are produced by the model, so the schema's "string" types
+        # are not guaranteed; reject anything else before it is used as a key.
+        if section is not None and not isinstance(section, str):
+            return _error("'section' must be a string")
+
+        if op in ("write", "append"):
+            if content is not None and not isinstance(content, str):
+                return _error("'content' must be a string")
+            if not content:
+                return _error(f"'content' is required for '{op}'")
+            key = section or _DEFAULT_SECTION
+            pad = _pads.setdefault(sid, {})
+            if op == "write":
+                pad[key] = content
+                return ToolResult(success=True, output=f"[{key}] written ({len(content)} chars).")
+            # append: join with a single newline, leaving the stored text untouched.
+            existing = pad.get(key, "")
+            pad[key] = f"{existing}\n{content}" if existing else content
+            return ToolResult(success=True, output=f"[{key}] updated ({len(pad[key])} chars total).")
+
+        if op == "read":
+            # .get() keeps a read from creating an empty pad for the session.
+            pad = _pads.get(sid, {})
+            if section is not None:
+                text = pad.get(section)
+                if not text:
+                    return ToolResult(success=True, output=f"[{section}] is empty.")
+                return ToolResult(success=True, output=f"[{section}]:\n{text}")
+            if not pad:
+                return ToolResult(success=True, output="Scratchpad is empty.")
+            parts = [f"[{k}]:\n{v}" for k, v in pad.items()]
+            return ToolResult(success=True, output="\n\n".join(parts))
+
+        if op == "clear":
+            if section is None:
+                # Drop the whole entry so a cleared session leaves nothing behind.
+                _pads.pop(sid, None)
+                return ToolResult(success=True, output="Scratchpad cleared.")
+            pad = _pads.get(sid, {})
+            removed = pad.pop(section, None)
+            if not pad:
+                _pads.pop(sid, None)
+            return ToolResult(
+                success=True,
+                output=f"[{section}] cleared." if removed else f"[{section}] was already empty.",
+            )
+
+        return _error(f"Unknown op: {op!r}")