/**
 * Plan card — shown before tool calls when planning_enabled is set on the profile.
 * Collapsed by default (the plan is complete when received, not streaming);
 * clicking the header toggles the `open` class on the card.
 *
 * The header is assembled from DOM nodes instead of an HTML string so that the
 * `.plan-icon` and `.plan-label` hooks styled in style.css actually exist. The
 * label's `flex: 1` is what pushes the header's ::after chevron to the right.
 *
 * @param {HTMLElement} el - Container the card is appended to.
 * @param {string} plan - Plan text; handed to renderMarkdown() for the body.
 * @returns {HTMLDivElement} The card element that was appended.
 */
export function appendPlanCard(el, plan) {
  const card = document.createElement('div');
  card.className = 'plan-card';

  const icon = document.createElement('span');
  icon.className = 'plan-icon';
  // World-map emoji, written as escapes so it survives a re-encoding of the file.
  icon.textContent = '\u{1F5FA}\uFE0F';

  const label = document.createElement('span');
  label.className = 'plan-label';
  label.textContent = 'Plan';

  const header = document.createElement('div');
  header.className = 'plan-header';
  header.append(icon, label);
  header.addEventListener('click', () => card.classList.toggle('open'));

  const body = document.createElement('div');
  body.className = 'plan-body';
  body.appendChild(renderMarkdown(plan));

  card.append(header, body);
  el.appendChild(card);
  return card;
}
/* ── Plan card ───────────────────────────────────────── */
/*
 * Collapsible card holding the pre-execution plan. The body is hidden until
 * the card carries the `open` class. Colours come from the --plan-* custom
 * properties declared with the other theme variables in this stylesheet.
 */

.plan-card {
  align-self: flex-start;
  max-width: 84%;
  background: var(--plan-bg);
  border: 1px solid var(--plan-border);
  border-radius: var(--radius);
  font-size: 12px;
  color: var(--plan-text);
}

.plan-header {
  display: flex;
  align-items: center;
  gap: 7px;
  padding: 8px 12px;
  cursor: pointer;
  user-select: none;
  font-weight: 600;
  border-radius: var(--radius);
}
.plan-header:hover { background: rgba(255,255,255,0.03); }
.plan-icon  { font-size: 14px; }
.plan-label { flex: 1; } /* takes the free space so the chevron sits at the right edge */

/* Chevrons are written as CSS escapes so they do not depend on the file's
   encoding: \203A is the right-pointing single angle quote (collapsed),
   \2039 the left-pointing one (open). */
.plan-card:not(.open) .plan-header::after { content: '\203A'; font-size: 16px; opacity: 0.5; }
.plan-card.open       .plan-header::after { content: '\2039'; font-size: 16px; opacity: 0.5; }

.plan-body {
  border-top: 1px solid var(--plan-border);
  padding: 10px 14px;
  display: none;
}
.plan-card.open .plan-body {
  display: block;
  animation: fadeSlide 0.18s ease;
}
.plan-body .prose { color: var(--plan-text); font-size: 12px; }
.plan-body .prose ol,
.plan-body .prose ul { padding-left: 1.4em; }
.plan-body .prose li { margin: 3px 0; }
.plan-body .prose p  { margin: 4px 0; }
async def _run_planning(
    self,
    session,
    profile,
    llm: LLMBackend,
    mem: "Message | None",
) -> list[AgentEvent]:
    """
    Pre-loop planning phase: ask the LLM to outline a step-by-step plan
    for the current request (no tools, think=False for speed).

    On success the plan is appended to ``session.context`` as an assistant
    message, so the model sees it as its own prior statement and naturally
    continues from it.

    Args:
        session: Object whose ``context`` message list is read to build the
            planning prompt and, on success, appended to.
        profile: Supplies ``system_prompt`` and ``model`` for the call.
        llm: Backend whose ``complete()`` coroutine produces the plan.
        mem: Optional message inserted right after the planning system
            prompt; skipped when falsy.

    Returns:
        ``[PlanReady(plan=...)]`` when a non-empty plan was produced,
        otherwise ``[]`` (empty response or any failure). Planning is
        best-effort and never raises ``Exception`` to the caller.
    """
    planning_system = Message(
        role="system",
        content=(
            self._build_system_prompt(profile.system_prompt)
            + "\n\n---\n\n"
            "[PLANNING] Before taking any actions, outline a concise numbered plan "
            "for this request. Max 6 steps. Be specific — name which tools you will use "
            "and what you expect to find. Do not execute anything yet."
        ),
    )
    planning_ctx: list[Message] = [planning_system]
    if mem:
        planning_ctx.append(mem)
    # Include all prior context (history) but drop existing system messages:
    # the planning system prompt above replaces them for this one call.
    planning_ctx.extend(m for m in session.context if m.role != "system")

    try:
        response = await llm.complete(
            planning_ctx,
            tools=None,
            temperature=0.3,
            model=profile.model,
            think=False,
        )
        plan_text = (response.content or "").strip()
        if not plan_text:
            return []

        # Build everything that could fail BEFORE touching session.context, and
        # make the append the final step. That way a failure can never leave a
        # plan injected into the conversation without a matching PlanReady.
        plan_message = Message(role="assistant", content=plan_text)
        events: list[AgentEvent] = [PlanReady(plan=plan_text)]
        log.debug("agent.plan_ready", length=len(plan_text))
        session.context.append(plan_message)
        return events
    except Exception:
        # Deliberate best-effort: a failed planning call must not abort the
        # turn, so log it and let the main loop run without a plan.
        log.warning("agent.planning_failed", exc_info=True)
        return []
@@ -88,4 +99,5 @@ AgentEvent = ( ToolStarted | ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd | StreamStopped | ContextCompressed | TurnThinking | ProfileSwitched + | PlanReady ) diff --git a/navi/core/registry.py b/navi/core/registry.py index dc22862..57980ec 100644 --- a/navi/core/registry.py +++ b/navi/core/registry.py @@ -15,6 +15,7 @@ MemorySearchTool, SpawnAgentTool, SshExecTool, + ScratchpadTool, SwitchProfileTool, TerminalTool, TodoTool, @@ -111,7 +112,7 @@ memory_forget = MemoryForgetTool(memory_store) if memory_store else None builtins = [WebSearchTool(), FilesystemTool(), HttpRequestTool(), WebViewTool(), CodeExecTool(), TerminalTool(), SshExecTool(), ImageViewTool(), - TodoTool(), reload_tool, write_tool, list_tool, manual_tool] + TodoTool(), ScratchpadTool(), reload_tool, write_tool, list_tool, manual_tool] if memory_search: builtins.extend([memory_search, memory_forget]) for builtin in builtins: diff --git a/navi/profiles/base.py b/navi/profiles/base.py index f611006..ebd8c9e 100644 --- a/navi/profiles/base.py +++ b/navi/profiles/base.py @@ -15,6 +15,7 @@ system_prompt: str enabled_tools: list[str] # tool names; resolved by ToolRegistry at runtime llm_backend: str = "ollama" # backend key, e.g. "ollama", "openai" - model: str = "llama3.2" + model: str = "gemma4:26b-a4b-it-q4_K_M" max_iterations: int = 10 temperature: float = 0.7 + planning_enabled: bool = False # if True, run a planning LLM call before the main loop diff --git a/navi/profiles/secretary.py b/navi/profiles/secretary.py index e1736ce..15d66d4 100644 --- a/navi/profiles/secretary.py +++ b/navi/profiles/secretary.py @@ -6,19 +6,34 @@ description="General-purpose assistant for research, writing, and everyday tasks.", system_prompt="""Mode: general-purpose assistant β€” research, writing, analysis, everyday tasks. -Tool priorities: -1. web_search β€” first choice for any current info, facts, or documentation lookup. -2. 
code_exec β€” calculations, data processing, parsing; use when Python is cleaner than prose. +## Execution discipline + +A plan has been outlined before you start. Follow it step by step. + +**After each tool call:** check whether the result matches what you expected. If not β€” adjust the plan. + +**Use scratchpad to retain findings between steps:** +- `scratchpad(op="write", section="findings", content="...")` β€” capture key results +- `scratchpad(op="append", section="errors", content="...")` β€” log problems +- `scratchpad(op="read")` β€” recall what you've gathered before writing the final answer + +**Use todo to track progress on multi-step tasks:** +- `todo(op="set", tasks=[...])` β€” create plan (if not already set by planning phase) +- `todo(op="update", index=N, status="done"|"failed"|"skipped")` β€” mark each step + +## Tool priorities +1. web_search β€” first choice for any current info, facts, or documentation. +2. code_exec β€” calculations, data processing, parsing. 3. filesystem β€” read/write local documents and notes. -4. terminal β€” system tasks, scripting, anything shell-native. +4. terminal β€” system tasks, scripting, shell-native work. 5. http_request β€” external APIs, web content not suited for search. 6. image_view β€” whenever an image path or URL is mentioned. -For complex multi-part tasks (3+ tool calls): call todo(op="set", tasks=[...]) first, then execute step by step. Mark each step done/failed with todo(op="update") as you go. - -Output style: concise, structured. When researching, include sources. Match tone and format to what was asked.""", - enabled_tools=["todo", "switch_profile", "web_search", "web_view", "http_request", "filesystem", "code_exec", "terminal", "ssh_exec", "image_view", "memory_search", "memory_forget", "reload_tools", "write_tool", "list_tools", "tool_manual", "spawn_agent"], +## Output style +Concise, structured. When researching, include sources. 
Match tone and format to what was asked.""", + enabled_tools=["todo", "scratchpad", "switch_profile", "web_search", "web_view", "http_request", "filesystem", "code_exec", "terminal", "ssh_exec", "image_view", "memory_search", "memory_forget", "reload_tools", "write_tool", "list_tools", "tool_manual", "spawn_agent"], model="gemma4:26b-a4b-it-q4_K_M", temperature=0.7, max_iterations=100, + planning_enabled=True, ) diff --git a/navi/profiles/server_admin.py b/navi/profiles/server_admin.py index d470397..9f48844 100644 --- a/navi/profiles/server_admin.py +++ b/navi/profiles/server_admin.py @@ -16,13 +16,15 @@ 6. image_view β€” diagrams, screenshots, topology maps. Workflow: -1. For complex tasks (3+ tool calls): call todo(op="set", tasks=[...]) first β€” which hosts, what to check, in what order. -2. Gather data (logs, status, metrics), diagnose, then act. Mark each step with todo(op="update") as you go. -3. Before destructive or irreversible operations, state what you're about to do and why. +1. A plan is outlined before you start β€” follow it step by step. +2. Use `todo(op="set", tasks=[...])` to formalise the plan as a checklist if not already set. Mark each step with `todo(op="update")` as you go. +3. Use `scratchpad` to capture intermediate findings (logs, metrics, errors) so you don't lose context between tool calls. +4. Before destructive or irreversible operations, state what you're about to do and why. When delegating to sub-agents: assign each a single host or a single domain of concern. 
# session_id → {section_name: content}
# Entries are created on the first write/append and dropped again once a
# session's pad becomes empty, so the dict does not grow with every session
# that merely reads or clears.
_pads: dict[str, dict[str, str]] = {}


class ScratchpadTool(Tool):
    """Per-session notepad tool with named sections, kept in process memory.

    State lives in the module-level ``_pads`` dict, keyed by the value of
    ``current_session_id`` (or ``"__default__"`` when that is unset).
    """

    name = "scratchpad"
    description = (
        "Working-memory notepad for the current session. "
        "Use to capture intermediate findings, partial results, and notes while executing a task — "
        "so you don't lose track of what you've discovered between tool calls. "
        "Sections let you organise notes by topic (e.g. 'findings', 'urls', 'errors')."
    )
    parameters = {
        "type": "object",
        "properties": {
            "op": {
                "type": "string",
                "enum": ["write", "append", "read", "clear"],
                "description": (
                    "write — create/replace a section; "
                    "append — add text to a section (created if missing); "
                    "read — read one section (if 'section' given) or all sections; "
                    "clear — erase one section (if 'section' given) or the whole pad"
                ),
            },
            "section": {
                "type": "string",
                "description": (
                    "Named section key (e.g. 'findings', 'plan', 'errors'). "
                    "Defaults to 'main' for write/append. "
                    "Omit for read/clear to target all sections."
                ),
            },
            "content": {
                "type": "string",
                "description": "Text to write or append (required for 'write' and 'append').",
            },
        },
        "required": ["op"],
    }

    async def execute(self, params: dict) -> ToolResult:
        """Run one scratchpad operation for the current session.

        Args:
            params: Tool arguments: ``op`` (``write``/``append``/``read``/
                ``clear``), optional ``section`` and ``content``.

        Returns:
            A ``ToolResult``. Invalid arguments and unknown ops are reported
            as ``success=False`` results rather than raised.
        """
        sid = current_session_id.get() or "__default__"
        op = params.get("op")
        section = params.get("section") or None  # "" is treated like "not given"

        # The schema promises a string, but arguments come from a model: answer
        # a wrong type with a tool error instead of a TypeError further down.
        if section is not None and not isinstance(section, str):
            return ToolResult(success=False, output="", error="'section' must be a string")

        if op in ("write", "append"):
            content = params.get("content", "")
            if not content:
                return ToolResult(success=False, output="", error=f"'content' is required for '{op}'")
            if not isinstance(content, str):
                return ToolResult(success=False, output="", error="'content' must be a string")

            key = section or "main"
            pad = _pads.setdefault(sid, {})  # only mutating ops create the pad
            if op == "write":
                pad[key] = content
                return ToolResult(success=True, output=f"[{key}] written ({len(content)} chars).")

            existing = pad.get(key, "")
            # Join with a newline only when there is earlier text; the stored
            # text itself is left untouched.
            pad[key] = f"{existing}\n{content}" if existing else content
            return ToolResult(success=True, output=f"[{key}] updated ({len(pad[key])} chars total).")

        # read/clear must not create an entry for a session that has no pad yet.
        pad = _pads.get(sid, {})

        if op == "read":
            if section is not None:
                text = pad.get(section)
                if not text:
                    return ToolResult(success=True, output=f"[{section}] is empty.")
                return ToolResult(success=True, output=f"[{section}]:\n{text}")
            # No section → read all
            if not pad:
                return ToolResult(success=True, output="Scratchpad is empty.")
            parts = [f"[{k}]:\n{v}" for k, v in pad.items()]
            return ToolResult(success=True, output="\n\n".join(parts))

        if op == "clear":
            if section is not None:
                removed = pad.pop(section, None)
                if not pad:
                    # Last section gone: release the session's entry as well.
                    _pads.pop(sid, None)
                return ToolResult(
                    success=True,
                    output=f"[{section}] cleared." if removed else f"[{section}] was already empty.",
                )
            _pads.pop(sid, None)
            return ToolResult(success=True, output="Scratchpad cleared.")

        return ToolResult(success=False, output="", error=f"Unknown op: {op!r}")