diff --git a/navi/api/websocket.py b/navi/api/websocket.py index 65dddb2..aafc556 100644 --- a/navi/api/websocket.py +++ b/navi/api/websocket.py @@ -25,7 +25,7 @@ from navi.api.deps import get_session_store from navi.core import Agent, ContextCompressed, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent -from navi.core.events import PlanReady, ProfileSwitched, StreamStopped, ToolStarted, TurnThinking +from navi.core.events import PlanningStatus, PlanReady, ProfileSwitched, StreamStopped, ToolStarted, TurnThinking from navi.exceptions import MaxIterationsReached, NaviError, SessionNotFound router = APIRouter(tags=["websocket"]) @@ -109,6 +109,8 @@ return {"type": "profile_switched", "profile_id": event.profile_id, "profile_name": event.profile_name} if isinstance(event, StreamStopped): return {"type": "stream_stopped"} + if isinstance(event, PlanningStatus): + return {"type": "planning_status", "phase": event.phase, "label": event.label} if isinstance(event, PlanReady): return {"type": "plan_ready", "plan": event.plan} return None diff --git a/navi/config.py b/navi/config.py index 5931772..c0f1aba 100644 --- a/navi/config.py +++ b/navi/config.py @@ -65,7 +65,7 @@ # Context compression context_compression_enabled: bool = True - context_compression_threshold: float = 0.80 # trigger at 80% of ollama_num_ctx + context_compression_threshold: float = 0.70 # trigger at 70% of ollama_num_ctx context_keep_recent: int = 8 # conversational turns to keep verbatim context_summary_temperature: float = 0.3 context_summary_max_tokens: int = 1024 # max output tokens for the summary LLM call diff --git a/navi/core/agent.py b/navi/core/agent.py index 23f7b80..3273fd0 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -34,6 +34,7 @@ AgentEvent, AIHelperTokensUsed, ContextCompressed, + PlanningStatus, PlanReady, StreamEnd, StreamStopped, @@ -462,8 +463,12 @@ # injected into session.context as an assistant message so the model # naturally continues from it, and emitted as PlanReady for the UI. if profile.planning_enabled: - for _ev in await self._run_planning(session, profile, llm, mem): - yield _ev + async for _ev in self._run_planning(session, profile, llm, mem, tool_schemas): + if isinstance(_ev, AIHelperTokensUsed): + # Accumulate planning token usage into the turn total (not forwarded to WS) + _subagent_tokens += _ev.total + else: + yield _ev # Tool-calling loop — uses stream_complete() for every turn so thinking # is captured in real-time via ThinkingDelta/ThinkingEnd events. @@ -671,22 +676,50 @@ profile, llm: LLMBackend, mem: "Message | None", - ) -> list[AgentEvent]: + tool_schemas: list | None = None, + ): """ - Two-phase planning: + Three-phase planning (async generator): - Phase 1 — Analysis: reformulate the task, identify subtasks and unknowns. - Fast signal-check: if DIRECT, skip planning entirely. + Phase 1 — Analysis (think=True): reformulate the task, identify subtasks and + unknowns. Returns DIRECT for simple requests to skip planning entirely. Phase 2 — Execution plan: assign each subtask to a specific executor - (TOOL / AGENT / SELF) using a structured format. + (TOOL / AGENT / SELF) with awareness of actually available tools. + Phase 3 — AIHelper critic: independent pass that validates executor assignments + against the real tool list and fixes any mismatches. - The phase-2 plan is injected into session.context as an assistant message - so the model naturally continues from it in the main loop. - Returns [PlanReady] on success, [] on skip or failure. + Yields PlanningStatus before each phase so the UI can show progress, + then yields PlanReady when the final plan is ready. + Yields nothing if planning is skipped. """ import re as _re - # ── Phase 1: Task analysis ───────────────────────────────────────────── + # ── Build compact tool list for Phase 2 / Phase 3 ───────────────────── + if tool_schemas: + tool_lines = [] + for schema in tool_schemas: + fn = schema.function if hasattr(schema, "function") else schema.get("function", {}) + name = fn.get("name", "") + desc = (fn.get("description") or "").split("\n")[0][:80] + tool_lines.append(f" - {name}: {desc}") + available_tools_block = ( + "Available tools (use these exact names for TOOL: assignments):\n" + + "\n".join(tool_lines) + + "\n\n" + ) + tool_names_list = ", ".join( + (schema.function if hasattr(schema, "function") else schema.get("function", {})).get("name", "") + for schema in tool_schemas + ) + else: + available_tools_block = "" + tool_names_list = "" + + # Read stop event once — checked between all phases + _stop = current_stop_event.get() + + # ── Phase 1: Task analysis (with reasoning) ──────────────────────────── + yield PlanningStatus(phase=1, label="Analysing task...") phase1_system = Message( role="system", content=( @@ -714,22 +747,33 @@ try: r1 = await asyncio.wait_for( - llm.complete(phase1_ctx, tools=None, temperature=0.3, model=profile.model, think=False), + llm.complete(phase1_ctx, tools=None, temperature=0.3, model=profile.model, think=True), timeout=settings.llm_complete_timeout, ) analysis = (r1.content or "").strip() except asyncio.TimeoutError: log.warning("agent.planning_phase1_timeout", timeout=settings.llm_complete_timeout) - return [] + return except Exception: log.warning("agent.planning_phase1_failed", exc_info=True) - return [] + return + + if r1.prompt_tokens or r1.completion_tokens: + yield AIHelperTokensUsed( + prompt_tokens=r1.prompt_tokens or 0, + completion_tokens=r1.completion_tokens or 0, + ) if not analysis or analysis.upper().startswith("DIRECT"): log.debug("agent.planning_skipped", reason="direct") - return [] + return + + if _stop and _stop.is_set(): + log.debug("agent.planning_stopped", phase=1) + return # ── Phase 2: Execution plan ──────────────────────────────────────────── + yield PlanningStatus(phase=2, label="Building execution plan...") phase2_system = Message( role="system", content=( @@ -739,8 +783,9 @@ "Task analysis:\n\n" f"{analysis}\n\n" "---\n\n" - "Now write the execution plan. For each subtask assign a specific executor:\n" - "- TOOL: — a single tool call is enough\n" + + available_tools_block + + "Now write the execution plan. For each subtask assign a specific executor:\n" + "- TOOL: — a single tool call is enough; use exact tool names from the list above\n" "- AGENT: — needs multiple steps; a subagent must handle it\n" "- SELF — final synthesis or a context-dependent single action only\n\n" "Required output format (use exactly this structure):\n\n" @@ -773,20 +818,88 @@ plan_text = (r2.content or "").strip() except asyncio.TimeoutError: log.warning("agent.planning_phase2_timeout", timeout=settings.llm_complete_timeout) - return [] + return except Exception: log.warning("agent.planning_phase2_failed", exc_info=True) - return [] + return + + if r2.prompt_tokens or r2.completion_tokens: + yield AIHelperTokensUsed( + prompt_tokens=r2.prompt_tokens or 0, + completion_tokens=r2.completion_tokens or 0, + ) if not plan_text: - return [] + return # Must have at least one numbered step if not _re.search(r"^\s*\d+[\.\)]", plan_text, _re.MULTILINE): log.debug("agent.planning_skipped", reason="no_numbered_steps") - return [] + return - # Warn if executor assignments are missing (plan may be malformed) + if _stop and _stop.is_set(): + log.debug("agent.planning_stopped", phase=2) + return + + # ── Phase 3: AIHelper plan critic ────────────────────────────────────── + yield PlanningStatus(phase=3, label="Reviewing plan...") + # Independent pass: validates and corrects executor assignments against + # the actual tool list. Falls back to Phase 2 plan on any failure. + if tool_names_list: + try: + critic_system = ( + "You are a plan validator for an AI assistant. " + "You receive a task analysis, an execution plan draft, and a list of available tools.\n\n" + "Your job:\n" + "1. Every \"TOOL: \" step must use a name from the available tools list exactly. " + "If a tool name is wrong or missing, replace it with the closest available tool, " + "or change the executor to SELF.\n" + "2. A TOOL step must require only a single tool call. " + "If a step clearly needs multiple calls, change its executor to SELF.\n" + "3. Steps must be in logical dependency order — no step may depend on a result " + "from a later step.\n\n" + "Rules:\n" + "- Do not add or remove steps.\n" + "- Do not rewrite descriptions unless fixing an executor.\n" + "- Return the corrected plan in the same ## Plan format.\n" + "- If the plan is already correct, return it unchanged.\n" + "- No commentary. No preamble. Return the plan only." + ) + critic_prompt = ( + f"Available tools: {tool_names_list}\n\n" + f"Task analysis:\n{analysis}\n\n" + f"Execution plan draft:\n{plan_text}" + ) + phase3_ctx = [ + Message(role="system", content=critic_system), + Message(role="user", content=critic_prompt), + ] + + r3 = await asyncio.wait_for( + llm.complete(phase3_ctx, tools=None, temperature=0.1, model=profile.model, think=False), + timeout=settings.llm_complete_timeout, + ) + reviewed = (r3.content or "").strip() + + if r3.prompt_tokens or r3.completion_tokens: + yield AIHelperTokensUsed( + prompt_tokens=r3.prompt_tokens or 0, + completion_tokens=r3.completion_tokens or 0, + ) + + # Accept the reviewed plan only if it still has numbered steps + if reviewed and _re.search(r"^\s*\d+[\.\)]", reviewed, _re.MULTILINE): + plan_text = reviewed + log.debug("agent.planning_phase3_applied", length=len(plan_text)) + else: + log.warning("agent.planning_phase3_invalid", preview=reviewed[:120]) + + except asyncio.TimeoutError: + log.warning("agent.planning_phase3_timeout") + except Exception: + log.warning("agent.planning_phase3_failed", exc_info=True) + + # Warn if executor assignments are still missing (plan may be malformed) if not _re.search(r"(TOOL:|AGENT:|→\s*SELF)", plan_text): log.warning("agent.planning_no_executors", hint="plan lacks TOOL/AGENT/SELF assignments") @@ -794,8 +907,8 @@ # and into messages (with is_plan flag) so the UI can render a plan card after reload. session.context.append(Message(role="assistant", content=plan_text)) session.messages.append(Message(role="assistant", content=plan_text, is_plan=True)) - log.debug("agent.plan_ready", phases=2, length=len(plan_text)) - return [PlanReady(plan=plan_text)] + log.debug("agent.plan_ready", phases=3, length=len(plan_text)) + yield PlanReady(plan=plan_text) async def _run_workers( self, diff --git a/navi/core/compressor.py b/navi/core/compressor.py index c79420f..73840be 100644 --- a/navi/core/compressor.py +++ b/navi/core/compressor.py @@ -18,11 +18,19 @@ _SUMMARIZE_SYSTEM = ( "You are summarizing a conversation history to free up context space. " - "Produce a structured factual summary covering: key facts the user shared, " - "decisions made, tasks completed or in progress, important outputs or findings, " - "any code or config snippets that were produced. " - "Use bullet points grouped by topic. Be thorough — capture enough detail that " - "the assistant can continue the conversation without the original messages. " + "Produce a structured factual summary. Lead with the most important section:\n\n" + "## Current goal\n" + "One sentence: what the assistant is working toward right now.\n\n" + "## Work state\n" + "What has been completed. What is still pending. Any blockers encountered.\n\n" + "## Key facts\n" + "Facts the user shared, decisions made, constraints discovered.\n\n" + "## Outputs\n" + "File paths created or modified, config values set, commands run and their results, " + "code or config snippets that were produced (include verbatim if short).\n\n" + "## Errors\n" + "Failures encountered and how they were resolved (or that they remain unresolved).\n\n" + "Be thorough — the assistant will continue the conversation using only this summary. " "Do not include greetings, filler, or meta-commentary about the summary itself." ) diff --git a/navi/core/events.py b/navi/core/events.py index 47b7384..48c38f4 100644 --- a/navi/core/events.py +++ b/navi/core/events.py @@ -77,6 +77,18 @@ @dataclass +class PlanningStatus: + """Emitted at the start of each planning phase to show progress in the UI. + + phase: 1 = Analysis, 2 = Execution plan, 3 = Plan review (AIHelper critic). + label: short human-readable description shown next to the spinner. + """ + + phase: int + label: str + + +@dataclass class PlanReady: """Emitted before the main agent loop when profile.planning_enabled is True. @@ -125,5 +137,5 @@ AgentEvent = ( ToolStarted | ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd | StreamStopped | ContextCompressed | TurnThinking | ProfileSwitched - | PlanReady | SubagentComplete | AIHelperTokensUsed + | PlanningStatus | PlanReady | SubagentComplete | AIHelperTokensUsed ) diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json index ed3b606..0b88f1d 100644 --- a/navi/profiles/secretary/config.json +++ b/navi/profiles/secretary/config.json @@ -10,8 +10,8 @@ }, "llm_backend": "ollama", "model": "gemma4:26b-a4b-it-q4_K_M", - "temperature": 0.7, - "max_iterations": 40, + "temperature": 0.5, + "max_iterations": 25, "planning_enabled": true, "enabled_tools": [ "todo", "scratchpad", "reflect", "switch_profile", "list_profiles", diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json index 2e38fe4..36c567c 100644 --- a/navi/profiles/server_admin/config.json +++ b/navi/profiles/server_admin/config.json @@ -11,7 +11,7 @@ "llm_backend": "ollama", "model": "gemma4:26b-a4b-it-q4_K_M", "temperature": 0.2, - "max_iterations": 40, + "max_iterations": 20, "planning_enabled": true, "enabled_tools": [ "todo", "scratchpad", "reflect", "switch_profile", "list_profiles", diff --git a/navi/tools/scratchpad.py b/navi/tools/scratchpad.py index 15d13c7..81df02a 100644 --- a/navi/tools/scratchpad.py +++ b/navi/tools/scratchpad.py @@ -12,6 +12,8 @@ description = ( "Persistent working memory for the current session. Use it to record findings, " "track artifacts, and pass context to sub-agents. Organised into named sections. " + "MANDATORY: at the start of any multi-step task, write a 'goal' section stating " + "in one sentence what you are trying to achieve. Update it if the goal changes. " "Always read the relevant section before composing a final answer — " "it may contain findings from earlier tool calls." ) @@ -32,6 +34,7 @@ "type": "string", "description": ( "Which section to target. Conventional names: " + "goal (current objective — always write this first), " "findings (discovered facts), artifacts (file paths, outputs), " "errors (tracebacks, failures), verification (DoD checklist), " "context_transfer (briefings for sub-agents), main (general). " diff --git a/navi/tools/terminal.py b/navi/tools/terminal.py index 101ab2d..7be3ea6 100644 --- a/navi/tools/terminal.py +++ b/navi/tools/terminal.py @@ -15,6 +15,7 @@ from .base import Tool, ToolResult _TIMEOUT = 300 +_MAX_OUTPUT_CHARS = 5_000 class TerminalTool(Tool): @@ -80,10 +81,14 @@ if stderr: output_parts.append(f"[stderr]\n{stderr.decode(errors='replace')}") + combined = "\n".join(output_parts) or "(no output)" + if len(combined) > _MAX_OUTPUT_CHARS: + combined = combined[:_MAX_OUTPUT_CHARS] + f"\n…[truncated — {len(combined)} chars total]" + success = proc.returncode == 0 return ToolResult( success=success, - output="\n".join(output_parts) or "(no output)", + output=combined, metadata={"returncode": proc.returncode}, error=None if success else f"Exit code {proc.returncode}", ) @@ -128,10 +133,14 @@ if stderr: output_parts.append(f"[stderr]\n{stderr.decode(errors='replace')}") + combined = "\n".join(output_parts) or "(no output)" + if len(combined) > _MAX_OUTPUT_CHARS: + combined = combined[:_MAX_OUTPUT_CHARS] + f"\n…[truncated — {len(combined)} chars total]" + success = proc.returncode == 0 return ToolResult( success=success, - output="\n".join(output_parts) or "(no output)", + output=combined, metadata={"returncode": proc.returncode}, error=None if success else f"Exit code {proc.returncode}", ) diff --git a/navi/tools/web_search.py b/navi/tools/web_search.py index c1d0b45..743bf99 100644 --- a/navi/tools/web_search.py +++ b/navi/tools/web_search.py @@ -46,7 +46,7 @@ try: results = await asyncio.to_thread( lambda b=backend: list( - DDGS().text(query, backend=b, max_results=max_results) + DDGS().text(query, backend=b, max_results=max_results, region="wt-wt") ) ) if results: @@ -106,7 +106,7 @@ async with httpx.AsyncClient(timeout=15.0) as client: resp = await client.get( "https://api.search.brave.com/res/v1/web/search", - params={"q": query, "count": min(max_results, 20)}, + params={"q": query, "count": min(max_results, 20), "country": "ALL"}, headers={ "Accept": "application/json", "Accept-Encoding": "gzip", @@ -128,6 +128,7 @@ "q": query, "format": "json", "engines": "google,bing,duckduckgo", + "language": "all", }, ) resp.raise_for_status() diff --git a/webclient/src/components/messages/ThinkingCard.vue b/webclient/src/components/messages/ThinkingCard.vue index 7c19f1d..41d4c84 100644 --- a/webclient/src/components/messages/ThinkingCard.vue +++ b/webclient/src/components/messages/ThinkingCard.vue @@ -6,14 +6,25 @@ -
{{ msg.thinking.text }}
+
{{ cleanedText }}
diff --git a/webclient/src/composables/useWebSocket.js b/webclient/src/composables/useWebSocket.js index e7550d4..b96217d 100644 --- a/webclient/src/composables/useWebSocket.js +++ b/webclient/src/composables/useWebSocket.js @@ -109,6 +109,7 @@ case 'thinking_delta': chat.onThinkingDelta(event.delta ?? ''); break case 'thinking_end': chat.onThinkingEnd(); break case 'turn_thinking': chat.onTurnThinking(event); break + case 'planning_status': chat.onPlanningStatus(event); break case 'plan_ready': chat.onPlanReady(event); break case 'tool_started': chat.onToolStarted(event); break case 'tool_call': chat.onToolCall(event); break