diff --git a/CLAUDE.md b/CLAUDE.md index 3715cd2..987f26f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -47,17 +47,21 @@ #### Thinking mechanics (per-profile flags in `config.json`) Every autonomous-reasoning feature is gated by a flag on `AgentProfile`. New mechanics always add a flag first. +Full reference: `docs/profiles.md`. | Flag | Default | Purpose | |------|---------|---------| | `think_enabled` | `true` | Extended LLM reasoning on every call | -| `planning_enabled` | `false` | Planning phase before tool loop | +| `planning_enabled` | `false` | Run planning pipeline on every message (first-message always runs it) | +| `planning_mandatory` | `false` | `true` = DIRECT shortcut disabled on every message; each phase still obeys its own `planning_phaseN_enabled` flag | +| `planning_phase1_enabled` | `true` | Phase 1: task analysis | +| `planning_phase2_enabled` | `false` | Phase 2: 3-advisor review (adds ~3 LLM calls) | +| `planning_phase3_enabled` | `true` | Phase 3: structured execution plan | | `iteration_budget_enabled` | `true` | Injects remaining iterations into context | -| `planning_reflect_enabled` | `false` | Reflect 3-advisor pass in planning phase 1 | | `goal_anchoring_enabled` | `true` | Goal reminder every N iterations | | `goal_anchoring_interval` | `5` | N for goal anchoring | | `anti_stall_enabled` | `true` | Stall detector (N iterations without todo progress) | -| `anti_stall_threshold` | `3` | Stall threshold in iterations | +| `anti_stall_threshold` | `8` | Stall threshold in iterations | | `step_validation_enabled` | `false` | LLM check after each todo step (adds latency) | | `adaptive_replan_enabled` | `false` | Re-planning on step failure | diff --git a/docs/profiles.md b/docs/profiles.md index 4ed7922..b31d62c 100644 --- a/docs/profiles.md +++ b/docs/profiles.md @@ -9,38 +9,70 @@ - `system_prompt.txt` — domain-specific instructions - `subagent_system_prompt.txt` — injected into subagents spawned from this profile (optional) -```python -@dataclass -class AgentProfile: - id: str # unique identifier - 
name: str - description: str - system_prompt: str # loaded from system_prompt.txt - enabled_tools: list[str] # tools available in the main loop - llm_backend: str = "ollama" - model: list[str] = ["gemma4:31b-cloud"] # priority list; fallback tries models in order - max_iterations: int = 10 - temperature: float = 0.7 - planning_enabled: bool = False - short_description: str = "" # 1-line summary shown to all profiles - full_description: dict = {} # keys: specialization, when_to_use, key_tools +--- - # Thinking mechanics (see docs/agent.md for details) - think_enabled: bool = True - iteration_budget_enabled: bool = True - planning_reflect_enabled: bool = False - goal_anchoring_enabled: bool = True - goal_anchoring_interval: int = 5 - anti_stall_enabled: bool = True - anti_stall_threshold: int = 8 - step_validation_enabled: bool = False - adaptive_replan_enabled: bool = False +## All `config.json` fields - # Sub-agent configuration - subagent_tools: list[str] = [] - subagent_planning_enabled: bool = False - subagent_system_prompt: str = "" # loaded from subagent_system_prompt.txt -``` +### Identity + +| Key | Type | Default | Description | +|---|---|---|---| +| `id` | str | **required** | Unique profile identifier (matches directory name) | +| `name` | str | **required** | Human-readable name shown in UI | +| `description` | str | **required** | Longer description shown in profile picker | +| `short_description` | str | `""` | One-line summary injected into every profile's system prompt (cross-profile awareness) | +| `full_description` | dict | `{}` | Structured dict: `specialization`, `when_to_use`, `key_tools` keys | + +### LLM + +| Key | Type | Default | Description | +|---|---|---|---| +| `llm_backend` | str | `"ollama"` | Backend key: `"ollama"`, `"openai"` | +| `model` | str or list[str] | `["gemma4:31b-cloud"]` | Model priority list — first available wins. String is accepted and auto-wrapped. 
| +| `temperature` | float | `0.7` | Sampling temperature for main loop calls | +| `max_iterations` | int | `20` | Hard cap on tool-calling iterations per turn | + +### Tools + +| Key | Type | Default | Description | +|---|---|---|---| +| `enabled_tools` | list[str] | **required** | Tool names available in the main loop | +| `subagent_tools` | list[str] | `[]` | Tools available to sub-agents spawned from this profile. Falls back to `enabled_tools` if empty. | + +### Thinking mechanics + +| Key | Type | Default | Description | +|---|---|---|---| +| `think_enabled` | bool | `true` | Pass `think=True` to LLM on every call (extended reasoning). Disable for latency-sensitive profiles. | +| `iteration_budget_enabled` | bool | `true` | Inject remaining iteration count into context so the model knows when to wrap up. | +| `goal_anchoring_enabled` | bool | `true` | Inject a goal-reminder system message every N iterations to prevent drift. | +| `goal_anchoring_interval` | int | `5` | N for goal anchoring. | +| `anti_stall_enabled` | bool | `true` | Detect looping without todo progress and inject a hard warning. | +| `anti_stall_threshold` | int | `8` | Consecutive iterations without progress before stall warning fires. | +| `step_validation_enabled` | bool | `false` | After each todo step is marked done, run a lightweight LLM check: "did the result satisfy the goal?" Adds ~1 LLM call per step. | +| `adaptive_replan_enabled` | bool | `false` | When a todo step is marked failed, trigger a re-planning pass. Depends on `step_validation_enabled`. | + +### Planning + +The planning pipeline runs before the main tool-calling loop and produces a structured execution plan. It has three phases: + +- **Phase 1 — Analysis**: reformulates the task, identifies subtasks and unknowns. Can output `DIRECT` to skip to execution immediately. +- **Phase 2 — Multi-advisor review**: three independent LLM advisors (Critic / Pragmatist / Detailer) critique the Phase 1 analysis. Adds ~3 LLM calls. 
+- **Phase 3 — Execution plan**: assigns each subtask to `TOOL / AGENT / SELF`. + +| Key | Type | Default | Description | +|---|---|---|---| +| `planning_enabled` | bool | `false` | Run the planning pipeline on every user message (not just the first). First-message planning always runs regardless of this flag. | +| `planning_mandatory` | bool | `false` | `true` — the `DIRECT` shortcut is never offered to the model on any message, so every enabled phase runs; it does not override the per-phase switches below or the `REFLECT: yes` condition for Phase 2. `false` — the model can output `DIRECT` in Phase 1 to skip straight to execution. First-message planning is always forced regardless of this flag. | +| `planning_phase1_enabled` | bool | `true` | Enable Phase 1 (task analysis). When disabled, Phase 3 runs without analysis context and Phase 2 never runs, because there is no `REFLECT` signal to trigger it. | +| `planning_phase2_enabled` | bool | `false` | Enable Phase 2 (multi-advisor review). Only runs when Phase 1 signals `REFLECT: yes`, and never for sub-agents. | +| `planning_phase3_enabled` | bool | `true` | Enable Phase 3 (structured execution plan). When disabled, the pipeline stops after Phase 1 (and Phase 2, if it is enabled and triggered); no execution plan is produced. | + +### Sub-agent planning + +| Key | Type | Default | Description | +|---|---|---|---| +| `subagent_planning_enabled` | bool | `false` | Sub-agents spawned from this profile also run the planning pipeline before their tool loop. | --- @@ -78,7 +110,7 @@ ## Adding a profile 1. Create directory `navi/profiles/my_profile/` -2. Add `config.json`: +2. 
Add `config.json` (minimal example): ```json { "id": "my_profile", @@ -87,20 +119,23 @@ "short_description": "...", "model": ["gemma4:31b-cloud", "gemma4:26b-a4b-it-q4_K_M"], "temperature": 0.5, - "max_iterations": 30, + "max_iterations": 20, + "enabled_tools": ["todo", "scratchpad", "web_search", "filesystem"], + "subagent_tools": ["todo", "filesystem", "terminal"], "planning_enabled": true, + "planning_mandatory": false, + "planning_phase1_enabled": true, + "planning_phase2_enabled": false, + "planning_phase3_enabled": true, "think_enabled": true, "iteration_budget_enabled": true, - "planning_reflect_enabled": false, "goal_anchoring_enabled": true, "goal_anchoring_interval": 5, "anti_stall_enabled": true, "anti_stall_threshold": 8, "step_validation_enabled": false, "adaptive_replan_enabled": false, - "subagent_planning_enabled": false, - "subagent_tools": ["todo", "filesystem", "terminal"], - "enabled_tools": ["todo", "scratchpad", "web_search", "filesystem"] + "subagent_planning_enabled": false } ``` 3. Add `system_prompt.txt` with domain-specific instructions. diff --git a/navi/core/agent.py b/navi/core/agent.py index fe1c6a3..1c078c2 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -584,9 +584,12 @@ # Planning phase — always runs on the first user message in a session; # on subsequent messages uses the profile's planning_enabled flag. + # force_plan suppresses the DIRECT shortcut: first message is always forced, + # and planning_mandatory extends that to every subsequent message. 
_is_first_message = sum(1 for m in session.messages if m.role == "user") == 1 + _force_plan = _is_first_message or profile.planning_mandatory if _is_first_message or profile.planning_enabled: - async for _ev in self._run_planning(session.context, profile, llm, mem, tool_schemas, messages=session.messages, force_plan=_is_first_message): + async for _ev in self._run_planning(session.context, profile, llm, mem, tool_schemas, messages=session.messages, force_plan=_force_plan): if isinstance(_ev, AIHelperTokensUsed): _subagent_tokens += _ev.total elif isinstance(_ev, PlanningDebugData): @@ -955,98 +958,103 @@ # Debug log — collected across all phases, yielded at the end (main agent only) _dbg: dict = {"timestamp": datetime.now(timezone.utc).isoformat(), "result": "plan", "phases": {}} - # ── Phase 1: Task analysis (with reasoning) ──────────────────────────── - yield PlanningStatus(phase=1, label="Working on it...", is_subagent=is_subagent) _base_sys = system_prompt_override if system_prompt_override is not None else self._build_system_prompt(profile) - phase1_system = Message( - role="system", - content=( - _base_sys - + "\n\n---\n\n" - "[PLANNING — PHASE 1: ANALYSIS]\n\n" - "Read the user's latest request.\n\n" - + ( - "" - if force_plan else - "If it is a simple question, casual conversation, or answerable in one step " - "without tools — respond with exactly: DIRECT\n\n" + + # ── Phase 1: Task analysis (with reasoning) ──────────────────────────── + analysis: str = "" + if profile.planning_phase1_enabled: + yield PlanningStatus(phase=1, label="Working on it...", is_subagent=is_subagent) + phase1_system = Message( + role="system", + content=( + _base_sys + + "\n\n---\n\n" + "[PLANNING — PHASE 1: ANALYSIS]\n\n" + "Read the user's latest request.\n\n" + + ( + "" + if force_plan else + "If it is a simple question, casual conversation, or answerable in one step " + "without tools — respond with exactly: DIRECT\n\n" + ) + + available_tools_block + + "Analyse the request 
and output:\n\n" + "TASK: [one clear sentence — what actually needs to be done]\n" + "GOAL: [how you will know the task is complete]\n" + "UNKNOWNS: [genuine uncertainties that could block execution, or NONE]\n" + "RESOURCES:\n" + "- [tool_name]: [what it does] — [limitation if any] — [alternative if limitation blocks the goal]\n" + "- context sources: [which of memory / NAVI.md / web you will check and why]\n" + "SUBTASKS:\n" + "1. [discrete unit of work]\n" + "2. [discrete unit of work]\n" + "ATOMICITY: For each subtask that requires multiple actions — if it fails halfway, " + "is any partial result still useful? If not, split it into smaller steps where " + "each one delivers an independent, usable result on its own.\n" + "REFLECT: yes — if the task is complex (multiple unknowns, external APIs, " + "research required, or high-stakes/irreversible actions); " + "no — if it is straightforward and the path is clear.\n" + "COMMITMENTS: [follow the plan step by step using the todo tool; gather any missing context independently without asking the user]\n\n" + "Rules: maximum 6 subtasks. Each must be concrete and actionable. " + "No execution yet — analysis only." 
+ ), + ) + phase1_ctx: list[Message] = [phase1_system] + if mem: + phase1_ctx.append(mem) + phase1_ctx.extend(m for m in context if m.role != "system") + + try: + r1 = await asyncio.wait_for( + llm.complete(phase1_ctx, tools=None, temperature=0.3, model=profile.model, think=profile.think_enabled), + timeout=settings.llm_complete_timeout, ) - + available_tools_block - + "Analyse the request and output:\n\n" - "TASK: [one clear sentence — what actually needs to be done]\n" - "GOAL: [how you will know the task is complete]\n" - "UNKNOWNS: [genuine uncertainties that could block execution, or NONE]\n" - "RESOURCES:\n" - "- [tool_name]: [what it does] — [limitation if any] — [alternative if limitation blocks the goal]\n" - "- context sources: [which of memory / NAVI.md / web you will check and why]\n" - "SUBTASKS:\n" - "1. [discrete unit of work]\n" - "2. [discrete unit of work]\n" - "ATOMICITY: For each subtask that requires multiple actions — if it fails halfway, " - "is any partial result still useful? If not, split it into smaller steps where " - "each one delivers an independent, usable result on its own.\n" - "REFLECT: yes — if the task is complex (multiple unknowns, external APIs, " - "research required, or high-stakes/irreversible actions); " - "no — if it is straightforward and the path is clear.\n" - "COMMITMENTS: [follow the plan step by step using the todo tool; gather any missing context independently without asking the user]\n\n" - "Rules: maximum 6 subtasks. Each must be concrete and actionable. " - "No execution yet — analysis only." 
- ), - ) - phase1_ctx: list[Message] = [phase1_system] - if mem: - phase1_ctx.append(mem) - phase1_ctx.extend(m for m in context if m.role != "system") + analysis = (r1.content or "").strip() + except asyncio.TimeoutError: + log.warning("agent.planning_phase1_timeout", timeout=settings.llm_complete_timeout) + _dbg["result"] = "phase1_timeout" + if not is_subagent: + yield PlanningDebugData(log=_dbg) + return + except Exception: + log.warning("agent.planning_phase1_failed", exc_info=True) + _dbg["result"] = "phase1_error" + if not is_subagent: + yield PlanningDebugData(log=_dbg) + return - try: - r1 = await asyncio.wait_for( - llm.complete(phase1_ctx, tools=None, temperature=0.3, model=profile.model, think=profile.think_enabled), - timeout=settings.llm_complete_timeout, - ) - analysis = (r1.content or "").strip() - except asyncio.TimeoutError: - log.warning("agent.planning_phase1_timeout", timeout=settings.llm_complete_timeout) - _dbg["result"] = "phase1_timeout" - if not is_subagent: - yield PlanningDebugData(log=_dbg) - return - except Exception: - log.warning("agent.planning_phase1_failed", exc_info=True) - _dbg["result"] = "phase1_error" - if not is_subagent: - yield PlanningDebugData(log=_dbg) - return + if r1.prompt_tokens or r1.completion_tokens: + yield AIHelperTokensUsed( + prompt_tokens=r1.prompt_tokens or 0, + completion_tokens=r1.completion_tokens or 0, + ) - if r1.prompt_tokens or r1.completion_tokens: - yield AIHelperTokensUsed( - prompt_tokens=r1.prompt_tokens or 0, - completion_tokens=r1.completion_tokens or 0, - ) + _dbg["phases"]["1"] = { + "output": analysis, + "prompt_tokens": r1.prompt_tokens or 0, + "completion_tokens": r1.completion_tokens or 0, + } - _dbg["phases"]["1"] = { - "output": analysis, - "prompt_tokens": r1.prompt_tokens or 0, - "completion_tokens": r1.completion_tokens or 0, - } + if not analysis or analysis.upper().startswith("DIRECT"): + log.debug("agent.planning_skipped", reason="direct") + _dbg["result"] = "direct" + if not 
is_subagent: + yield PlanningDebugData(log=_dbg) + return - if not analysis or analysis.upper().startswith("DIRECT"): - log.debug("agent.planning_skipped", reason="direct") - _dbg["result"] = "direct" - if not is_subagent: - yield PlanningDebugData(log=_dbg) - return - - if _stop and _stop.is_set(): - log.debug("agent.planning_stopped", phase=1) - return + if _stop and _stop.is_set(): + log.debug("agent.planning_stopped", phase=1) + return + else: + log.debug("agent.planning_phase1_skipped") # ── Phase 2: Multi-perspective review (conditional) ──────────────────── - # Runs only when profile.planning_reflect_enabled=True AND phase 1 signals + # Runs only when planning_phase2_enabled=True AND phase 1 signals # that the task is complex enough to warrant independent critique. advisor_feedback: str = "" needs_reflect = bool(_re.search(r"REFLECT\s*:\s*yes", analysis, _re.IGNORECASE)) - if profile.planning_reflect_enabled and needs_reflect and not is_subagent: + if profile.planning_phase2_enabled and needs_reflect and not is_subagent: yield PlanningStatus(phase=2, label="Consulting advisors...", is_subagent=is_subagent) _ADVISORS = [ @@ -1128,6 +1136,13 @@ return # ── Phase 3: Execution plan ──────────────────────────────────────────── + if not profile.planning_phase3_enabled: + log.debug("agent.planning_phase3_skipped") + _dbg["result"] = "phase1_only" + if not is_subagent: + yield PlanningDebugData(log=_dbg) + return + yield PlanningStatus(phase=3, label="Building execution plan...", is_subagent=is_subagent) advisor_block = ( diff --git a/navi/profiles/base.py b/navi/profiles/base.py index e140656..b8c111c 100644 --- a/navi/profiles/base.py +++ b/navi/profiles/base.py @@ -44,9 +44,22 @@ # model knows when to wrap up instead of hitting the limit blindly. iteration_budget_enabled: bool = True - # Run reflect's 3 advisors (Critic/Pragmatist/Detailer) inside planning - # phase 1 to catch blind spots before execution starts. Adds ~3 LLM calls. 
- planning_reflect_enabled: bool = False + # ── Planning phases ─────────────────────────────────────────────────────── + # planning_mandatory: if True, the DIRECT shortcut is never offered to the + # model — planning always runs in full. If False, the model can output DIRECT + # to skip straight to execution (simple requests bypass planning). + # First-message planning is always forced regardless of this flag. + planning_mandatory: bool = False + + # Individual phase switches — allow disabling expensive phases for profiles + # that don't need them. + # Phase 1: task analysis (TASK/GOAL/UNKNOWNS). Entry point for the pipeline. + planning_phase1_enabled: bool = True + # Phase 2: multi-perspective review (Critic/Pragmatist/Detailer advisors). + # Adds ~3 LLM calls; only useful for complex multi-step tasks. + planning_phase2_enabled: bool = False + # Phase 3: structured execution plan (numbered steps with TOOL/AGENT/SELF). + planning_phase3_enabled: bool = True # Inject a goal-reminder system message every N iterations to prevent drift # on long tasks. N = goal_anchoring_interval. 
diff --git a/navi/profiles/developer/config.json b/navi/profiles/developer/config.json index 69036e4..4b209c3 100644 --- a/navi/profiles/developer/config.json +++ b/navi/profiles/developer/config.json @@ -19,7 +19,6 @@ "subagent_planning_enabled": true, "think_enabled": true, "iteration_budget_enabled": true, - "planning_reflect_enabled": false, "goal_anchoring_enabled": true, "goal_anchoring_interval": 5, "anti_stall_enabled": true, @@ -58,5 +57,9 @@ "spawn_agent", "share_file", "email_manager" - ] -} \ No newline at end of file + ], + "planning_mandatory": false, + "planning_phase1_enabled": true, + "planning_phase2_enabled": false, + "planning_phase3_enabled": true +} diff --git a/navi/profiles/loader.py b/navi/profiles/loader.py index 3b21874..6ad7fdb 100644 --- a/navi/profiles/loader.py +++ b/navi/profiles/loader.py @@ -59,6 +59,10 @@ else "" ) + # planning_phase2_enabled supersedes the old planning_reflect_enabled key. + # If only the old key is present, migrate its value transparently. 
+ phase2_default = config.get("planning_reflect_enabled", False) + profiles.append(AgentProfile( id=config["id"], name=config["name"], @@ -70,8 +74,20 @@ temperature=config.get("temperature", 0.7), max_iterations=config.get("max_iterations", 20), planning_enabled=config.get("planning_enabled", False), + planning_mandatory=config.get("planning_mandatory", False), + planning_phase1_enabled=config.get("planning_phase1_enabled", True), + planning_phase2_enabled=config.get("planning_phase2_enabled", phase2_default), + planning_phase3_enabled=config.get("planning_phase3_enabled", True), short_description=config.get("short_description", ""), full_description=config.get("full_description", {}), + think_enabled=config.get("think_enabled", True), + iteration_budget_enabled=config.get("iteration_budget_enabled", True), + goal_anchoring_enabled=config.get("goal_anchoring_enabled", True), + goal_anchoring_interval=config.get("goal_anchoring_interval", 5), + anti_stall_enabled=config.get("anti_stall_enabled", True), + anti_stall_threshold=config.get("anti_stall_threshold", 8), + step_validation_enabled=config.get("step_validation_enabled", False), + adaptive_replan_enabled=config.get("adaptive_replan_enabled", False), subagent_tools=config.get("subagent_tools", []), subagent_planning_enabled=config.get("subagent_planning_enabled", False), subagent_system_prompt=subagent_system_prompt, diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json index 8f52c5a..1bb757e 100644 --- a/navi/profiles/secretary/config.json +++ b/navi/profiles/secretary/config.json @@ -19,7 +19,6 @@ "subagent_planning_enabled": true, "think_enabled": true, "iteration_budget_enabled": true, - "planning_reflect_enabled": false, "goal_anchoring_enabled": true, "goal_anchoring_interval": 5, "anti_stall_enabled": true, @@ -58,5 +57,9 @@ "share_file", "weather", "email_manager" - ] -} \ No newline at end of file + ], + "planning_mandatory": false, + "planning_phase1_enabled": true, + 
"planning_phase2_enabled": false, + "planning_phase3_enabled": true +} diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json index 0f7e679..b222fb1 100644 --- a/navi/profiles/server_admin/config.json +++ b/navi/profiles/server_admin/config.json @@ -19,7 +19,6 @@ "subagent_planning_enabled": true, "think_enabled": true, "iteration_budget_enabled": true, - "planning_reflect_enabled": false, "goal_anchoring_enabled": true, "goal_anchoring_interval": 5, "anti_stall_enabled": true, @@ -58,5 +57,9 @@ "spawn_agent", "share_file", "email_manager" - ] -} \ No newline at end of file + ], + "planning_mandatory": false, + "planning_phase1_enabled": true, + "planning_phase2_enabled": false, + "planning_phase3_enabled": true +} diff --git a/navi/profiles/tool_developer/config.json b/navi/profiles/tool_developer/config.json index 471fb84..354e9d3 100644 --- a/navi/profiles/tool_developer/config.json +++ b/navi/profiles/tool_developer/config.json @@ -19,7 +19,6 @@ "subagent_planning_enabled": true, "think_enabled": true, "iteration_budget_enabled": true, - "planning_reflect_enabled": false, "goal_anchoring_enabled": true, "goal_anchoring_interval": 5, "anti_stall_enabled": true, @@ -67,5 +66,9 @@ "spawn_agent", "share_file", "email_manager" - ] -} \ No newline at end of file + ], + "planning_mandatory": false, + "planning_phase1_enabled": true, + "planning_phase2_enabled": false, + "planning_phase3_enabled": true +}