diff --git a/navi/core/agent.py b/navi/core/agent.py index b549fa8..4e522b8 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -180,6 +180,19 @@ return frozenset() +def _todo_failed_steps(session_id: str) -> frozenset[tuple[int, str]]: + """Return a frozenset of (1-based index, task_text) for steps currently marked failed.""" + try: + from navi.tools.todo import _plans + return frozenset( + (i + 1, t.text) + for i, t in enumerate(_plans.get(session_id, [])) + if t.status == "failed" + ) + except Exception: + return frozenset() + + class Agent: def __init__( self, @@ -588,6 +601,11 @@ _stall_repeat_tools = 0 # iterations with identical tool calls as the previous turn _prev_tool_sigs: frozenset[tuple[str, str]] = frozenset() + # Adaptive re-plan state — detect newly-failed todo steps and inject a + # re-planning prompt on the following iteration so the model revises its plan. + _known_failed: frozenset[tuple[int, str]] = frozenset() + _replan_msg: str | None = None + # Tool-calling loop — uses stream_complete() for every turn so thinking # is captured in real-time via ThinkingDelta/ThinkingEnd events. for iteration in range(profile.max_iterations): @@ -616,6 +634,11 @@ # Snapshot todo state before this iteration (for stall detection after) _todo_snapshot_before = _todo_status_snapshot(session_id) + # Adaptive re-plan: inject queued re-plan message from previous iteration + if profile.adaptive_replan_enabled and _replan_msg: + built_ctx.append(Message(role="system", content=_replan_msg)) + _replan_msg = None + if profile.anti_stall_enabled and iteration > 0: stalled = ( _stall_no_todo >= profile.anti_stall_threshold @@ -817,6 +840,25 @@ _stall_repeat_tools = 0 _prev_tool_sigs = cur_sigs + # Adaptive re-plan: detect steps that were newly marked failed this iteration. + if profile.adaptive_replan_enabled: + current_failed = _todo_failed_steps(session_id) + new_failures = current_failed - _known_failed + _known_failed = current_failed + if new_failures: + failed_labels = ", ".join( + f'step {idx} ("{text}")' + for idx, text in sorted(new_failures) + ) + _replan_msg = ( + f"[Adaptive re-plan] {failed_labels} just failed. " + "Before continuing, revise your plan: call todo(op=\"set\") or todo(op=\"update\") " + "to replace or skip the remaining pending steps with a revised approach " + "that accounts for what went wrong. Then continue execution." + ) + log.info("agent.adaptive_replan_queued", failures=len(new_failures), + session_id=session_id) + # 7. If switch_profile was called this iteration, reload profile + tools. # switch_profile updates the DB but run_stream() holds a local session # object — without this check the final save would overwrite the change @@ -853,14 +895,18 @@ is_subagent: bool = False, ): """ - Three-phase planning (async generator): + Planning pipeline (async generator): - Phase 1 — Analysis (think=True): reformulate the task, identify subtasks and - unknowns. Returns DIRECT for simple requests to skip planning entirely. - Phase 2 — Execution plan: assign each subtask to a specific executor - (TOOL / AGENT / SELF) with awareness of actually available tools. - Phase 3 — AIHelper critic: independent pass that validates executor assignments - against the real tool list and fixes any mismatches. + Phase 1 — Analysis (think=profile.think_enabled): reformulate the task, + identify subtasks and unknowns. Outputs DIRECT for simple requests. + Outputs Reflect: yes/no to signal whether multi-perspective review + is warranted. + Phase 2 — Multi-perspective review (conditional, think=False, parallel): + Three advisors (Critic / Pragmatist / Detailer) independently + critique the draft analysis. Runs only when + profile.planning_reflect_enabled=True AND phase 1 outputs Reflect: yes. + Phase 3 — Execution plan (think=False): assigns each subtask to TOOL / AGENT / SELF. + If phase 2 ran, advisor feedback is embedded in the prompt. Yields PlanningStatus before each phase so the UI can show progress, then yields PlanReady when the final plan is ready. @@ -913,7 +959,10 @@ "UNKNOWNS: [genuine uncertainties that could block execution, or NONE]\n" "SUBTASKS:\n" "1. [discrete unit of work]\n" - "2. [discrete unit of work]\n\n" + "2. [discrete unit of work]\n" + "REFLECT: yes — if the task is complex (multiple unknowns, external APIs, " + "research required, or high-stakes/irreversible actions); " + "no — if it is straightforward and the path is clear.\n\n" "Rules: maximum 6 subtasks. Each must be concrete and actionable. " "No execution yet — analysis only." ), @@ -959,17 +1008,112 @@ log.debug("agent.planning_stopped", phase=1) return - # ── Phase 2: Execution plan ──────────────────────────────────────────── - yield PlanningStatus(phase=2, label="Building execution plan...", is_subagent=is_subagent) - phase2_system = Message( + # ── Phase 2: Multi-perspective review (conditional) ──────────────────── + # Runs only when profile.planning_reflect_enabled=True AND phase 1 signals + # that the task is complex enough to warrant independent critique. + advisor_feedback: str = "" + needs_reflect = bool(_re.search(r"REFLECT\s*:\s*yes", analysis, _re.IGNORECASE)) + + if profile.planning_reflect_enabled and needs_reflect and not is_subagent: + yield PlanningStatus(phase=2, label="Consulting advisors...", is_subagent=is_subagent) + + _ADVISORS = [ + ( + "Critic", + "You are the Critic advisor. Your role: identify what could go wrong with this plan. " + "Look for untested assumptions, overlooked risks, missing error handling, and steps that " + "might fail silently. Be specific — name the exact step and the exact risk. " + "Do NOT suggest changing the user's goal. Critique the plan, not the objective.", + ), + ( + "Pragmatist", + "You are the Pragmatist advisor. Your role: find a simpler, more direct path. " + "Identify steps that are unnecessary, can be merged, or parallelised. " + "Flag steps where the chosen executor (TOOL/AGENT/SELF) is suboptimal. " + "Do NOT suggest changing the user's goal. Improve efficiency, not direction.", + ), + ( + "Detailer", + "You are the Detailer advisor. Your role: find what is missing. " + "Identify prerequisites not listed, edge cases unaddressed, outputs not specified, " + "and validation steps absent. Be concrete — add what is needed, do not restate what is there. " + "Do NOT suggest changing the user's goal. Complete the plan, do not redirect it.", + ), + ] + + async def _run_advisor(name: str, role_prompt: str) -> tuple[str, str, int, int]: + adv_system = Message( + role="system", + content=( + _base_sys + + "\n\n---\n\n" + f"[PLANNING — PHASE 2: ADVISOR — {name.upper()}]\n\n" + + role_prompt + + "\n\n---\n\n" + "The task analysis below was produced in phase 1. " + "Review it and provide your concise critique (3–7 bullet points max). " + "Speak directly to the plan — no preamble, no conclusion.\n\n" + f"PHASE 1 ANALYSIS:\n{analysis}" + ), + ) + # Full chat context so advisors have complete picture of the conversation + adv_ctx: list[Message] = [adv_system] + if mem: + adv_ctx.append(mem) + adv_ctx.extend(m for m in context if m.role != "system") + try: + r = await asyncio.wait_for( + llm.complete(adv_ctx, tools=None, temperature=0.4, model=profile.model, think=False), + timeout=settings.llm_complete_timeout, + ) + return name, (r.content or "").strip(), r.prompt_tokens or 0, r.completion_tokens or 0 + except Exception: + log.warning("agent.planning_advisor_failed", advisor=name, exc_info=True) + return name, "", 0, 0 + + try: + advisor_results = await asyncio.gather(*[ + _run_advisor(name, prompt) for name, prompt in _ADVISORS + ]) + except Exception: + log.warning("agent.planning_reflect_failed", exc_info=True) + advisor_results = [] + + _dbg["phases"]["2"] = {} + feedback_parts: list[str] = [] + for name, output, pt, ct in advisor_results: + if output: + feedback_parts.append(f"### {name}\n{output}") + yield AIHelperTokensUsed(prompt_tokens=pt, completion_tokens=ct) + _dbg["phases"]["2"][name] = {"output": output, "prompt_tokens": pt, "completion_tokens": ct} + + if feedback_parts: + advisor_feedback = "\n\n".join(feedback_parts) + log.debug("agent.planning_reflect_done", advisors=len(feedback_parts)) + + if _stop and _stop.is_set(): + log.debug("agent.planning_stopped", phase=2) + return + + # ── Phase 3: Execution plan ──────────────────────────────────────────── + yield PlanningStatus(phase=3, label="Building execution plan...", is_subagent=is_subagent) + + advisor_block = ( + "Advisor feedback from multi-perspective review — address these points in your plan:\n\n" + + advisor_feedback + + "\n\n---\n\n" + ) if advisor_feedback else "" + + phase3_system = Message( role="system", content=( _base_sys + "\n\n---\n\n" - "[PLANNING — PHASE 2: EXECUTION PLAN]\n\n" + "[PLANNING — PHASE 3: EXECUTION PLAN]\n\n" "Task analysis:\n\n" f"{analysis}\n\n" "---\n\n" + + advisor_block + available_tools_block + "Now write the execution plan. For each subtask assign a specific executor:\n" "- TOOL: — a single tool call is enough; use exact tool names from the list above\n" @@ -995,26 +1139,26 @@ "Do not write prose. Do not start executing. Plan only." ), ) - # Phase 2 only needs the analysis (embedded above) and the original request. + # Phase 3 only needs the analysis (embedded above) and the original request. # Full history is intentionally excluded to keep the focus on plan structure. - phase2_ctx: list[Message] = [phase2_system] + phase3_ctx: list[Message] = [phase3_system] if mem: - phase2_ctx.append(mem) + phase3_ctx.append(mem) user_msgs = [m for m in context if m.role == "user"] if user_msgs: - phase2_ctx.append(user_msgs[-1]) + phase3_ctx.append(user_msgs[-1]) try: r2 = await asyncio.wait_for( - llm.complete(phase2_ctx, tools=None, temperature=0.3, model=profile.model, think=False), + llm.complete(phase3_ctx, tools=None, temperature=0.3, model=profile.model, think=False), timeout=settings.llm_complete_timeout, ) plan_text = (r2.content or "").strip() except asyncio.TimeoutError: - log.warning("agent.planning_phase2_timeout", timeout=settings.llm_complete_timeout) + log.warning("agent.planning_phase3_timeout", timeout=settings.llm_complete_timeout) return except Exception: - log.warning("agent.planning_phase2_failed", exc_info=True) + log.warning("agent.planning_phase3_failed", exc_info=True) return if r2.prompt_tokens or r2.completion_tokens: @@ -1023,7 +1167,7 @@ completion_tokens=r2.completion_tokens or 0, ) - _dbg["phases"]["2"] = { + _dbg["phases"]["3"] = { "output": plan_text, "prompt_tokens": r2.prompt_tokens or 0, "completion_tokens": r2.completion_tokens or 0, @@ -1038,11 +1182,9 @@ return if _stop and _stop.is_set(): - log.debug("agent.planning_stopped", phase=2) + log.debug("agent.planning_stopped", phase=3) return - # ── Phase 3: disabled — to be reworked together with reflect ────────── - # Warn if executor assignments are still missing (plan may be malformed) if not _re.search(r"(TOOL:|AGENT:|→\s*SELF)", plan_text): log.warning("agent.planning_no_executors", hint="plan lacks TOOL/AGENT/SELF assignments") @@ -1065,7 +1207,7 @@ except Exception: log.warning("agent.todo_auto_populate_failed", exc_info=True) - log.debug("agent.plan_ready", phases=2, length=len(plan_text)) + log.debug("agent.plan_ready", phases=3 if advisor_feedback else 2, length=len(plan_text)) if not is_subagent: yield PlanningDebugData(log=_dbg) yield PlanReady(plan=plan_text, is_subagent=is_subagent) diff --git a/navi/profiles/developer/config.json b/navi/profiles/developer/config.json index 8b9cf25..0bee425 100644 --- a/navi/profiles/developer/config.json +++ b/navi/profiles/developer/config.json @@ -22,7 +22,7 @@ "anti_stall_enabled": true, "anti_stall_threshold": 8, "step_validation_enabled": false, - "adaptive_replan_enabled": false, + "adaptive_replan_enabled": true, "subagent_tools": [ "todo", "scratchpad", "reflect", "web_search", "web_view", "http_request",