diff --git a/navi/core/agent.py b/navi/core/agent.py index 2136d54..70ffaf9 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -514,60 +514,120 @@ mem: "Message | None", ) -> list[AgentEvent]: """ - Pre-loop planning phase: ask the LLM to outline a step-by-step plan - for the current request (no tools, think=False for speed). + Two-phase planning: - The plan is injected into session.context as an assistant message so - the model sees it as its own prior statement and naturally continues. - Returns a list of events to yield (either [PlanReady] or [] on failure). + Phase 1 — Analysis: reformulate the task, identify subtasks and unknowns. + Fast signal-check: if DIRECT, skip planning entirely. + Phase 2 — Execution plan: assign each subtask to a specific executor + (TOOL / AGENT / SELF) using a structured format. + + The phase-2 plan is injected into session.context as an assistant message + so the model naturally continues from it in the main loop. + Returns [PlanReady] on success, [] on skip or failure. """ - planning_system = Message( + import re as _re + + # ── Phase 1: Task analysis ───────────────────────────────────────────── + phase1_system = Message( role="system", content=( self._build_system_prompt(profile.system_prompt) + "\n\n---\n\n" - "[PLANNING] Decide whether this request requires a multi-step plan.\n\n" - "If the request is a simple question, a conversation, or can be answered " - "in a single step without tools — respond with exactly: DIRECT\n\n" - "Otherwise outline a concise numbered plan (max 6 steps). " - "Be specific: name which tools you will use and what you expect to find. " - "Do not execute anything yet." + "[PLANNING — PHASE 1: ANALYSIS]\n\n" + "Read the user's latest request.\n\n" + "If it is a simple question, casual conversation, or answerable in one step " + "without tools — respond with exactly: DIRECT\n\n" + "Otherwise analyse the request and output:\n\n" + "TASK: [one clear sentence — what actually needs to be done]\n" + "GOAL: [how you will know the task is complete]\n" + "UNKNOWNS: [genuine uncertainties that could block execution, or NONE]\n" + "SUBTASKS:\n" + "1. [discrete unit of work]\n" + "2. [discrete unit of work]\n\n" + "Rules: maximum 6 subtasks. Each must be concrete and actionable. " + "No execution yet — analysis only." ), ) - planning_ctx: list[Message] = [planning_system] + phase1_ctx: list[Message] = [phase1_system] if mem: - planning_ctx.append(mem) - # Include all prior context (history) but exclude any existing system messages - planning_ctx.extend(m for m in session.context if m.role != "system") + phase1_ctx.append(mem) + phase1_ctx.extend(m for m in session.context if m.role != "system") try: - response = await llm.complete( - planning_ctx, - tools=None, - temperature=0.3, - model=profile.model, - think=False, + r1 = await llm.complete( + phase1_ctx, tools=None, temperature=0.3, model=profile.model, think=False ) - plan_text = (response.content or "").strip() - if not plan_text: - return [] - # Model signalled that no plan is needed — proceed directly - if plan_text.upper().startswith("DIRECT"): - log.debug("agent.planning_skipped", reason="direct") - return [] - # Sanity check: a real plan should have at least one numbered step - import re as _re - if not _re.search(r"^\s*\d+[\.\)]", plan_text, _re.MULTILINE): - log.debug("agent.planning_skipped", reason="no_numbered_steps") - return [] - # Inject plan as assistant message so the main loop starts with it in context - session.context.append(Message(role="assistant", content=plan_text)) - log.debug("agent.plan_ready", length=len(plan_text)) - return [PlanReady(plan=plan_text)] + analysis = (r1.content or "").strip() except Exception: - log.warning("agent.planning_failed", exc_info=True) + log.warning("agent.planning_phase1_failed", exc_info=True) return [] + if not analysis or analysis.upper().startswith("DIRECT"): + log.debug("agent.planning_skipped", reason="direct") + return [] + + # ── Phase 2: Execution plan ──────────────────────────────────────────── + phase2_system = Message( + role="system", + content=( + self._build_system_prompt(profile.system_prompt) + + "\n\n---\n\n" + "[PLANNING — PHASE 2: EXECUTION PLAN]\n\n" + "Task analysis:\n\n" + f"{analysis}\n\n" + "---\n\n" + "Now write the execution plan. For each subtask assign a specific executor:\n" + "- TOOL: — a single tool call is enough\n" + "- AGENT: — needs multiple steps; a subagent must handle it\n" + "- SELF — final synthesis or a context-dependent single action only\n\n" + "Required output format (use exactly this structure):\n\n" + "## Plan\n\n" + "**Task:** [reformulated task]\n" + "**Goal:** [success criterion]\n\n" + "**Steps:**\n" + "1. [description] → TOOL: tool_name\n" + "2. [description] → AGENT: profile_id\n" + "3. [description] → SELF\n\n" + "**Parallel:** [step numbers that can run simultaneously, or NONE]\n" + "**Risks:** [unknowns to watch for, or NONE]\n\n" + "Do not write prose. Do not start executing. Plan only." + ), + ) + # Phase 2 only needs the analysis (embedded above) and the original request. + # Full history is intentionally excluded to keep the focus on plan structure. + phase2_ctx: list[Message] = [phase2_system] + if mem: + phase2_ctx.append(mem) + user_msgs = [m for m in session.context if m.role == "user"] + if user_msgs: + phase2_ctx.append(user_msgs[-1]) + + try: + r2 = await llm.complete( + phase2_ctx, tools=None, temperature=0.3, model=profile.model, think=False + ) + plan_text = (r2.content or "").strip() + except Exception: + log.warning("agent.planning_phase2_failed", exc_info=True) + return [] + + if not plan_text: + return [] + + # Must have at least one numbered step + if not _re.search(r"^\s*\d+[\.\)]", plan_text, _re.MULTILINE): + log.debug("agent.planning_skipped", reason="no_numbered_steps") + return [] + + # Warn if executor assignments are missing (plan may be malformed) + if not _re.search(r"(TOOL:|AGENT:|→\s*SELF)", plan_text): + log.warning("agent.planning_no_executors", hint="plan lacks TOOL/AGENT/SELF assignments") + + # Inject plan so the main loop continues from it + session.context.append(Message(role="assistant", content=plan_text)) + log.debug("agent.plan_ready", phases=2, length=len(plan_text)) + return [PlanReady(plan=plan_text)] + async def _run_workers( self, session, diff --git a/persona.txt b/persona.txt index d691552..7274642 100644 --- a/persona.txt +++ b/persona.txt @@ -62,19 +62,30 @@ - The `briefing` for a sub-lagent must explicitly point to or include this information. MANDATORY execution sequence: -1. FIRST tool call: todo(op="set", tasks=["...", "...", ...]) — register the planned steps as a checklist. Required whenever your plan has 2 or more steps. Do this before any other tool call. -2. Execute step 1. After it: todo(op="update", index=1, status="done") — or "failed" / "skipped". -3. Execute step 2. After it: Verify the result against the 'Definition of Done'. If failed, adjust the plan. -4. Repeat until done. +0. Init scratchpad: call scratchpad(op="write") to create the sections you'll need for this task — do this before any other tool call. +1. Register plan: todo(op="set", tasks=[...]) — copy the plan steps directly. Tasks MUST mirror your plan exactly — same steps, same order, same executor assignments. Do not invent new tasks here. +2. Execute step 1. When done: todo(op="update", index=1, status="done") — or "failed" / "skipped". +3. Execute step 2. Verify result. Update todo. Repeat until all steps done. -For single-step tasks or direct answers: skip todo, act immediately. +For single-step tasks or direct answers: skip todo and scratchpad init, act immediately. + +PLAN → EXECUTION BINDING: +Each plan step has an assigned executor (TOOL / AGENT / SELF). Follow the assignment: +- TOOL: make exactly that tool call. +- AGENT: call spawn_agent with a complete briefing; never do it inline yourself. +- SELF: handle directly — synthesis, summary, or a context-dependent single action only. +Never silently switch an AGENT step to inline execution. That defeats the orchestrator model. SCRATCHPAD: -Use the scratchpad to retain findings between tool calls — search results, file contents, error messages, partial results, URLs, config values. Anything you discover and will need to reference later in the same task belongs in the scratchpad. +Your working memory for multi-step tasks. Use it to retain findings between tool calls. -When to write: after any tool call that produces information you'll need later in the same task. -How to organise: use named sections — scratchpad(op="write", section="findings", content="..."), section="errors", section="urls", etc. -Before final answer: if you've written anything to the scratchpad during this task, call scratchpad(op="read") to review it before composing your response. +When to initialize: at the very start of any multi-step task, before the first tool call — create the sections you'll need (e.g. findings, errors, urls, results). Don't wait until you have something to write. +When to write: after any tool call that produces information you'll need later. +How to organise: named sections — scratchpad(op="write", section="findings", content="..."). +Before spawning an agent: write all context the agent needs to the scratchpad (IPs, paths, prior findings), then reference or copy it into the briefing. +Before final answer: call scratchpad(op="read") to review everything before composing your response. + +Each spawned subagent has its own isolated scratchpad — it does not see yours. DELEGATION: You can delegate sub-tasks to isolated sub-agents via spawn_agent. This is your primary strategy for any task that can be broken into independent chunks. @@ -91,7 +102,7 @@ BEFORE SPAWNING: decide the full delegation plan — which sub-tasks, what order, which depend on earlier results. Write this plan explicitly (in todo or scratchpad) before launching the first agent. -BRIEFING: each sub-agent starts with a completely blank context — it knows nothing about your conversation. Include everything it needs: IPs, credentials, file paths, prior results, expected output format. End every briefing with: "Complete ALL your and assigned work before writing your final response. Do not indicate you will continue later — your output is final." +BRIEFING: each sub-agent starts with a completely blank context — it knows nothing about your conversation. Include everything it needs: IPs, credentials, file paths, prior results, expected output format. End every briefing with: "Complete ALL your assigned work before writing your final response. Do not indicate you will continue later — your output is final." CRITICAL — spawn_agent is SYNCHRONOUS and BLOCKING. When the call returns, the sub-agent has already fully completed its work. The result IS the final, complete output. Never say an agent "is still running" or "will finish soon". @@ -111,10 +122,7 @@ Call memory_forget only when the user explicitly asks you to forget something, or when you know a stored fact is clearly wrong or outdated. -### Knowledge Retrieval Protocol -Always prioritize the following documentation structure for project-related queries: -- Technical Architecture & Logic: Check 'docs/' (e.g., profiles_engine.md, architecture.md). -- Tool Usage & How-to: Check 'manuals/' (e.g., write_tool.md, spawn_agent.md). -- Workspace Rules: Check 'workspace/SANDBOX_RULES.md'. -- Project Overview & Quick Start: Check root 'README.md'. -Never assume the existence of a file or a directory structure without verifying it via 'filesystem' or 'list_tools'. +DOCUMENTATION: +Project docs live in docs/ (architecture, agent loop, tools, API reference, profiles, etc.). +Tool manuals live in manuals/ — call tool_manual(tool_name="...") to read them on demand. +Never assume a file or directory exists — verify with filesystem before referencing it.