diff --git a/navi/core/agent.py b/navi/core/agent.py index 194b789..4363c7e 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -199,6 +199,31 @@ return frozenset() +def _todo_progress_message(session_id: str, *, first_iteration: bool = False) -> "Message | None": + """Build a compact system reminder with current todo state and update discipline.""" + try: + from navi.tools.todo import _plans, _STATUS_ICON + tasks = _plans.get(session_id, []) + if not tasks: + return None + + lines = ["[Todo discipline]"] + if first_iteration: + lines.append("The todo list was auto-populated from the plan.") + lines.append("Use 1-based todo indexes. When starting a step, mark it in_progress.") + lines.append("After completing or failing a step, update todo before moving on.") + lines.append("Do not mark a step done until you have verified it; include validation explaining the check.") + lines.append("Before final response, close every completed step with todo update and validation.") + lines.append("Current todo:") + for i, t in enumerate(tasks, 1): + icon = _STATUS_ICON.get(t.status, "?") + validation_note = f"; verified: {t.validation}" if t.validation else "" + lines.append(f" {icon} {i}. {t.text} ({t.status}{validation_note})") + return Message(role="system", content="\n".join(lines)) + except Exception: + return None + + class Agent: def __init__( self, @@ -650,19 +675,9 @@ ): built_ctx.append(self._build_goal_anchor(session_id, user_message)) - if iteration == 0: - try: - from navi.tools.todo import _plans, _STATUS_ICON - tasks = _plans.get(session_id, []) - if tasks: - lines = ["[Todo — track your progress]"] - for i, t in enumerate(tasks): - icon = _STATUS_ICON.get(t.status, "?") - lines.append(f" {icon} [{i}] {t.text} ({t.status})") - lines.append("Mark each step in_progress when you start it, done when complete.") - built_ctx.append(Message(role="system", content="\n".join(lines))) - except Exception: - pass + todo_msg = _todo_progress_message(session_id, first_iteration=(iteration == 0)) + if todo_msg: + built_ctx.append(todo_msg) # Snapshot todo state before this iteration (for stall detection after) _todo_snapshot_before = _todo_status_snapshot(session_id) @@ -887,9 +902,9 @@ ) _replan_msg = ( f"[Adaptive re-plan] {failed_labels} just failed. " - "Before continuing, revise your plan: call todo(op=\"set\") or todo(op=\"update\") " - "to replace or skip the remaining pending steps with a revised approach " - "that accounts for what went wrong. Then continue execution." + "Before continuing, revise your plan with the todo tool: either replace the remaining " + "pending steps or mark failed/skipped steps with validation. Then continue execution " + "with an approach that accounts for what went wrong." ) log.info("agent.adaptive_replan_queued", failures=len(new_failures), session_id=session_id) @@ -1439,6 +1454,8 @@ pass lines.append("Stay on track — complete the remaining pending/in_progress steps.") + lines.append("Use 1-based todo indexes. Mark completed steps done only after verification, with validation.") + lines.append("Before final response, update todo for every completed step, including the final one.") return Message(role="system", content="\n".join(lines)) def _tool_list(self, enabled: list[str]) -> list[Tool]: diff --git a/navi/tools/todo.py b/navi/tools/todo.py index 42d4fdf..eeb1675 100644 --- a/navi/tools/todo.py +++ b/navi/tools/todo.py @@ -29,9 +29,11 @@ description = ( "Task plan tracker. Your todo list is automatically populated from the plan at the start of each task — " "you do NOT need to call 'set'. " - "Call 'update' (index + status) immediately after completing or failing each step — before moving to the next. " + "Indexes are 1-based. Call 'update' with status='in_progress' when you start a step. " + "Call 'update' immediately after completing or failing each step — before moving to the next. " "When marking a step 'done', you MUST provide a 'validation' field describing how you verified the result. " "When marking a step 'failed', provide 'validation' explaining what went wrong and what you tried. " + "Before final response, make sure every completed step, including the final step, is marked done with validation. " "Call 'view' to re-orient yourself after sub-agent execution or long tool chains. " "Use 'set' only when you need to replace the plan mid-task (rare). " "Statuses: pending → in_progress → done / failed / skipped."