navi-1/navi/tools/todo.py at dba307ecbaab078017e3071ce93a27ef28fd10ad

Fork: 0
root / navi-1
Find file
Newer
Older
navi-1 / navi / tools / todo.py
Eugene Sukhodolskiy on 25 Apr 6 KB Strengthen todo progress discipline
Raw Blame History
"""Session-scoped task plan manager."""
from __future__ import annotations

from dataclasses import dataclass, field

from .base import Tool, ToolResult, current_session_id

# Glyph printed in front of each task when the plan is rendered, keyed by
# task status. Statuses missing from this table are rendered with "?".
_STATUS_ICON: dict[str, str] = {
    "pending": "○",
    "in_progress": "◎",
    "done": "✓",
    "failed": "✗",
    "skipped": "—",
}

@dataclass
class _Task:
    """A single step of a session plan: its text, status and validation note."""

    # Human-readable description of the step.
    text: str
    # Current lifecycle state; a freshly created task is "pending".
    status: str = "pending"
    # How the result was verified (required for done, encouraged for failed).
    validation: str = ""


# In-memory store: session_id → task list (ephemeral, lives with the server process).
# Nothing visible in this module writes it to disk, so plans are lost on restart.
# TodoTool uses the key "__default__" when no session id is available.
_plans: dict[str, list[_Task]] = {}


class TodoTool(Tool):
    """Tool that tracks an ordered, per-session task plan.

    Plans are kept in the module-level ``_plans`` mapping, keyed by the value
    returned by ``current_session_id.get()`` (or ``"__default__"`` when that
    value is falsy). Supported operations are ``set``, ``view``, ``update``
    and ``clear``; every operation reports its outcome as a ``ToolResult``.
    """

    name = "todo"
    description = (
        "Task plan tracker. Your todo list is automatically populated from the plan at the start of each task — "
        "you do NOT need to call 'set'. "
        "Indexes are 1-based. Call 'update' with status='in_progress' when you start a step. "
        "Call 'update' immediately after completing or failing each step — before moving to the next. "
        "When marking a step 'done', you MUST provide a 'validation' field describing how you verified the result. "
        "When marking a step 'failed', provide 'validation' explaining what went wrong and what you tried. "
        "Before final response, make sure every completed step, including the final step, is marked done with validation. "
        "Call 'view' to re-orient yourself after sub-agent execution or long tool chains. "
        "Use 'set' only when you need to replace the plan mid-task (rare). "
        "Statuses: pending → in_progress → done / failed / skipped."
    )
    parameters = {
        "type": "object",
        "properties": {
            "op": {
                "type": "string",
                "enum": ["set", "view", "update", "clear"],
                "description": (
                    "set — create/replace the Master Plan with a list of task milestones; "
                    "view — show the current state of the plan; "
                    "update — change the status of a specific task; "
                    "clear — reset the plan"
                ),
            },
            "tasks": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Ordered list of task descriptions (required for 'set').",
            },
            "index": {
                "type": "integer",
                "description": "1-based task index (required for 'update').",
            },
            "status": {
                "type": "string",
                "enum": ["pending", "in_progress", "done", "failed", "skipped"],
                "description": "New status for the task (required for 'update').",
            },
            "validation": {
                "type": "string",
                "description": (
                    "Required when status='done': briefly describe how you verified the result "
                    "(e.g. 'ran test_tool — output matched expected', 'read the file and confirmed content'). "
                    "Encouraged when status='failed': describe what went wrong and what you tried."
                ),
            },
        },
        "required": ["op"],
    }

    async def execute(self, params: dict) -> ToolResult:
        """Run the operation named by ``params["op"]`` for the current session.

        Args:
            params: Tool arguments as described by ``parameters``.

        Returns:
            A ``ToolResult``. Unknown ops and invalid arguments are reported
            with ``success=False`` rather than by raising.
        """
        sid = current_session_id.get() or "__default__"
        op = params.get("op")

        if op == "set":
            return self._set(sid, params)

        if op == "view":
            if not _plans.get(sid):
                return ToolResult(success=True, output="No plan set for this session.")
            return ToolResult(success=True, output=self._render(sid))

        if op == "update":
            return self._update(sid, params)

        if op == "clear":
            _plans.pop(sid, None)
            return ToolResult(success=True, output="Plan cleared.")

        return ToolResult(success=False, output="", error=f"Unknown op: {op!r}")

    def _set(self, sid: str, params: dict) -> ToolResult:
        """Replace the plan of session ``sid`` with ``params["tasks"]``."""
        raw = params.get("tasks")
        # Require a real list: iterating a bare string would silently create
        # one task per character.
        if not isinstance(raw, (list, tuple)) or not raw:
            return ToolResult(success=False, output="", error="'tasks' list is required for 'set'")
        _plans[sid] = [_Task(text=str(item)) for item in raw]
        return ToolResult(success=True, output=self._render(sid))

    def _update(self, sid: str, params: dict) -> ToolResult:
        """Change the status (and validation note) of one task of session ``sid``."""
        tasks = _plans.get(sid)
        if not tasks:
            return ToolResult(success=False, output="", error="No plan set. Use 'set' first.")

        raw_index = params.get("index")
        status = params.get("status")
        # str() guards against a non-string validation payload.
        validation = str(params.get("validation") or "").strip()

        # Test for None explicitly so that index 0 is reported as out of range
        # instead of as a missing argument.
        if raw_index is None or not status:
            return ToolResult(success=False, output="", error="'index' and 'status' are required for 'update'")

        idx = self._parse_index(raw_index)
        if idx is None:
            return ToolResult(success=False, output="", error=f"'index' must be an integer, got {raw_index!r}")
        if not 1 <= idx <= len(tasks):
            return ToolResult(success=False, output="", error=f"index {idx} is out of range (plan has {len(tasks)} tasks)")

        # Reject statuses outside the known set instead of storing a value
        # that can only be rendered with the "?" fallback icon.
        if not isinstance(status, str) or status not in _STATUS_ICON:
            return ToolResult(
                success=False,
                output="",
                error=f"Unknown status: {status!r}. Valid statuses: {', '.join(_STATUS_ICON)}",
            )

        if status == "done" and not validation:
            return ToolResult(
                success=False,
                output=(
                    f"Cannot mark step {idx} as done without validation.\n"
                    "Provide a 'validation' field describing how you verified the result before marking it done.\n"
                    "Example: \"ran test_tool — all assertions passed\" or \"read the output file and confirmed expected content\".\n"
                    "If you haven't verified yet — verify first, then update."
                ),
                error="validation_required",
            )

        task = tasks[idx - 1]
        task.status = status
        # Always overwrite the note: a stale "[verified: ...]" left over from
        # an earlier 'done' must not remain attached to a step that is now
        # failed, pending, etc.
        task.validation = validation

        output = self._render(sid)
        if status == "failed" and not validation:
            # Soft prompt — don't block, but encourage explanation
            output += (
                "\n\n"
                "[Tip: next time add a 'validation' field when marking a step failed — "
                "describe what went wrong and what you tried. "
                "This helps with re-planning.]"
            )
        return ToolResult(success=True, output=output)

    @staticmethod
    def _parse_index(raw: object) -> int | None:
        """Return ``raw`` as an int, or ``None`` if it is not integer-like.

        Accepts ints, integral floats (``2.0``) and numeric strings (``"2"``);
        booleans are rejected even though ``bool`` subclasses ``int``.
        """
        if isinstance(raw, bool):
            return None
        if isinstance(raw, int):
            return raw
        if isinstance(raw, float) and raw.is_integer():
            return int(raw)
        if isinstance(raw, str):
            try:
                return int(raw.strip())
            except ValueError:
                return None
        return None

    def _render(self, sid: str) -> str:
        """Return the plan of session ``sid`` as a multi-line text summary.

        The first line reports how many tasks are done; each following line
        shows the status icon, the 1-based index, the task text, the status
        name (omitted for pending/done) and the validation note, if any.
        """
        tasks = _plans.get(sid, [])
        if not tasks:
            return "Plan is empty."
        n = len(tasks)
        done = sum(1 for t in tasks if t.status == "done")
        lines = [f"Plan — {done}/{n} done:"]
        for i, t in enumerate(tasks, 1):
            icon = _STATUS_ICON.get(t.status, "?")
            suffix = f" ({t.status})" if t.status not in ("pending", "done") else ""
            validation_note = f" [verified: {t.validation}]" if t.validation else ""
            lines.append(f"  {icon} {i}. {t.text}{suffix}{validation_note}")
        return "\n".join(lines)