# "Tool Developer" profile: a dedicated mode for writing, testing and registering
# user tools. The system prompt below is the full reference the model works from,
# so every path and call shown in it must be literally usable:
#   * placeholders are spelled out (`<tool_name>`, `<profile>`) so that examples
#     never collapse into a real-looking path such as `tools/.py`;
#   * the persistence template opens files with an explicit UTF-8 encoding,
#     because it writes JSON with ensure_ascii=False and the default encoding of
#     open() depends on the host locale.
developer = AgentProfile(
    id="developer",
    name="Tool Developer",
    description="Write, test, and debug custom tools to extend Navi's capabilities.",
    system_prompt="""Mode: tool developer — write, test, and register new user tools.

## Your job
Write Python tool files to `tools/`, test them with test_tool, reload with reload_tools.
Tools you write become permanently available to all profiles.

---

## Tool file format

Every file in `tools/` must define exactly four things at module level:

```python
name = "tool_name"  # snake_case, must match filename (without .py)
description = (
    "One or two sentences: what this tool does and when to call it. "
    "Be specific — this is what Navi reads to decide whether to use the tool."
)
parameters = {
    "type": "object",
    "properties": {
        "action": {
            "type": "string",
            "enum": ["save", "get", "list"],
            "description": "What to do.",
        },
        "key": {"type": "string", "description": "Identifier."},
    },
    "required": ["action"],
}

async def execute(params: dict) -> str:
    action = params["action"]
    # implementation
    return "result as plain string"
```

**Hard rules:**
- NO classes at module level
- NO print() at module level
- `execute` MUST be `async`
- `execute` MUST return a plain `str` — not dict, not None, not list
- Raise an exception to signal failure — never return an error dict
- Imports go at top of file or inside execute() — both are fine

---

## File locations

| What | Path |
|------|------|
| User tool files | `tools/<tool_name>.py` |
| Tool data files | `tools/<tool_name>_data.json` (or similar) |
| Template reference | `tools/_template.py` |
| Existing user tools | `tools/get_current_datetime.py`, `tools/user_notes.py`, etc. |
| Profile definitions | `navi/profiles/<profile>.py` |
| enabled_tools list | `navi/profiles/secretary.py`, `server_admin.py`, `smart_home.py` |

Files starting with `_` are never auto-loaded.

---

## Workflow

1. **Understand the task** — clarify what the tool needs to do and what params it takes.
2. **Check for conflicts** — `filesystem(action="list", path="tools/")` to see existing tools.
3. **Write** — `filesystem(action="write", path="tools/<tool_name>.py", content="...")`.
4. **Test immediately** — `test_tool(tool_name="<tool_name>", params={...})`.
   - If it fails: read the traceback, fix the file, test again. Never skip this step.
5. **Reload** — `reload_tools()` only after test_tool passes.
6. **Enable in profiles** — edit `navi/profiles/secretary.py` (and others as needed),
   add the tool name to `enabled_tools`. Use `filesystem(action="read")` first, then `edit`.
7. **Report** — tell the user what was created, what it does, and which profiles it's in.

---

## Data persistence

If the tool needs to store state between calls, use a JSON file in `tools/`:

```python
import json, os

_DATA = os.path.join(os.path.dirname(__file__), "my_tool_data.json")

def _load() -> dict:
    if os.path.exists(_DATA):
        with open(_DATA, encoding="utf-8") as f:
            return json.load(f)
    return {}

def _save(data: dict) -> None:
    with open(_DATA, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
```

---

## Available imports inside a tool

Standard library: anything in Python stdlib.
Third-party (already installed): `httpx`, `aiosqlite`, `structlog`, `pydantic`.
Project modules: import carefully — check the file exists before importing navi internals.
Prefer stdlib and httpx for new tools to keep dependencies minimal.

---

## Research delegation

If the tool wraps an external API or service you need to understand first:
- Use web_search / web_view to explore docs directly.
- Spawn a subagent only for large API surfaces that would flood your context
  (e.g. "read all of the Grafana API reference and return the 5 endpoints I need").
- Never delegate the actual writing or testing — do that yourself inline.

---

## Common mistakes to avoid

- Returning a dict instead of a string from execute() — wrap with `json.dumps()` or format manually.
- Forgetting `async` on execute() — the loader will reject it.
- Using `params["key"]` without checking if key exists — use `params.get("key")` or validate first.
- Writing to paths outside `tools/` for data files — always use `os.path.dirname(__file__)`.
- Not testing before reload_tools — a broken module blocks the reload of all user tools.""",
    # Tool names this profile may call; "test_tool" and "reload_tools" are the
    # two the workflow in the prompt above depends on.
    enabled_tools=[
        "todo", "scratchpad", "switch_profile",
        "web_search", "web_view", "http_request",
        "filesystem", "code_exec", "terminal", "image_view",
        "memory",
        "reload_tools", "list_tools", "tool_manual",
        "test_tool",
        "spawn_agent",
        "share_file",
    ],
    model="gemma4:26b-a4b-it-q4_K_M",
    temperature=0.2,
    max_iterations=40,
    planning_enabled=True,
)
class TestToolTool(Tool):
    """Built-in tool that runs a user tool's ``execute()`` once and reports the outcome.

    The target file is ``<settings.tools_dir>/<tool_name>.py``. It is executed
    as a fresh module object on every call and is never looked up in, or added
    to, ``sys.modules``.

    Besides load and runtime errors, the run is reported as failed when the
    tool breaks the user-tool contract: ``execute`` must be awaitable
    (``async def``) and must return a plain ``str``.
    """

    name = "test_tool"
    description = (
        "Run a user tool's execute() function with given params and return the result or full traceback. "
        "Always use this after writing or editing a tool file to verify it works before calling reload_tools."
    )
    parameters = {
        "type": "object",
        "properties": {
            "tool_name": {
                "type": "string",
                "description": "Name of the tool to test (filename without .py, e.g. 'my_tool').",
            },
            "params": {
                "type": "object",
                "description": "Parameters dict to pass to execute(). Omit or pass {} for tools with no required params.",
            },
        },
        "required": ["tool_name"],
    }

    async def execute(self, params: dict) -> ToolResult:
        """Load the named user tool from disk, call its ``execute()`` and report the result.

        Args:
            params: Tool-call arguments. ``tool_name`` (required) is the file
                name without ``.py``; ``params`` (optional) is the dict handed
                to the tool's ``execute()`` and defaults to ``{}``.

        Returns:
            A ``ToolResult``. ``success=True`` only when the module loads,
            ``execute()`` is awaitable, completes without raising and returns
            a ``str``. Every failure carries a human-readable ``output``
            (including the traceback where one exists) and a short ``error`` tag.
        """
        # Function-scope import keeps this block self-contained.
        import inspect

        tool_name = (params.get("tool_name") or "").strip()
        test_params = params.get("params") or {}

        if not tool_name:
            return ToolResult(success=False, output="tool_name is required.", error="missing tool_name")

        # A non-dict here would only produce a confusing error inside the tool.
        if not isinstance(test_params, dict):
            return ToolResult(
                success=False,
                output=f"params must be an object (dict), got {type(test_params).__name__}.",
                error="invalid params",
            )

        tool_path = Path(settings.tools_dir) / f"{tool_name}.py"
        if not tool_path.exists():
            return ToolResult(
                success=False,
                output=f"File not found: {tool_path}",
                error="file not found",
            )

        # Force a fresh import from disk — bypasses any cached (possibly stale) module
        module_key = f"_test_tool_run_{tool_name}"
        spec = importlib.util.spec_from_file_location(module_key, tool_path)
        if spec is None or spec.loader is None:
            return ToolResult(success=False, output=f"Cannot load spec for {tool_path}", error="spec error")

        module = importlib.util.module_from_spec(spec)
        try:
            spec.loader.exec_module(module)  # type: ignore[union-attr]
        except Exception:
            # Boundary handler: any failure in the user's module body is reported, not raised.
            tb = traceback.format_exc()
            return ToolResult(success=False, output=f"Module load error:\n{tb}", error="load error")

        execute_fn = getattr(module, "execute", None)
        if execute_fn is None:
            return ToolResult(success=False, output="Module has no execute() function.", error="missing execute")
        if not callable(execute_fn):
            return ToolResult(
                success=False,
                output=f"Module attribute 'execute' is not callable (got {type(execute_fn).__name__}).",
                error="missing execute",
            )

        try:
            pending = execute_fn(test_params)
            # Check the call result rather than the function object, so a sync
            # wrapper that returns a coroutine keeps working as before.
            if not inspect.isawaitable(pending):
                return ToolResult(
                    success=False,
                    output=(
                        "execute() must be declared with 'async def'; "
                        f"calling it returned {type(pending).__name__} instead of an awaitable."
                    ),
                    error="execute not async",
                )
            result = await pending
        except Exception:
            tb = traceback.format_exc()
            return ToolResult(success=False, output=f"execute() raised an exception:\n{tb}", error="runtime error")

        # A tool that returns None, a dict or a list must not be reported as passing.
        if not isinstance(result, str):
            return ToolResult(
                success=False,
                output=f"execute() must return a plain str, got {type(result).__name__}: {result!r}",
                error="invalid return type",
            )

        return ToolResult(success=True, output=f"OK\n\nResult: {result}")
- parameters = {"type": "object", "properties": {...}, "required": [...]} - async def execute(params: dict) -> str: ... - -The execute function MUST return a plain string. NOT a dict, NOT None. Raise an exception to signal failure. - -Write REAL working code. No placeholders, no simulations, no hardcoded fake data. If the tool needs to persist data, use actual file I/O — store data files inside the tools/ directory. The code must work correctly on the first call. - -write_tool reports success or the exact error. If there is an error, fix the code and call write_tool again. The tool is available from the NEXT user message. To enable it in a profile, add the name to enabled_tools in navi/profiles/.py. - PROFILE SWITCHING: Each session has an active profile — it defines your available tools and system instructions. When the user's task clearly belongs to a different domain (e.g. switching from writing to server administration, or from admin work to home automation), call switch_profile with the appropriate profile_id. @@ -32,6 +12,9 @@ - Don't switch for a single off-topic question. Switch when the session is clearly moving into a different domain. - Never switch back and forth repeatedly within one conversation. +Switch to the `developer` profile when the user asks to create, edit, or debug a custom tool. +The developer profile has full architecture knowledge, test_tool, reload_tools, and direct filesystem access. + WORKSPACE: You have a persistent workspace directory at workspace/ (relative to the project root). Use it freely for any long-term files: scripts, notes, data, configs, research results — anything worth keeping across sessions. It is yours; the user will not clean it up. Do NOT write working files to the project root.