diff --git a/navi/config.py b/navi/config.py index d1cea32..d874ed1 100644 --- a/navi/config.py +++ b/navi/config.py @@ -15,6 +15,12 @@ openai_api_key: str = "" anthropic_api_key: str = "" + # Web search fallbacks (used when DuckDuckGo returns no results) + # Brave Search API: free tier = 2000 req/month — https://brave.com/search/api/ + brave_search_api_key: str = "" + # SearXNG: self-hosted meta-search, e.g. "http://localhost:8888" + searxng_url: str = "" + # Filesystem tool: comma-separated allowed root paths fs_allowed_paths: str = "*" diff --git a/navi/core/agent.py b/navi/core/agent.py index 8754305..ff1395c 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -443,7 +443,19 @@ context_tokens: int | None = None built_ctx = self._build_context(session.context, profile, mem) - self._check_context_size(built_ctx) + try: + self._check_context_size(built_ctx) + except ContextTooLargeError as e: + # Surface the error as a Navi response (not a raw system error) so the + # user sees a coherent message and the exchange is saved to history. + error_text = str(e) + assistant_msg = Message(role="assistant", content=error_text) + session.context.append(assistant_msg) + session.messages.append(assistant_msg) + await self._sessions.save(session) + yield TextDelta(delta=error_text) + yield StreamEnd(content=error_text) + return async for chunk in _iter_stream_guarded( llm.stream_complete( @@ -814,16 +826,36 @@ Uses a conservative character-based estimate (~4 chars per token for text). Images are counted at 500 tokens each (rough vision-model estimate). - Threshold: 92% of ollama_num_ctx — leaves headroom for the response. + + Checks against the *remaining* budget, not a fixed percentage of the window: + available_for_input = ollama_num_ctx - output_reserve + where output_reserve is a fixed token headroom reserved for the model's response. + This correctly accounts for sessions where conversation history already consumes + a large portion of the window. 
""" - estimated = sum(len(m.content or "") for m in context) // 4 - estimated += sum(500 for m in context if m.images) - limit = int(settings.ollama_num_ctx * 0.92) - if estimated > limit: + if not context: + return + + output_reserve = 2048 # tokens reserved for the model's own response + + def _estimate(msgs: list[Message]) -> int: + chars = sum(len(m.content or "") for m in msgs) + imgs = sum(500 for m in msgs if m.images) + return chars // 4 + imgs + + total = _estimate(context) + available = settings.ollama_num_ctx - output_reserve + + if total > available: + existing = _estimate(context[:-1]) + new = _estimate(context[-1:]) + remaining = available - existing raise ContextTooLargeError( - f"Context too large: ~{estimated:,} estimated tokens, " - f"safe limit {limit:,} (num_ctx={settings.ollama_num_ctx}). " - "Try splitting files into smaller parts or use a subagent for heavy analysis." + f"Context too large: new content is ~{new:,} estimated tokens, " + f"but only ~{max(0, remaining):,} tokens are available " + f"(window {settings.ollama_num_ctx:,}, already used ~{existing:,}, " + f"output_reserve {output_reserve:,}). " + "Split the file into smaller parts or delegate to a subagent." ) async def _run_single_tool( diff --git a/navi/tools/filesystem.py b/navi/tools/filesystem.py index fa12fe3..2c516ba 100644 --- a/navi/tools/filesystem.py +++ b/navi/tools/filesystem.py @@ -1,4 +1,4 @@ -"""Filesystem tool — read/write/list/delete files. +"""Filesystem tool — read/write/append/list/find/info/move/delete/exists. If FS_ALLOWED_PATHS=* (default), any path is accessible. 
Otherwise set a comma-separated list of allowed root paths, e.g.: @@ -6,19 +6,22 @@ """ import shutil +import stat +from datetime import datetime from pathlib import Path from navi.config import settings from .base import Tool, ToolResult +_READ_WARN_BYTES = 100_000 # 100 KB — add size warning in output +_READ_HARD_BYTES = 1_000_000 # 1 MB — refuse full read without offset/limit +_LIST_MAX_ENTRIES = 500 +_FIND_MAX_RESULTS = 200 + def _check_path(path_str: str) -> Path | None: - """Return resolved Path if access is allowed, else None. - - Called per-request so config changes take effect on restart without - needing module-level state. - """ + """Return resolved Path if access is allowed, else None.""" try: p = Path(path_str).expanduser().resolve() except Exception: @@ -37,20 +40,42 @@ return None +def _fmt_size(n: int) -> str: + if n < 1024: + return f"{n} B" + if n < 1024 ** 2: + return f"{n / 1024:.1f} KB" + if n < 1024 ** 3: + return f"{n / 1024 ** 2:.1f} MB" + return f"{n / 1024 ** 3:.1f} GB" + + +def _fmt_time(ts: float) -> str: + return datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M") + + class FilesystemTool(Tool): name = "filesystem" description = ( "Operate on the local filesystem. " - "Actions: read — get file text; write — create/overwrite file (creates parent dirs); " - "list — directory contents; delete — remove file or directory tree; " - "exists — check if path exists. Paths support ~ expansion." + "Actions: " + "read — get file text; use offset+limit for large files to avoid flooding context; " + "write — create/overwrite file (creates parent dirs); " + "append — add text to end of file (creates if missing); " + "list — directory contents with sizes and dates, optional recursive; " + "find — search files by glob pattern, e.g. '*.py' or '**/*.conf'; " + "info — file metadata: size, line count, modified date, permissions; " + "move — rename or move a file/directory; " + "delete — remove file or directory tree; " + "exists — check if path exists. 
" + "Tip: call info before reading an unknown file to check its size first." ) parameters = { "type": "object", "properties": { "action": { "type": "string", - "enum": ["read", "write", "list", "delete", "exists"], + "enum": ["read", "write", "append", "list", "find", "info", "move", "delete", "exists"], "description": "Operation to perform", }, "path": { @@ -59,7 +84,27 @@ }, "content": { "type": "string", - "description": "Content to write (required for 'write' action)", + "description": "Text to write or append (required for write/append)", + }, + "destination": { + "type": "string", + "description": "Target path for move action", + }, + "pattern": { + "type": "string", + "description": "Glob pattern for find action, e.g. '*.log' or '**/*.py'", + }, + "offset": { + "type": "integer", + "description": "First line to read, 1-based (for read action)", + }, + "limit": { + "type": "integer", + "description": "Maximum number of lines to return (for read action)", + }, + "recursive": { + "type": "boolean", + "description": "List the full directory tree recursively (for list action, default false)", }, }, "required": ["action", "path"], @@ -71,12 +116,12 @@ path = _check_path(raw_path) if path is None: - allowed = settings.fs_allowed_paths return ToolResult( success=False, output=( - f"Access denied: '{raw_path}' is outside allowed paths ({allowed}). " - f"Set FS_ALLOWED_PATHS=* in .env to allow all paths." + f"Access denied: '{raw_path}' is outside allowed paths " + f"({settings.fs_allowed_paths}). " + "Set FS_ALLOWED_PATHS=* in .env to allow all paths." 
), error="access_denied", ) @@ -84,38 +129,23 @@ try: match action: case "read": - if not path.exists(): - return ToolResult(success=False, output=f"File not found: {path}", error="not_found") - content = path.read_text(encoding="utf-8", errors="replace") - return ToolResult(success=True, output=content) - + return self._read(path, params) case "write": - content = params.get("content", "") - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content, encoding="utf-8") - return ToolResult(success=True, output=f"Written {len(content)} bytes to {path}") - + return self._write(path, params) + case "append": + return self._append(path, params) case "list": - if not path.exists(): - return ToolResult(success=False, output=f"Path not found: {path}", error="not_found") - if path.is_file(): - return ToolResult(success=True, output=str(path)) - entries = sorted(path.iterdir(), key=lambda e: (e.is_file(), e.name)) - lines = [f"{' ' if e.is_file() else 'd '}{e.name}" for e in entries] - return ToolResult(success=True, output="\n".join(lines) or "(empty directory)") - + return self._list(path, params) + case "find": + return self._find(path, params) + case "info": + return self._info(path) + case "move": + return self._move(path, params) case "delete": - if not path.exists(): - return ToolResult(success=False, output=f"Not found: {path}", error="not_found") - if path.is_dir(): - shutil.rmtree(path) - else: - path.unlink() - return ToolResult(success=True, output=f"Deleted: {path}") - + return self._delete(path) case "exists": return ToolResult(success=True, output="true" if path.exists() else "false") - case _: return ToolResult(success=False, output=f"Unknown action: {action}", error="invalid_action") @@ -123,3 +153,205 @@ return ToolResult(success=False, output=f"Permission denied: {e}", error=str(e)) except Exception as e: return ToolResult(success=False, output=f"Filesystem error: {e}", error=str(e)) + + # ── action handlers 
def _read(self, path: Path, params: dict) -> ToolResult:
    """Return the text of *path*, either whole or as a 1-based line range.

    Reads two optional keys from *params*:
      offset — first line to return, 1-based; values below 1 are treated as 1.
      limit  — maximum number of lines to return; must not be negative.

    Files larger than ``_READ_HARD_BYTES`` are refused unless ``limit`` bounds
    the request. An ``offset`` on its own does not bound it, because everything
    from that line to the end of the file would still be returned.

    The file is decoded as UTF-8 with undecodable bytes replaced, so decoding
    itself does not raise. Failures are reported through the returned
    ``ToolResult`` (``success=False`` plus an ``error`` code).
    """
    if not path.exists():
        return ToolResult(success=False, output=f"File not found: {path}", error="not_found")
    if path.is_dir():
        return ToolResult(
            success=False,
            output=f"Path is a directory, use 'list': {path}",
            error="is_directory",
        )

    offset = params.get("offset")  # 1-based
    limit = params.get("limit")
    # Tool arguments arrive as loosely-typed JSON; accept "10" or 10.0 but
    # report anything non-numeric as a clear argument error instead of letting
    # a TypeError surface later as a generic filesystem failure.
    try:
        offset = None if offset is None else int(offset)
        limit = None if limit is None else int(limit)
    except (TypeError, ValueError, OverflowError):
        return ToolResult(
            success=False,
            output="'offset' and 'limit' must be integers",
            error="invalid_argument",
        )
    if limit is not None and limit < 0:
        # A negative limit would turn the slice below into "all but the last N lines".
        return ToolResult(
            success=False,
            output=f"'limit' must not be negative, got {limit}",
            error="invalid_argument",
        )

    file_size = path.stat().st_size

    # Refuse to dump files over the hard limit unless the request is bounded by `limit`.
    if file_size > _READ_HARD_BYTES and limit is None:
        return ToolResult(
            success=False,
            output=(
                f"File too large to read in full: {_fmt_size(file_size)} — {path}\n"
                "Use offset/limit to read specific line ranges "
                "(e.g. offset=1, limit=100), or call info first to see the total line count."
            ),
            error="file_too_large",
        )

    text = path.read_text(encoding="utf-8", errors="replace")
    lines = text.splitlines(keepends=True)
    total_lines = len(lines)

    if offset is None and limit is None:
        warn = (
            f"⚠ Large file ({_fmt_size(file_size)}) — "
            "consider offset/limit for targeted reads next time.\n"
            if file_size > _READ_WARN_BYTES
            else ""
        )
        header = f"[{path} | {total_lines} lines | {_fmt_size(file_size)}]\n"
        return ToolResult(success=True, output=header + warn + text)

    start = max(0, (offset or 1) - 1)  # convert 1-based → 0-based
    stop = total_lines if limit is None else min(start + limit, total_lines)
    selected = lines[start:stop]
    if selected:
        span = f"lines {start + 1}–{start + len(selected)} of {total_lines}"
    else:
        # Offset past EOF or limit=0: say so instead of printing an inverted range.
        span = f"no lines in range (file has {total_lines} lines)"
    header = f"[{path} | {span} | {_fmt_size(file_size)}]\n"
    return ToolResult(success=True, output=header + "".join(selected))


def _write(self, path: Path, params: dict) -> ToolResult:
    """Create or overwrite *path* with ``params["content"]`` as UTF-8 text.

    Missing parent directories are created first. A missing or null
    ``content`` writes an empty file. The result reports the encoded size and
    the number of lines written.
    """
    # `or ""` also covers an explicit null, which would otherwise raise TypeError.
    content = params.get("content") or ""
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
    line_count = len(content.splitlines())
    size = _fmt_size(len(content.encode()))
    return ToolResult(
        success=True,
        output=f"Written {size} ({line_count} lines) → {path}",
    )
def _append(self, path: Path, params: dict) -> ToolResult:
    """Append ``params["content"]`` to the end of *path* as UTF-8 text.

    The file and any missing parent directories are created if needed.
    Empty or missing ``content`` is rejected with ``error="missing_content"``.
    """
    content = params.get("content", "")
    if not content:
        return ToolResult(
            success=False,
            output="'content' is required for append",
            error="missing_content",
        )
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("a", encoding="utf-8") as handle:
        handle.write(content)
    appended = _fmt_size(len(content.encode()))
    file_now = _fmt_size(path.stat().st_size)
    return ToolResult(
        success=True,
        output=f"Appended {appended} to {path} (file now {file_now})",
    )


def _list(self, path: Path, params: dict) -> ToolResult:
    """List the contents of directory *path*, directories first.

    With ``params["recursive"]`` truthy the whole tree is walked; otherwise
    only direct children are listed and each sub-directory shows its item
    count. At most ``_LIST_MAX_ENTRIES`` rows are returned and the header is
    marked as truncated when more exist. If *path* is a file, its metadata is
    returned via ``self._info`` instead.

    Entries that cannot be inspected are shown as ``?`` rows rather than
    failing the whole listing.
    """
    if not path.exists():
        return ToolResult(success=False, output=f"Path not found: {path}", error="not_found")
    if path.is_file():
        return self._info(path)

    recursive = params.get("recursive", False)

    def _sort_key(entry: Path) -> tuple[bool, str]:
        # is_file() can raise (e.g. PermissionError) on some Python versions;
        # one unreadable entry must not abort the listing, so it is sorted
        # with the files and rendered as a "?" row below.
        try:
            is_file = entry.is_file()
        except OSError:
            is_file = True
        return (is_file, str(entry).lower())

    found = sorted(path.rglob("*") if recursive else path.iterdir(), key=_sort_key)
    truncated = len(found) > _LIST_MAX_ENTRIES
    shown = found[:_LIST_MAX_ENTRIES]

    rows: list[str] = []
    for entry in shown:
        try:
            info = entry.stat()
            rel = entry.relative_to(path)
            if entry.is_dir():
                if recursive:
                    rows.append(f"d {rel}/")
                    continue
                # Child count only in non-recursive mode, where it is cheap.
                try:
                    count = sum(1 for _ in entry.iterdir())
                    rows.append(f"d {rel}/ ({count} items)")
                except PermissionError:
                    rows.append(f"d {rel}/")
            else:
                rows.append(
                    f" {str(rel):<48} {_fmt_size(info.st_size):>10} {_fmt_time(info.st_mtime)}"
                )
        except Exception:
            # Deliberate best-effort: broken symlinks, vanished files, odd
            # timestamps, etc. still get a row instead of failing the call.
            rows.append(f"? {entry.name}")

    note = " ⚠ truncated" if truncated else ""
    header = f"[{path} | {len(shown)} entries{note}]\n"
    return ToolResult(success=True, output=header + ("\n".join(rows) or "(empty directory)"))
{e.name}") + + note = " ⚠ truncated" if truncated else "" + header = f"[{path} | {len(entries)} entries{note}]\n" + return ToolResult(success=True, output=header + ("\n".join(lines) or "(empty directory)")) + + def _find(self, path: Path, params: dict) -> ToolResult: + pattern = params.get("pattern") + if not pattern: + return ToolResult(success=False, output="'pattern' is required for find action", error="missing_pattern") + if not path.exists(): + return ToolResult(success=False, output=f"Path not found: {path}", error="not_found") + + matches: list[Path] = [] + try: + for p in path.rglob(pattern): + matches.append(p) + if len(matches) >= _FIND_MAX_RESULTS: + break + except Exception as e: + return ToolResult(success=False, output=f"Find error: {e}", error=str(e)) + + if not matches: + return ToolResult(success=True, output=f"No matches for '{pattern}' in {path}") + + matches.sort() + lines = [] + for m in matches: + try: + size = _fmt_size(m.stat().st_size) if m.is_file() else "