diff --git a/.env.example b/.env.example index b24c4a1..0c8c870 100644 --- a/.env.example +++ b/.env.example @@ -1,7 +1,13 @@ +# Single-server mode (used when OLLAMA_BACKENDS_FILE is not set) OLLAMA_HOST=http://localhost:11434 OLLAMA_API_KEY= OLLAMA_DEFAULT_MODEL=gemma4:31b-cloud +# Multi-server fallback mode: path to [{host, api_key?}, ...] JSON file. +# When set, overrides OLLAMA_HOST / OLLAMA_API_KEY. +# Model priority is defined per-profile in config.json ("model": ["...", "..."]). +# OLLAMA_BACKENDS_FILE=ollama_backends.json + OPENAI_API_KEY= ANTHROPIC_API_KEY= diff --git a/.gitignore b/.gitignore index 3637fe6..aa1ea3b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ ssh_hosts.json +ollama_backends.json .tmp task_manager.json tools/*_data.json diff --git a/navi/config.py b/navi/config.py index 601b43b..42dfa27 100644 --- a/navi/config.py +++ b/navi/config.py @@ -32,6 +32,10 @@ # SSH tool: path to JSON file with named connections ssh_hosts_file: str = "ssh_hosts.json" + # Ollama multi-backend fallback: path to JSON file with server list [{host, api_key?}, ...] + # When set, overrides ollama_host / ollama_api_key and enables server+model fallback. + ollama_backends_file: str = "" + # Database # Set DATABASE_URL to use PostgreSQL: postgresql://user:pass@host:port/db # Leave empty to fall back to SQLite (db_path). 
diff --git a/navi/core/registry.py b/navi/core/registry.py index 56d377f..9f9e647 100644 --- a/navi/core/registry.py +++ b/navi/core/registry.py @@ -4,6 +4,7 @@ from navi.exceptions import ProfileNotFound, ToolNotFound from navi.llm.base import LLMBackend from navi.llm.ollama import OllamaBackend +from navi.llm.fallback import FallbackOllamaBackend, load_servers_from_file from navi.profiles import ALL_PROFILES from navi.profiles.base import AgentProfile from navi.tools import ( @@ -108,12 +109,17 @@ """Build and populate registries with all built-in components.""" from navi.core.ai_helper import AIHelper - # Backends are needed by AIHelper — build early - ollama_backend = OllamaBackend( - model=settings.ollama_default_model, - host=settings.ollama_host, - api_key=settings.ollama_api_key, - ) + # Backends are needed by AIHelper — build early. + # Use FallbackOllamaBackend when a backends file is configured. + if settings.ollama_backends_file: + servers = load_servers_from_file(settings.ollama_backends_file) + ollama_backend: LLMBackend = FallbackOllamaBackend(servers) + else: + ollama_backend = OllamaBackend( + model=settings.ollama_default_model, + host=settings.ollama_host, + api_key=settings.ollama_api_key, + ) ai_helper = AIHelper( backend=ollama_backend, default_model=settings.ollama_default_model, diff --git a/navi/exceptions.py b/navi/exceptions.py index 025b495..735d2ac 100644 --- a/navi/exceptions.py +++ b/navi/exceptions.py @@ -30,6 +30,14 @@ pass +class LLMConnectionError(LLMBackendError): + """Server unreachable — connection refused, timeout, network error.""" + + +class LLMModelNotFoundError(LLMBackendError): + """Model not found on this server.""" + + class MaxIterationsReached(NaviError): def __init__(self, limit: int): super().__init__(f"Agent reached max iterations limit ({limit})") diff --git a/navi/llm/fallback.py b/navi/llm/fallback.py new file mode 100644 index 0000000..32e6ab0 --- /dev/null +++ b/navi/llm/fallback.py @@ -0,0 +1,183 @@ 
"""FallbackOllamaBackend — tries multiple Ollama servers with per-server model fallback.

Servers and models are independent priority lists.

Algorithm (server-first):
  For each server in priority order (skip blacklisted servers):
    For each model in priority order (skip blacklisted model+server pairs):
      Try the request.
      LLMConnectionError    → blacklist the server, break to next server.
      LLMModelNotFoundError → blacklist (server, model), continue to next model.
      Success               → use this result.
  If all combinations exhausted → raise LLMBackendError.

Only the two error types above trigger fallback; any other exception
(including a plain LLMBackendError) propagates to the caller unchanged.

Blacklists live in module-level sets (reset on server restart).
"""

import json
from dataclasses import dataclass
from pathlib import Path
from typing import AsyncGenerator

import structlog

from navi.exceptions import LLMBackendError, LLMConnectionError, LLMModelNotFoundError

from .base import LLMBackend, LLMChunk, LLMResponse, Message, ToolSchema
from .ollama import OllamaBackend

log = structlog.get_logger()


@dataclass
class ServerEntry:
    """One Ollama server in the fallback priority list."""

    # Base URL handed to the Ollama client; also the key used by the blacklists.
    host: str
    # Optional bearer token; an empty string means "send no Authorization header".
    api_key: str = ""


# Module-level blacklists — persist for the lifetime of the server process.
# Shared by every FallbackOllamaBackend in this process.  Entries are only ever
# added, never removed, so a blacklisted server/model stays skipped until the
# process restarts.
_dead_servers: set[str] = set()
_dead_models: set[tuple[str, str]] = set()  # (host, model_name)


def load_servers_from_file(path: str) -> list[ServerEntry]:
    """Load the server priority list from a JSON file.

    The file must hold a JSON array of objects shaped like
    ``{"host": "...", "api_key": "..."}``.  ``api_key`` is optional; a missing
    or ``null`` key becomes an empty string.  Array order is priority order.

    Raises:
        OSError: the file cannot be read.
        ValueError: the content is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError``), the top level is not a list, or an entry has no
            non-empty string ``host``.
    """
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    if not isinstance(data, list):
        raise ValueError(f"{path}: expected a JSON list of server objects")

    servers: list[ServerEntry] = []
    for index, entry in enumerate(data):
        host = entry.get("host") if isinstance(entry, dict) else None
        if not isinstance(host, str) or not host:
            # Fail at startup with a message that names the offending entry
            # instead of surfacing a bare KeyError/AttributeError.
            raise ValueError(f"{path}: entry {index} must be an object with a non-empty 'host'")
        servers.append(ServerEntry(host=host, api_key=entry.get("api_key") or ""))
    return servers


class FallbackOllamaBackend(LLMBackend):
    """Ollama backend with automatic server and model fallback.

    Every request walks the servers in priority order and, per server, the
    requested models in priority order.  A connection failure blacklists the
    whole server; a missing model blacklists only that (server, model) pair.
    Any other error propagates to the caller unchanged.
    """

    def __init__(self, servers: list[ServerEntry], default_model: str | None = None) -> None:
        """Create the backend.

        Args:
            servers: Servers in priority order.
            default_model: Model used when a call supplies no model name.
                ``None`` selects ``settings.ollama_default_model``, the same
                default the single-server backend is constructed with.
        """
        if default_model is None:
            # Local import: keeps this block independent of the module's
            # import section.
            from navi.config import settings

            default_model = settings.ollama_default_model
        self._servers = servers
        self._default_model = default_model
        # Cache OllamaBackend instances by host to reuse AsyncClient
        self._clients: dict[str, OllamaBackend] = {}

    def _get_client(self, server: ServerEntry) -> OllamaBackend:
        """Return the cached per-host client, creating it on first use."""
        client = self._clients.get(server.host)
        if client is None:
            # The client default matters: an empty model name passed down by
            # `_model_list` is resolved against it inside OllamaBackend.
            client = OllamaBackend(
                model=self._default_model, host=server.host, api_key=server.api_key
            )
            self._clients[server.host] = client
        return client

    @staticmethod
    def _model_list(model: "list[str] | str | None") -> list[str]:
        """Normalize ``model`` to a non-empty priority list.

        ``None``, ``""`` and ``[]`` all become ``[""]``; the empty name means
        "let the per-server client use its default model".
        """
        if isinstance(model, list):
            return list(model) if model else [""]
        return [model] if model else [""]

    def _candidates(self, models: list[str]):
        """Yield ``(server, model)`` pairs to try, skipping blacklisted ones.

        The blacklists are re-read on every step, so a server that the caller
        marks dead after a yield has its remaining models skipped.
        """
        for server in self._servers:
            for name in models:
                if server.host in _dead_servers:
                    break
                if (server.host, name) in _dead_models:
                    continue
                yield server, name

    @staticmethod
    def _blacklist(server: ServerEntry, model: str, err: LLMBackendError) -> None:
        """Record a failed attempt in the matching module-level blacklist."""
        if isinstance(err, LLMConnectionError):
            log.warning("fallback.server_dead", host=server.host, error=str(err))
            _dead_servers.add(server.host)
        else:
            log.warning("fallback.model_dead", host=server.host, model=model, error=str(err))
            _dead_models.add((server.host, model))

    def _nothing_tried_error(self) -> LLMBackendError:
        """Return the error reported when no attempt could be made at all."""
        if not self._servers:
            return LLMBackendError("No backends configured")
        return LLMBackendError("every configured server/model is blacklisted")

    async def complete(
        self,
        messages: list[Message],
        tools: list[ToolSchema] | None = None,
        temperature: float = 0.7,
        model: "list[str] | str | None" = None,
        think: bool | None = None,
        max_tokens: int | None = None,
    ) -> LLMResponse:
        """Return the first successful completion across servers and models.

        Raises:
            LLMBackendError: every (server, model) combination failed or was
                already blacklisted; the last failure is chained as the cause.
        """
        last_err: Exception = self._nothing_tried_error()
        for server, name in self._candidates(self._model_list(model)):
            try:
                return await self._get_client(server).complete(
                    messages, tools=tools, temperature=temperature,
                    model=name, think=think, max_tokens=max_tokens,
                )
            except (LLMConnectionError, LLMModelNotFoundError) as err:
                self._blacklist(server, name, err)
                last_err = err

        raise LLMBackendError(f"All backends exhausted: {last_err}") from last_err

    async def _stream_with_fallback(self, models: list[str], open_stream):
        """Drive a streaming call with fallback.

        ``open_stream(client, model_name)`` must return the async generator to
        consume.  Fallback is only possible until the first chunk arrives: the
        first chunk is awaited eagerly so connection/model errors surface
        before anything is yielded.  A failure after that propagates as-is.
        """
        last_err: Exception = self._nothing_tried_error()
        for server, name in self._candidates(models):
            gen = open_stream(self._get_client(server), name)
            try:
                first = await gen.__anext__()
            except StopAsyncIteration:
                # The backend answered with an empty stream; nothing to relay.
                return
            except (LLMConnectionError, LLMModelNotFoundError) as err:
                self._blacklist(server, name, err)
                last_err = err
                continue
            yield first
            async for chunk in gen:
                yield chunk
            return

        raise LLMBackendError(f"All backends exhausted: {last_err}") from last_err

    async def stream(
        self,
        messages: list[Message],
        temperature: float = 0.7,
        model: "list[str] | str | None" = None,
    ) -> AsyncGenerator[LLMChunk, None]:
        """Stream a plain chat response from the first working server/model."""

        def open_stream(client: OllamaBackend, name: str):
            return client.stream(messages, temperature=temperature, model=name)

        async for chunk in self._stream_with_fallback(self._model_list(model), open_stream):
            yield chunk

    async def stream_complete(
        self,
        messages: list[Message],
        tools: list[ToolSchema] | None = None,
        temperature: float = 0.7,
        model: "list[str] | str | None" = None,
        think: bool | None = None,
    ) -> AsyncGenerator[LLMChunk, None]:
        """Stream a tool-capable response from the first working server/model."""

        def open_stream(client: OllamaBackend, name: str):
            return client.stream_complete(
                messages, tools=tools, temperature=temperature, model=name, think=think,
            )

        async for chunk in self._stream_with_fallback(self._model_list(model), open_stream):
            yield chunk


# ---------------------------------------------------------------------------
# navi/llm/ollama.py — helpers introduced by the same change.  They rely on
# that module's own imports: `import ollama as ollama_client` and
# `from navi.exceptions import LLMBackendError, LLMConnectionError,
# LLMModelNotFoundError`.
# ---------------------------------------------------------------------------

# Substrings that mark an otherwise untyped exception as a connectivity failure.
_CONNECTION_ERROR_KEYWORDS = (
    "connect", "connection refused", "name or service not known",
    "network", "timeout", "unreachable", "nodename",
)


def _resolve_model(model: "list[str] | str | None", default: str) -> str:
    """Normalize the model param to a single name.

    A list resolves to its first non-empty entry; ``None``, ``""``, an empty
    list, or a list of empty names resolve to ``default``.
    """
    if isinstance(model, list):
        return next((name for name in model if name), default)
    return model or default


def _classify_error(e: Exception) -> Exception:
    """Wrap raw Ollama/network exceptions into typed LLM exceptions.

    Returns (does not raise) the exception the caller should raise:
    ``LLMModelNotFoundError`` for a missing model, ``LLMConnectionError`` for
    an unreachable server, ``LLMBackendError`` for everything else.
    """
    if isinstance(e, ollama_client.ResponseError):
        msg = (e.error or "").lower()
        if e.status_code == 404 or "not found" in msg or "does not exist" in msg:
            return LLMModelNotFoundError(str(e))
        return LLMBackendError(str(e))
    if isinstance(e, ollama_client.RequestError):
        # ollama's RequestError reports an invalid request (for example a
        # missing model name), not an unreachable server.  Treating it as a
        # connection error would blacklist a healthy server.
        return LLMBackendError(str(e))
    if isinstance(e, (ConnectionError, TimeoutError)):
        return LLMConnectionError(str(e))
    # Catch httpx / socket connection failures by message
    err_str = str(e).lower()
    if any(kw in err_str for kw in _CONNECTION_ERROR_KEYWORDS):
        return LLMConnectionError(str(e))
    return LLMBackendError(str(e))


# The diff continues with `class OllamaBackend(LLMBackend):` (its `__init__`
# hunk follows on the next line).
__init__(self, model: str, host: str = "http://localhost:11434", api_key: str = ""): self.model = model + self._host = host headers = {"Authorization": f"Bearer {api_key}"} if api_key else None self._client = ollama_client.AsyncClient(host=host, headers=headers) @@ -56,13 +81,14 @@ messages: list[Message], tools: list[ToolSchema] | None = None, temperature: float = 0.7, - model: str | None = None, + model: "list[str] | str | None" = None, think: bool | None = None, max_tokens: int | None = None, ) -> LLMResponse: + resolved = _resolve_model(model, self.model) try: kwargs: dict = { - "model": model or self.model, + "model": resolved, "messages": _to_ollama_messages(messages), "options": _base_options(temperature, think=think, max_tokens=max_tokens), "stream": False, @@ -93,18 +119,21 @@ prompt_tokens=getattr(response, "prompt_eval_count", None) or None, completion_tokens=getattr(response, "eval_count", None) or None, ) + except (LLMConnectionError, LLMModelNotFoundError, LLMBackendError): + raise except Exception as e: - raise LLMBackendError(str(e)) from e + raise _classify_error(e) from e async def stream( self, messages: list[Message], temperature: float = 0.7, - model: str | None = None, + model: "list[str] | str | None" = None, ) -> AsyncGenerator[LLMChunk, None]: + resolved = _resolve_model(model, self.model) try: async for chunk in await self._client.chat( - model=model or self.model, + model=resolved, messages=_to_ollama_messages(messages), options=_base_options(temperature), stream=True, @@ -119,20 +148,23 @@ prompt_tokens=chunk.prompt_eval_count if chunk.done else None, completion_tokens=chunk.eval_count if chunk.done else None, ) + except (LLMConnectionError, LLMModelNotFoundError, LLMBackendError): + raise except Exception as e: - raise LLMBackendError(str(e)) from e + raise _classify_error(e) from e async def stream_complete( self, messages: list[Message], tools: list[ToolSchema] | None = None, temperature: float = 0.7, - model: str | None = None, + 
model: "list[str] | str | None" = None, think: bool | None = None, ) -> AsyncGenerator[LLMChunk, None]: + resolved = _resolve_model(model, self.model) try: kwargs: dict = { - "model": model or self.model, + "model": resolved, "messages": _to_ollama_messages(messages), "options": _base_options(temperature, think=think), "stream": True, @@ -167,5 +199,7 @@ prompt_tokens=chunk.prompt_eval_count if chunk.done else None, completion_tokens=chunk.eval_count if chunk.done else None, ) + except (LLMConnectionError, LLMModelNotFoundError, LLMBackendError): + raise except Exception as e: - raise LLMBackendError(str(e)) from e + raise _classify_error(e) from e diff --git a/navi/profiles/base.py b/navi/profiles/base.py index 7a99966..e140656 100644 --- a/navi/profiles/base.py +++ b/navi/profiles/base.py @@ -15,7 +15,13 @@ system_prompt: str enabled_tools: list[str] # tool names; resolved by ToolRegistry at runtime llm_backend: str = "ollama" # backend key, e.g. "ollama", "openai" - model: str = "gemma4:31b-cloud" + # Ordered list of preferred models; first available wins at runtime. + # Accepts a plain string for backward compatibility (auto-wrapped in a list). 
+ model: list[str] = field(default_factory=lambda: ["gemma4:31b-cloud"]) + + def __post_init__(self) -> None: + if isinstance(self.model, str): + self.model = [self.model] max_iterations: int = 10 temperature: float = 0.7 planning_enabled: bool = False # if True, run a planning LLM call before the main loop diff --git a/navi/profiles/developer/config.json b/navi/profiles/developer/config.json index 39d7496..69036e4 100644 --- a/navi/profiles/developer/config.json +++ b/navi/profiles/developer/config.json @@ -9,7 +9,10 @@ "key_tools": "filesystem, code_exec, terminal, web_search, web_view, spawn_agent" }, "llm_backend": "ollama", - "model": "gemma4:31b-cloud", + "model": [ + "gemma4:31b-cloud", + "gemma4:26b-a4b-it-q4_K_M" + ], "temperature": 0.2, "max_iterations": 35, "planning_enabled": true, @@ -24,17 +27,36 @@ "step_validation_enabled": false, "adaptive_replan_enabled": true, "subagent_tools": [ - "todo", "scratchpad", "reflect", - "web_search", "web_view", "http_request", - "filesystem", "code_exec", "terminal", "image_view", - "list_tools", "share_file" + "todo", + "scratchpad", + "reflect", + "web_search", + "web_view", + "http_request", + "filesystem", + "code_exec", + "terminal", + "image_view", + "list_tools", + "share_file" ], "enabled_tools": [ - "todo", "scratchpad", "reflect", "switch_profile", "list_profiles", - "web_search", "web_view", "http_request", - "filesystem", "code_exec", "terminal", "image_view", - "memory", "list_tools", - "spawn_agent", "share_file", + "todo", + "scratchpad", + "reflect", + "switch_profile", + "list_profiles", + "web_search", + "web_view", + "http_request", + "filesystem", + "code_exec", + "terminal", + "image_view", + "memory", + "list_tools", + "spawn_agent", + "share_file", "email_manager" ] -} +} \ No newline at end of file diff --git a/navi/profiles/loader.py b/navi/profiles/loader.py index 4402efa..3b21874 100644 --- a/navi/profiles/loader.py +++ b/navi/profiles/loader.py @@ -19,6 +19,13 @@ _REQUIRED_CONFIG_KEYS = 
{"id", "name", "description", "enabled_tools"} +def _normalize_model(value: object) -> list[str]: + """Accept str or list[str], always return list[str].""" + if isinstance(value, list): + return [str(m) for m in value] or ["gemma4:31b-cloud"] + return [str(value)] if value else ["gemma4:31b-cloud"] + + def load_profiles_from_dir(profiles_dir: str | Path) -> list[AgentProfile]: """Load all valid profile directories under profiles_dir.""" base = Path(profiles_dir) @@ -59,7 +66,7 @@ system_prompt=system_prompt, enabled_tools=config["enabled_tools"], llm_backend=config.get("llm_backend", "ollama"), - model=config.get("model", "gemma4:31b-cloud"), + model=_normalize_model(config.get("model", ["gemma4:31b-cloud"])), temperature=config.get("temperature", 0.7), max_iterations=config.get("max_iterations", 20), planning_enabled=config.get("planning_enabled", False), diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json index a144b16..8f52c5a 100644 --- a/navi/profiles/secretary/config.json +++ b/navi/profiles/secretary/config.json @@ -9,7 +9,10 @@ "key_tools": "web_search, web_view, filesystem, code_exec, gmail, todo, scratchpad, spawn_agent, memory" }, "llm_backend": "ollama", - "model": "gemma4:31b-cloud", + "model": [ + "gemma4:31b-cloud", + "gemma4:26b-a4b-it-q4_K_M" + ], "temperature": 0.5, "max_iterations": 25, "planning_enabled": true, @@ -24,22 +27,36 @@ "step_validation_enabled": false, "adaptive_replan_enabled": false, "subagent_tools": [ - "scratchpad", "reflect", - "web_search", "web_view", "http_request", - "filesystem", "code_exec", "image_view", + "scratchpad", + "reflect", + "web_search", + "web_view", + "http_request", + "filesystem", + "code_exec", + "image_view", "memory", "share_file", "weather" ], "enabled_tools": [ - "todo", "scratchpad", "reflect", "switch_profile", "list_profiles", - "web_search", "web_view", "http_request", - "filesystem", "code_exec", "image_view", + "todo", + "scratchpad", + "reflect", + 
"switch_profile", + "list_profiles", + "web_search", + "web_view", + "http_request", + "filesystem", + "code_exec", + "image_view", "memory", - "list_tools", "tool_manual", + "list_tools", + "tool_manual", "spawn_agent", "share_file", "weather", "email_manager" ] -} +} \ No newline at end of file diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json index 64039b4..0f7e679 100644 --- a/navi/profiles/server_admin/config.json +++ b/navi/profiles/server_admin/config.json @@ -9,7 +9,10 @@ "key_tools": "ssh_exec, terminal, filesystem, code_exec, web_search, spawn_agent, memory" }, "llm_backend": "ollama", - "model": "gemma4:31b-cloud", + "model": [ + "gemma4:31b-cloud", + "gemma4:26b-a4b-it-q4_K_M" + ], "temperature": 0.2, "max_iterations": 20, "planning_enabled": true, @@ -24,19 +27,36 @@ "step_validation_enabled": false, "adaptive_replan_enabled": false, "subagent_tools": [ - "scratchpad", "reflect", - "web_search", "http_request", - "filesystem", "code_exec", "terminal", "ssh_exec", "image_view", + "scratchpad", + "reflect", + "web_search", + "http_request", + "filesystem", + "code_exec", + "terminal", + "ssh_exec", + "image_view", "share_file" ], "enabled_tools": [ - "todo", "scratchpad", "reflect", "switch_profile", "list_profiles", - "web_search", "web_view", "http_request", - "filesystem", "code_exec", "terminal", "ssh_exec", "image_view", + "todo", + "scratchpad", + "reflect", + "switch_profile", + "list_profiles", + "web_search", + "web_view", + "http_request", + "filesystem", + "code_exec", + "terminal", + "ssh_exec", + "image_view", "memory", - "list_tools", "tool_manual", + "list_tools", + "tool_manual", "spawn_agent", "share_file", "email_manager" ] -} +} \ No newline at end of file diff --git a/navi/profiles/tool_developer/config.json b/navi/profiles/tool_developer/config.json index 8cb5a99..471fb84 100644 --- a/navi/profiles/tool_developer/config.json +++ b/navi/profiles/tool_developer/config.json @@ -9,7 +9,10 @@ 
"key_tools": "write_tool, reload_tools, delete_tool, test_tool, filesystem, terminal, code_exec, memory" }, "llm_backend": "ollama", - "model": "gemma4:31b-cloud", + "model": [ + "gemma4:31b-cloud", + "gemma4:26b-a4b-it-q4_K_M" + ], "temperature": 0.2, "max_iterations": 35, "planning_enabled": true, @@ -24,21 +27,45 @@ "step_validation_enabled": false, "adaptive_replan_enabled": true, "subagent_tools": [ - "todo", "scratchpad", "reflect", - "web_search", "web_view", "http_request", - "filesystem", "code_exec", "terminal", "image_view", - "write_tool", "reload_tools", "delete_tool", "list_tools", "tool_manual", "test_tool", + "todo", + "scratchpad", + "reflect", + "web_search", + "web_view", + "http_request", + "filesystem", + "code_exec", + "terminal", + "image_view", + "write_tool", + "reload_tools", + "delete_tool", + "list_tools", + "tool_manual", + "test_tool", "share_file" ], "enabled_tools": [ - "todo", "scratchpad", "reflect", "switch_profile", "list_profiles", - "web_search", "web_view", "http_request", - "filesystem", "code_exec", "terminal", "image_view", + "todo", + "scratchpad", + "reflect", + "switch_profile", + "list_profiles", + "web_search", + "web_view", + "http_request", + "filesystem", + "code_exec", + "terminal", + "image_view", "memory", - "reload_tools", "delete_tool", "list_tools", "tool_manual", + "reload_tools", + "delete_tool", + "list_tools", + "tool_manual", "test_tool", "spawn_agent", "share_file", "email_manager" ] -} +} \ No newline at end of file diff --git a/navi/tools/base.py b/navi/tools/base.py index bda8930..f86bdd8 100644 --- a/navi/tools/base.py +++ b/navi/tools/base.py @@ -29,7 +29,7 @@ # Set by run_stream() / run_ephemeral() to expose the current profile's model name # to tools that need to make their own LLM calls (e.g. AIHelper-powered tools). 
-current_model: ContextVar[str | None] = ContextVar("current_model", default=None) +current_model: ContextVar[list[str] | str | None] = ContextVar("current_model", default=None) @dataclass