diff --git a/.env.example b/.env.example
index b24c4a1..0c8c870 100644
--- a/.env.example
+++ b/.env.example
@@ -1,7 +1,13 @@
+# Single-server mode (used when OLLAMA_BACKENDS_FILE is not set)
 OLLAMA_HOST=http://localhost:11434
 OLLAMA_API_KEY=
 OLLAMA_DEFAULT_MODEL=gemma4:31b-cloud
 
+# Multi-server fallback mode: path to [{host, api_key?}, ...] JSON file.
+# When set, overrides OLLAMA_HOST / OLLAMA_API_KEY.
+# Model priority is defined per-profile in config.json ("model": ["...", "..."]).
+# OLLAMA_BACKENDS_FILE=ollama_backends.json
+
 OPENAI_API_KEY=
 ANTHROPIC_API_KEY=
 
diff --git a/.gitignore b/.gitignore
index 3637fe6..aa1ea3b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 ssh_hosts.json
+ollama_backends.json
 .tmp
 task_manager.json
 tools/*_data.json
diff --git a/navi/config.py b/navi/config.py
index 601b43b..42dfa27 100644
--- a/navi/config.py
+++ b/navi/config.py
@@ -32,6 +32,10 @@
     # SSH tool: path to JSON file with named connections
     ssh_hosts_file: str = "ssh_hosts.json"
 
+    # Ollama multi-backend fallback: path to JSON file with server list [{host, api_key?}, ...]
+    # When set, overrides ollama_host / ollama_api_key and enables server+model fallback.
+    ollama_backends_file: str = ""
+
     # Database
     # Set DATABASE_URL to use PostgreSQL: postgresql://user:pass@host:port/db
     # Leave empty to fall back to SQLite (db_path).
diff --git a/navi/core/registry.py b/navi/core/registry.py
index 56d377f..9f9e647 100644
--- a/navi/core/registry.py
+++ b/navi/core/registry.py
@@ -4,6 +4,7 @@
 from navi.exceptions import ProfileNotFound, ToolNotFound
 from navi.llm.base import LLMBackend
 from navi.llm.ollama import OllamaBackend
+from navi.llm.fallback import FallbackOllamaBackend, load_servers_from_file
 from navi.profiles import ALL_PROFILES
 from navi.profiles.base import AgentProfile
 from navi.tools import (
@@ -108,12 +109,17 @@
     """Build and populate registries with all built-in components."""
     from navi.core.ai_helper import AIHelper
 
-    # Backends are needed by AIHelper — build early
-    ollama_backend = OllamaBackend(
-        model=settings.ollama_default_model,
-        host=settings.ollama_host,
-        api_key=settings.ollama_api_key,
-    )
+    # Backends are needed by AIHelper — build early.
+    # Use FallbackOllamaBackend when a backends file is configured.
+    if settings.ollama_backends_file:
+        servers = load_servers_from_file(settings.ollama_backends_file)
+        ollama_backend: LLMBackend = FallbackOllamaBackend(servers)
+    else:
+        ollama_backend = OllamaBackend(
+            model=settings.ollama_default_model,
+            host=settings.ollama_host,
+            api_key=settings.ollama_api_key,
+        )
     ai_helper = AIHelper(
         backend=ollama_backend,
         default_model=settings.ollama_default_model,
diff --git a/navi/exceptions.py b/navi/exceptions.py
index 025b495..735d2ac 100644
--- a/navi/exceptions.py
+++ b/navi/exceptions.py
@@ -30,6 +30,14 @@
     pass
 
 
+class LLMConnectionError(LLMBackendError):
+    """Raised when an LLM server is unreachable: connection refused, timeout or network error."""
+
+
+class LLMModelNotFoundError(LLMBackendError):
+    """Raised when the requested model is not found on the server that was asked."""
+
+
 class MaxIterationsReached(NaviError):
     def __init__(self, limit: int):
         super().__init__(f"Agent reached max iterations limit ({limit})")
diff --git a/navi/llm/fallback.py b/navi/llm/fallback.py
new file mode 100644
index 0000000..32e6ab0
--- /dev/null
+++ b/navi/llm/fallback.py
@@ -0,0 +1,183 @@
+"""FallbackOllamaBackend — tries multiple Ollama servers with per-server model fallback.
+
+Servers and models are independent priority lists.
+
+Algorithm (server-first):
+  For each server in priority order (skip blacklisted servers):
+    For each model in priority order (skip blacklisted model+server pairs):
+      Try the request.
+      LLMConnectionError  → blacklist the server, break to next server.
+      LLMModelNotFoundError → blacklist (server, model), continue to next model.
+      Success → use this result.
+  If all combinations exhausted → raise LLMBackendError.
+
+Blacklists live in module-level sets shared by all instances; entries are never retried and are cleared only when this process restarts.
+"""
+
+import json
+import structlog
+from dataclasses import dataclass
+from pathlib import Path
+from typing import AsyncGenerator
+
+from navi.exceptions import LLMBackendError, LLMConnectionError, LLMModelNotFoundError
+
+from .base import LLMBackend, LLMChunk, LLMResponse, Message, ToolSchema
+from .ollama import OllamaBackend
+
+log = structlog.get_logger()
+
+
+@dataclass
+class ServerEntry:  # One Ollama server taken from the backends file.
+    host: str  # Passed to OllamaBackend as its host; also used as the blacklist key.
+    api_key: str = ""  # Optional; an empty value means no Authorization header is sent.
+
+
+# Module-level blacklists — persist for the lifetime of the server process.
+_dead_servers: set[str] = set()
+_dead_models: set[tuple[str, str]] = set()  # (host, model_name)
+
+
+def load_servers_from_file(path: str) -> list[ServerEntry]:
+    """Load ServerEntry items from a UTF-8 JSON array file: [{"host": ..., "api_key"?: ...}, ...]."""
+    data = json.loads(Path(path).read_text(encoding="utf-8"))
+    return [ServerEntry(host=e["host"], api_key=e.get("api_key") or "") for e in data]  # null key -> ""
+
+
+class FallbackOllamaBackend(LLMBackend):
+    """LLMBackend that tries servers, then models, in priority order until one request succeeds."""
+
+    def __init__(self, servers: list[ServerEntry]) -> None:
+        self._servers = servers
+        # One OllamaBackend (and therefore one AsyncClient) per host, created lazily by _get_client.
+        self._clients: dict[str, OllamaBackend] = {}
+
+    def _get_client(self, server: ServerEntry) -> OllamaBackend:
+        if server.host not in self._clients:  # NOTE(review): keyed by host only; entries sharing a host reuse the first api_key.
+            self._clients[server.host] = OllamaBackend(
+                model="", host=server.host, api_key=server.api_key  # no default model: each request passes its own
+            )
+        return self._clients[server.host]
+
+    @staticmethod  # Normalizes the model argument to a non-empty list; None or empty becomes [""].
+    def _model_list(model: "list[str] | str | None") -> list[str]:
+        if isinstance(model, list):
+            return model if model else [""]
+        return [model] if model else [""]
+
+    async def complete(  # Non-streaming request; returns the first successful response.
+        self,
+        messages: list[Message],
+        tools: list[ToolSchema] | None = None,
+        temperature: float = 0.7,
+        model: "list[str] | str | None" = None,
+        think: bool | None = None,
+        max_tokens: int | None = None,
+    ) -> LLMResponse:
+        models = self._model_list(model)
+        last_err: Exception = LLMBackendError("No backends configured")  # also reported when every server is skipped as blacklisted
+
+        for server in self._servers:
+            if server.host in _dead_servers:
+                continue
+            for m in models:
+                if (server.host, m) in _dead_models:
+                    continue
+                try:
+                    return await self._get_client(server).complete(
+                        messages, tools=tools, temperature=temperature,
+                        model=m, think=think, max_tokens=max_tokens,
+                    )
+                except LLMConnectionError as e:
+                    log.warning("fallback.server_dead", host=server.host, error=str(e))
+                    _dead_servers.add(server.host)
+                    last_err = e
+                    break  # Skip remaining models — server is gone
+                except LLMModelNotFoundError as e:
+                    log.warning("fallback.model_dead", host=server.host, model=m, error=str(e))
+                    _dead_models.add((server.host, m))
+                    last_err = e
+                    # Continue to next model on the same server
+
+        raise LLMBackendError(f"All backends exhausted: {last_err}") from last_err
+
+    async def stream(  # Streaming variant; fallback applies only until the first chunk arrives.
+        self,
+        messages: list[Message],
+        temperature: float = 0.7,
+        model: "list[str] | str | None" = None,
+    ) -> AsyncGenerator[LLMChunk, None]:
+        models = self._model_list(model)
+        last_err: Exception = LLMBackendError("No backends configured")  # also reported when every server is skipped as blacklisted
+
+        for server in self._servers:
+            if server.host in _dead_servers:
+                continue
+            for m in models:
+                if (server.host, m) in _dead_models:
+                    continue
+                try:
+                    gen = self._get_client(server).stream(messages, temperature=temperature, model=m)
+                    first = await gen.__anext__()  # pull one chunk so connection/model errors surface before anything is yielded
+                except StopAsyncIteration:
+                    return  # the underlying stream finished without producing a chunk
+                except LLMConnectionError as e:
+                    log.warning("fallback.server_dead", host=server.host, error=str(e))
+                    _dead_servers.add(server.host)
+                    last_err = e
+                    break
+                except LLMModelNotFoundError as e:
+                    log.warning("fallback.model_dead", host=server.host, model=m, error=str(e))
+                    _dead_models.add((server.host, m))
+                    last_err = e
+                    continue
+                else:
+                    yield first
+                    async for chunk in gen:  # errors raised from here on propagate to the caller; no fallback mid-stream
+                        yield chunk
+                    return
+
+        raise LLMBackendError(f"All backends exhausted: {last_err}") from last_err
+
+    async def stream_complete(  # Streaming variant that also forwards tools/think; same first-chunk fallback as stream().
+        self,
+        messages: list[Message],
+        tools: list[ToolSchema] | None = None,
+        temperature: float = 0.7,
+        model: "list[str] | str | None" = None,
+        think: bool | None = None,
+    ) -> AsyncGenerator[LLMChunk, None]:
+        models = self._model_list(model)
+        last_err: Exception = LLMBackendError("No backends configured")  # also reported when every server is skipped as blacklisted
+
+        for server in self._servers:
+            if server.host in _dead_servers:
+                continue
+            for m in models:
+                if (server.host, m) in _dead_models:
+                    continue
+                try:
+                    gen = self._get_client(server).stream_complete(
+                        messages, tools=tools, temperature=temperature, model=m, think=think,
+                    )
+                    first = await gen.__anext__()  # pull one chunk so connection/model errors surface before anything is yielded
+                except StopAsyncIteration:
+                    return  # the underlying stream finished without producing a chunk
+                except LLMConnectionError as e:
+                    log.warning("fallback.server_dead", host=server.host, error=str(e))
+                    _dead_servers.add(server.host)
+                    last_err = e
+                    break
+                except LLMModelNotFoundError as e:
+                    log.warning("fallback.model_dead", host=server.host, model=m, error=str(e))
+                    _dead_models.add((server.host, m))
+                    last_err = e
+                    continue
+                else:
+                    yield first
+                    async for chunk in gen:  # errors raised from here on propagate to the caller; no fallback mid-stream
+                        yield chunk
+                    return
+
+        raise LLMBackendError(f"All backends exhausted: {last_err}") from last_err
diff --git a/navi/llm/ollama.py b/navi/llm/ollama.py
index 6f6bfef..a816d83 100644
--- a/navi/llm/ollama.py
+++ b/navi/llm/ollama.py
@@ -6,7 +6,7 @@
 import ollama as ollama_client
 
 from navi.config import settings
-from navi.exceptions import LLMBackendError
+from navi.exceptions import LLMBackendError, LLMConnectionError, LLMModelNotFoundError
 
 from .base import LLMBackend, LLMChunk, LLMResponse, Message, ToolCallRequest, ToolSchema
 
@@ -45,9 +45,34 @@
     return opts
 
 
+def _resolve_model(model: "list[str] | str | None", default: str) -> str:
+    """Pick one model name: a list's first entry, else the string itself, else ``default``."""
+    if not isinstance(model, list):
+        return model or default
+    return model[0] if model else default
+
+
+def _classify_error(e: Exception) -> Exception:
+    """Map a raw Ollama/network exception to LLMConnectionError, LLMModelNotFoundError or LLMBackendError."""
+    if isinstance(e, ollama_client.RequestError):
+        return LLMConnectionError(str(e))
+    if isinstance(e, ollama_client.ResponseError):
+        msg = e.error.lower()
+        if "not found" in msg or "does not exist" in msg or e.status_code == 404:
+            return LLMModelNotFoundError(str(e))
+        return LLMBackendError(str(e))
+    # Match the class name too: timeouts (e.g. asyncio.TimeoutError) often have an empty message.
+    err_str = f"{type(e).__name__}: {e}".lower()
+    if any(kw in err_str for kw in ("connect", "connection refused", "name or service not known",
+                                     "network", "timeout", "unreachable", "nodename")):
+        return LLMConnectionError(str(e))
+    return LLMBackendError(str(e))
+
+
 class OllamaBackend(LLMBackend):
     def __init__(self, model: str, host: str = "http://localhost:11434", api_key: str = ""):
         self.model = model
+        self._host = host
         headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
         self._client = ollama_client.AsyncClient(host=host, headers=headers)
 
@@ -56,13 +81,14 @@
         messages: list[Message],
         tools: list[ToolSchema] | None = None,
         temperature: float = 0.7,
-        model: str | None = None,
+        model: "list[str] | str | None" = None,
         think: bool | None = None,
         max_tokens: int | None = None,
     ) -> LLMResponse:
+        resolved = _resolve_model(model, self.model)
         try:
             kwargs: dict = {
-                "model": model or self.model,
+                "model": resolved,
                 "messages": _to_ollama_messages(messages),
                 "options": _base_options(temperature, think=think, max_tokens=max_tokens),
                 "stream": False,
@@ -93,18 +119,21 @@
                 prompt_tokens=getattr(response, "prompt_eval_count", None) or None,
                 completion_tokens=getattr(response, "eval_count", None) or None,
             )
+        except (LLMConnectionError, LLMModelNotFoundError, LLMBackendError):
+            raise
         except Exception as e:
-            raise LLMBackendError(str(e)) from e
+            raise _classify_error(e) from e
 
     async def stream(
         self,
         messages: list[Message],
         temperature: float = 0.7,
-        model: str | None = None,
+        model: "list[str] | str | None" = None,
     ) -> AsyncGenerator[LLMChunk, None]:
+        resolved = _resolve_model(model, self.model)
         try:
             async for chunk in await self._client.chat(
-                model=model or self.model,
+                model=resolved,
                 messages=_to_ollama_messages(messages),
                 options=_base_options(temperature),
                 stream=True,
@@ -119,20 +148,23 @@
                     prompt_tokens=chunk.prompt_eval_count if chunk.done else None,
                     completion_tokens=chunk.eval_count if chunk.done else None,
                 )
+        except (LLMConnectionError, LLMModelNotFoundError, LLMBackendError):
+            raise
         except Exception as e:
-            raise LLMBackendError(str(e)) from e
+            raise _classify_error(e) from e
 
     async def stream_complete(
         self,
         messages: list[Message],
         tools: list[ToolSchema] | None = None,
         temperature: float = 0.7,
-        model: str | None = None,
+        model: "list[str] | str | None" = None,
         think: bool | None = None,
     ) -> AsyncGenerator[LLMChunk, None]:
+        resolved = _resolve_model(model, self.model)
         try:
             kwargs: dict = {
-                "model": model or self.model,
+                "model": resolved,
                 "messages": _to_ollama_messages(messages),
                 "options": _base_options(temperature, think=think),
                 "stream": True,
@@ -167,5 +199,7 @@
                     prompt_tokens=chunk.prompt_eval_count if chunk.done else None,
                     completion_tokens=chunk.eval_count if chunk.done else None,
                 )
+        except (LLMConnectionError, LLMModelNotFoundError, LLMBackendError):
+            raise
         except Exception as e:
-            raise LLMBackendError(str(e)) from e
+            raise _classify_error(e) from e
diff --git a/navi/profiles/base.py b/navi/profiles/base.py
index 7a99966..e140656 100644
--- a/navi/profiles/base.py
+++ b/navi/profiles/base.py
@@ -15,7 +15,13 @@
     system_prompt: str
     enabled_tools: list[str]  # tool names; resolved by ToolRegistry at runtime
     llm_backend: str = "ollama"  # backend key, e.g. "ollama", "openai"
-    model: str = "gemma4:31b-cloud"
+    # Ordered list of preferred models; first available wins at runtime.
+    # Accepts a plain string for backward compatibility (auto-wrapped in a list).
+    model: list[str] = field(default_factory=lambda: ["gemma4:31b-cloud"])
+
+    def __post_init__(self) -> None:
+        if isinstance(self.model, str):  # a plain string is still accepted: wrap it in a one-item list
+            self.model = [self.model]
     max_iterations: int = 10
     temperature: float = 0.7
     planning_enabled: bool = False  # if True, run a planning LLM call before the main loop
diff --git a/navi/profiles/developer/config.json b/navi/profiles/developer/config.json
index 39d7496..69036e4 100644
--- a/navi/profiles/developer/config.json
+++ b/navi/profiles/developer/config.json
@@ -9,7 +9,10 @@
     "key_tools": "filesystem, code_exec, terminal, web_search, web_view, spawn_agent"
   },
   "llm_backend": "ollama",
-  "model": "gemma4:31b-cloud",
+  "model": [
+    "gemma4:31b-cloud",
+    "gemma4:26b-a4b-it-q4_K_M"
+  ],
   "temperature": 0.2,
   "max_iterations": 35,
   "planning_enabled": true,
@@ -24,17 +27,36 @@
   "step_validation_enabled": false,
   "adaptive_replan_enabled": true,
   "subagent_tools": [
-    "todo", "scratchpad", "reflect",
-    "web_search", "web_view", "http_request",
-    "filesystem", "code_exec", "terminal", "image_view",
-    "list_tools", "share_file"
+    "todo",
+    "scratchpad",
+    "reflect",
+    "web_search",
+    "web_view",
+    "http_request",
+    "filesystem",
+    "code_exec",
+    "terminal",
+    "image_view",
+    "list_tools",
+    "share_file"
   ],
   "enabled_tools": [
-    "todo", "scratchpad", "reflect", "switch_profile", "list_profiles",
-    "web_search", "web_view", "http_request",
-    "filesystem", "code_exec", "terminal", "image_view",
-    "memory", "list_tools",
-    "spawn_agent", "share_file",
+    "todo",
+    "scratchpad",
+    "reflect",
+    "switch_profile",
+    "list_profiles",
+    "web_search",
+    "web_view",
+    "http_request",
+    "filesystem",
+    "code_exec",
+    "terminal",
+    "image_view",
+    "memory",
+    "list_tools",
+    "spawn_agent",
+    "share_file",
     "email_manager"
   ]
-}
+}
\ No newline at end of file
diff --git a/navi/profiles/loader.py b/navi/profiles/loader.py
index 4402efa..3b21874 100644
--- a/navi/profiles/loader.py
+++ b/navi/profiles/loader.py
@@ -19,6 +19,13 @@
 _REQUIRED_CONFIG_KEYS = {"id", "name", "description", "enabled_tools"}
 
 
+def _normalize_model(value: object) -> list[str]:
+    """Coerce a config "model" value (str or list) to a non-empty list[str], skipping blank entries."""
+    items = value if isinstance(value, list) else [value]
+    # Drop null/empty entries so they never reach a backend as a model name ("None" or "").
+    return [str(m) for m in items if m] or ["gemma4:31b-cloud"]
+
+
 def load_profiles_from_dir(profiles_dir: str | Path) -> list[AgentProfile]:
     """Load all valid profile directories under profiles_dir."""
     base = Path(profiles_dir)
@@ -59,7 +66,7 @@
                 system_prompt=system_prompt,
                 enabled_tools=config["enabled_tools"],
                 llm_backend=config.get("llm_backend", "ollama"),
-                model=config.get("model", "gemma4:31b-cloud"),
+                model=_normalize_model(config.get("model", ["gemma4:31b-cloud"])),
                 temperature=config.get("temperature", 0.7),
                 max_iterations=config.get("max_iterations", 20),
                 planning_enabled=config.get("planning_enabled", False),
diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json
index a144b16..8f52c5a 100644
--- a/navi/profiles/secretary/config.json
+++ b/navi/profiles/secretary/config.json
@@ -9,7 +9,10 @@
     "key_tools": "web_search, web_view, filesystem, code_exec, gmail, todo, scratchpad, spawn_agent, memory"
   },
   "llm_backend": "ollama",
-  "model": "gemma4:31b-cloud",
+  "model": [
+    "gemma4:31b-cloud",
+    "gemma4:26b-a4b-it-q4_K_M"
+  ],
   "temperature": 0.5,
   "max_iterations": 25,
   "planning_enabled": true,
@@ -24,22 +27,36 @@
   "step_validation_enabled": false,
   "adaptive_replan_enabled": false,
   "subagent_tools": [
-    "scratchpad", "reflect",
-    "web_search", "web_view", "http_request",
-    "filesystem", "code_exec", "image_view",
+    "scratchpad",
+    "reflect",
+    "web_search",
+    "web_view",
+    "http_request",
+    "filesystem",
+    "code_exec",
+    "image_view",
     "memory",
     "share_file",
     "weather"
   ],
   "enabled_tools": [
-    "todo", "scratchpad", "reflect", "switch_profile", "list_profiles",
-    "web_search", "web_view", "http_request",
-    "filesystem", "code_exec", "image_view",
+    "todo",
+    "scratchpad",
+    "reflect",
+    "switch_profile",
+    "list_profiles",
+    "web_search",
+    "web_view",
+    "http_request",
+    "filesystem",
+    "code_exec",
+    "image_view",
     "memory",
-    "list_tools", "tool_manual",
+    "list_tools",
+    "tool_manual",
     "spawn_agent",
     "share_file",
     "weather",
     "email_manager"
   ]
-}
+}
\ No newline at end of file
diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json
index 64039b4..0f7e679 100644
--- a/navi/profiles/server_admin/config.json
+++ b/navi/profiles/server_admin/config.json
@@ -9,7 +9,10 @@
     "key_tools": "ssh_exec, terminal, filesystem, code_exec, web_search, spawn_agent, memory"
   },
   "llm_backend": "ollama",
-  "model": "gemma4:31b-cloud",
+  "model": [
+    "gemma4:31b-cloud",
+    "gemma4:26b-a4b-it-q4_K_M"
+  ],
   "temperature": 0.2,
   "max_iterations": 20,
   "planning_enabled": true,
@@ -24,19 +27,36 @@
   "step_validation_enabled": false,
   "adaptive_replan_enabled": false,
   "subagent_tools": [
-    "scratchpad", "reflect",
-    "web_search", "http_request",
-    "filesystem", "code_exec", "terminal", "ssh_exec", "image_view",
+    "scratchpad",
+    "reflect",
+    "web_search",
+    "http_request",
+    "filesystem",
+    "code_exec",
+    "terminal",
+    "ssh_exec",
+    "image_view",
     "share_file"
   ],
   "enabled_tools": [
-    "todo", "scratchpad", "reflect", "switch_profile", "list_profiles",
-    "web_search", "web_view", "http_request",
-    "filesystem", "code_exec", "terminal", "ssh_exec", "image_view",
+    "todo",
+    "scratchpad",
+    "reflect",
+    "switch_profile",
+    "list_profiles",
+    "web_search",
+    "web_view",
+    "http_request",
+    "filesystem",
+    "code_exec",
+    "terminal",
+    "ssh_exec",
+    "image_view",
     "memory",
-    "list_tools", "tool_manual",
+    "list_tools",
+    "tool_manual",
     "spawn_agent",
     "share_file",
     "email_manager"
   ]
-}
+}
\ No newline at end of file
diff --git a/navi/profiles/tool_developer/config.json b/navi/profiles/tool_developer/config.json
index 8cb5a99..471fb84 100644
--- a/navi/profiles/tool_developer/config.json
+++ b/navi/profiles/tool_developer/config.json
@@ -9,7 +9,10 @@
     "key_tools": "write_tool, reload_tools, delete_tool, test_tool, filesystem, terminal, code_exec, memory"
   },
   "llm_backend": "ollama",
-  "model": "gemma4:31b-cloud",
+  "model": [
+    "gemma4:31b-cloud",
+    "gemma4:26b-a4b-it-q4_K_M"
+  ],
   "temperature": 0.2,
   "max_iterations": 35,
   "planning_enabled": true,
@@ -24,21 +27,45 @@
   "step_validation_enabled": false,
   "adaptive_replan_enabled": true,
   "subagent_tools": [
-    "todo", "scratchpad", "reflect",
-    "web_search", "web_view", "http_request",
-    "filesystem", "code_exec", "terminal", "image_view",
-    "write_tool", "reload_tools", "delete_tool", "list_tools", "tool_manual", "test_tool",
+    "todo",
+    "scratchpad",
+    "reflect",
+    "web_search",
+    "web_view",
+    "http_request",
+    "filesystem",
+    "code_exec",
+    "terminal",
+    "image_view",
+    "write_tool",
+    "reload_tools",
+    "delete_tool",
+    "list_tools",
+    "tool_manual",
+    "test_tool",
     "share_file"
   ],
   "enabled_tools": [
-    "todo", "scratchpad", "reflect", "switch_profile", "list_profiles",
-    "web_search", "web_view", "http_request",
-    "filesystem", "code_exec", "terminal", "image_view",
+    "todo",
+    "scratchpad",
+    "reflect",
+    "switch_profile",
+    "list_profiles",
+    "web_search",
+    "web_view",
+    "http_request",
+    "filesystem",
+    "code_exec",
+    "terminal",
+    "image_view",
     "memory",
-    "reload_tools", "delete_tool", "list_tools", "tool_manual",
+    "reload_tools",
+    "delete_tool",
+    "list_tools",
+    "tool_manual",
     "test_tool",
     "spawn_agent",
     "share_file",
     "email_manager"
   ]
-}
+}
\ No newline at end of file
diff --git a/navi/tools/base.py b/navi/tools/base.py
index bda8930..f86bdd8 100644
--- a/navi/tools/base.py
+++ b/navi/tools/base.py
@@ -29,7 +29,7 @@
 
 # Set by run_stream() / run_ephemeral() to expose the current profile's model name
 # to tools that need to make their own LLM calls (e.g. AIHelper-powered tools).
-current_model: ContextVar[str | None] = ContextVar("current_model", default=None)
+current_model: ContextVar[list[str] | str | None] = ContextVar("current_model", default=None)
 
 
 @dataclass