diff --git a/docs/tech_debt_review_2026-04-29.md b/docs/tech_debt_review_2026-04-29.md
index 8e643d7..aa3cabc 100644
--- a/docs/tech_debt_review_2026-04-29.md
+++ b/docs/tech_debt_review_2026-04-29.md
@@ -86,6 +86,7 @@
 | 51 | **Frontend: imprecise relative-time updates** | `webclient/src/composables/useTime.js` | 36 | 30-second interval means "just now" can be stale for up to 30s before jumping. |
 | 52 | **Frontend: stream message ID collision risk** | `webclient/src/stores/chat.js` | 148 | `id: 'stream_${Date.now()}'` could collide within the same millisecond on rapid reconnect. |
 | 53 | **[FIXED] Frontend: flawed meta-row visibility logic** | `webclient/src/components/messages/AssistantMessage.vue` | 102-104 | `hasMeta` uses `|| props.msg.tool_call_count`, so `0` is falsy and may incorrectly hide the meta row. |
+| 54 | **Dead `stream()` method on LLM backends** | `navi/llm/ollama.py:149`, `navi/llm/fallback.py:168` | `OllamaBackend.stream()` and `FallbackOllamaBackend.stream()` exist in code but are never called by `agent.py` or `messages.py`. Only `stream_complete()` is used. Historical purpose unknown — investigate before removing. May have been for a prior streaming architecture where thinking/tool deltas were separate from `stream_complete`. |
 
 ---
 
diff --git a/navi/api/routes/admin.py b/navi/api/routes/admin.py
index ff92885..dbac8cd 100644
--- a/navi/api/routes/admin.py
+++ b/navi/api/routes/admin.py
@@ -229,6 +229,17 @@
     return {"ok": True}
 
 
+@router.post("/ollama/clear-blacklists", status_code=204)
+async def admin_clear_ollama_blacklists(
+    user: Annotated[User, Depends(require_admin)],
+) -> None:
+    """Clear dead-server and dead-model blacklists for the Ollama fallback backend."""
+    from navi.llm.fallback import clear_blacklists
+
+    clear_blacklists()
+    log.info("admin.ollama_blacklists_cleared", admin_id=user.id)
+
+
 @router.get("/profiles")
 async def admin_list_profiles(
     user: Annotated[User, Depends(require_permission("navi.profiles.manage"))],
diff --git a/navi/core/agent.py b/navi/core/agent.py
index 9112ae2..99980f9 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -258,6 +258,7 @@
                 tools=tool_schemas if tools else None,
                 temperature=profile.temperature,
                 model=profile.model,
+                think=profile.think_enabled,
                 top_k=profile.top_k,
                 top_p=profile.top_p,
                 num_thread=profile.num_thread,
diff --git a/navi/core/compressor.py b/navi/core/compressor.py
index efd460b..6cb9874 100644
--- a/navi/core/compressor.py
+++ b/navi/core/compressor.py
@@ -186,7 +186,7 @@
 async def compress_context(
     context: list[Message],
     llm: LLMBackend,
-    model: str,
+    model: "list[str] | str | None",
     temperature: float,
     keep_recent: int,
     max_tokens: int | None = None,
diff --git a/navi/core/registry.py b/navi/core/registry.py
index 04b0d71..a7b24d5 100644
--- a/navi/core/registry.py
+++ b/navi/core/registry.py
@@ -3,8 +3,7 @@
 from navi.config import settings
 from navi.exceptions import ProfileNotFound, ToolNotFound
 from navi.llm.base import LLMBackend
-from navi.llm.ollama import OllamaBackend
-from navi.llm.fallback import FallbackOllamaBackend, load_servers_from_file
+from navi.llm.fallback import FallbackOllamaBackend, ServerEntry, load_servers_from_file
 from navi.profiles import ALL_PROFILES
 from navi.profiles.base import AgentProfile
 from navi.tools import (
@@ -137,7 +136,6 @@
 def _discover_backends() -> list[tuple[str, LLMBackend]]:
     """Auto-discover LLM backends from navi/llm/ modules."""
     discovered: list[tuple[str, LLMBackend]] = []
-    from navi.llm.ollama import OllamaBackend
     from navi.llm.fallback import FallbackOllamaBackend
     from navi.llm.openai_backend import OpenAIBackend
     ollama_http_timeout = max(
@@ -146,17 +144,16 @@
         settings.llm_stream_first_chunk_timeout,
     )
 
-    # Ollama backend (primary)
+    # Ollama backend (primary) — always use FallbackOllamaBackend so model
+    # priority lists work regardless of multi-server vs single-server config.
     if settings.ollama_backends_file:
         servers = load_servers_from_file(settings.ollama_backends_file)
-        discovered.append(("ollama", FallbackOllamaBackend(servers)))
     else:
-        discovered.append(("ollama", OllamaBackend(
-            model=settings.ollama_default_model,
+        servers = [ServerEntry(
             host=settings.ollama_host,
             api_key=settings.ollama_api_key,
-            timeout=ollama_http_timeout,
-        )))
+        )]
+    discovered.append(("ollama", FallbackOllamaBackend(servers)))
 
     # OpenAI backend (if configured)
     if settings.openai_api_key:
diff --git a/navi/llm/fallback.py b/navi/llm/fallback.py
index 9bb8106..e79c086 100644
--- a/navi/llm/fallback.py
+++ b/navi/llm/fallback.py
@@ -62,10 +62,35 @@
     return True
 
 
+def clear_blacklists() -> None:
+    """Manually clear all dead-server and dead-model blacklists."""
+    _dead_servers.clear()
+    _dead_models.clear()
+    log.info("fallback.blacklists_cleared")
+
+
 def load_servers_from_file(path: str) -> list[ServerEntry]:
-    """Load server list from a JSON file: [{host, api_key?}, ...]"""
-    data = json.loads(Path(path).read_text(encoding="utf-8"))
-    return [ServerEntry(host=e["host"], api_key=e.get("api_key", "")) for e in data]
+    """Load server list from a JSON file: [{host, api_key?}, ...]
+
+    Returns an empty list on missing file, bad JSON, or entries without a host.
+    """
+    try:
+        data = json.loads(Path(path).read_text(encoding="utf-8"))
+    except FileNotFoundError:
+        log.warning("fallback.servers_file_missing", path=path)
+        return []
+    except json.JSONDecodeError:
+        log.warning("fallback.servers_file_bad_json", path=path)
+        return []
+
+    servers: list[ServerEntry] = []
+    for i, entry in enumerate(data):
+        host = entry.get("host")
+        if not host:
+            log.warning("fallback.servers_file_missing_host", index=i, entry=entry)
+            continue
+        servers.append(ServerEntry(host=host, api_key=entry.get("api_key", "")))
+    return servers
 
 
 class FallbackOllamaBackend(LLMBackend):
diff --git a/tests/unit/core/test_registry.py b/tests/unit/core/test_registry.py
index bed9e98..908e07b 100644
--- a/tests/unit/core/test_registry.py
+++ b/tests/unit/core/test_registry.py
@@ -83,6 +83,7 @@
 class TestBackendDiscovery:
     def test_primary_ollama_uses_expanded_timeout(self, monkeypatch):
         import navi.core.registry as registry_mod
+        import navi.llm.fallback as fallback_mod
 
         captured = {}
 
@@ -95,14 +96,14 @@
         monkeypatch.setattr(registry_mod.settings, "ollama_request_timeout", 30)
         monkeypatch.setattr(registry_mod.settings, "llm_complete_timeout", 120)
         monkeypatch.setattr(registry_mod.settings, "llm_stream_first_chunk_timeout", 180)
-
-        import navi.llm.ollama as ollama_mod
-
-        monkeypatch.setattr(ollama_mod, "OllamaBackend", FakeOllamaBackend)
+        monkeypatch.setattr(fallback_mod, "OllamaBackend", FakeOllamaBackend)
 
         discovered = registry_mod._discover_backends()
 
         assert discovered[0][0] == "ollama"
+        assert isinstance(discovered[0][1], fallback_mod.FallbackOllamaBackend)
+        # _get_client lazily creates the inner OllamaBackend
+        discovered[0][1]._get_client(fallback_mod.ServerEntry(host="http://test"))
         assert captured["timeout"] == 180
 
 
diff --git a/tests/unit/llm/test_ollama.py b/tests/unit/llm/test_ollama.py
index 1524ac3..3906f2a 100644
--- a/tests/unit/llm/test_ollama.py
+++ b/tests/unit/llm/test_ollama.py
@@ -65,3 +65,57 @@
         "top_k": 20,
         "top_p": 0.8,
     }
+
+
+def test_fallback_with_single_server(monkeypatch):
+    import navi.llm.fallback as fallback_mod
+    import navi.config as config_mod
+
+    captured = {}
+
+    class FakeOllamaBackend:
+        def __init__(self, **kwargs):
+            captured.update(kwargs)
+
+    monkeypatch.setattr(fallback_mod, "OllamaBackend", FakeOllamaBackend)
+
+    backend = FallbackOllamaBackend([ServerEntry(host="http://localhost:11434", api_key="")])
+    client = backend._get_client(ServerEntry(host="http://localhost:11434"))
+
+    assert client is not None
+
+
+def test_load_servers_missing_file_returns_empty(tmp_path):
+    from navi.llm.fallback import load_servers_from_file
+
+    missing = tmp_path / "no_such_file.json"
+    assert load_servers_from_file(str(missing)) == []
+
+
+def test_load_servers_bad_json_returns_empty(tmp_path):
+    from navi.llm.fallback import load_servers_from_file
+
+    bad = tmp_path / "bad.json"
+    bad.write_text("not json", encoding="utf-8")
+    assert load_servers_from_file(str(bad)) == []
+
+
+def test_load_servers_missing_host_skipped(tmp_path):
+    from navi.llm.fallback import load_servers_from_file, ServerEntry
+
+    f = tmp_path / "servers.json"
+    f.write_text('[{"host": "http://a"}, {"api_key": "k"}]', encoding="utf-8")
+    result = load_servers_from_file(str(f))
+    assert result == [ServerEntry(host="http://a", api_key="")]
+
+
+def test_clear_blacklists_resets_state(monkeypatch):
+    import navi.llm.fallback as fallback_mod
+
+    fallback_mod._dead_servers["http://dead"] = 0.0
+    fallback_mod._dead_models[("http://dead", "model")] = 0.0
+
+    fallback_mod.clear_blacklists()
+
+    assert fallback_mod._dead_servers == {}
+    assert fallback_mod._dead_models == {}