diff --git a/docs/tech_debt_review_2026-04-29.md b/docs/tech_debt_review_2026-04-29.md index 8e643d7..aa3cabc 100644 --- a/docs/tech_debt_review_2026-04-29.md +++ b/docs/tech_debt_review_2026-04-29.md @@ -86,6 +86,7 @@ | 51 | **Frontend: imprecise relative-time updates** | `webclient/src/composables/useTime.js` | 36 | 30-second interval means "just now" can be stale for up to 30s before jumping. | | 52 | **Frontend: stream message ID collision risk** | `webclient/src/stores/chat.js` | 148 | `id: 'stream_${Date.now()}'` could collide within the same millisecond on rapid reconnect. | | 53 | **[FIXED] Frontend: flawed meta-row visibility logic** | `webclient/src/components/messages/AssistantMessage.vue` | 102-104 | `hasMeta` uses `|| props.msg.tool_call_count`, so `0` is falsy and may incorrectly hide the meta row. | +| 54 | **Dead `stream()` method on LLM backends** | `navi/llm/ollama.py:149`, `navi/llm/fallback.py:168` | `OllamaBackend.stream()` and `FallbackOllamaBackend.stream()` exist in code but are never called by `agent.py` or `messages.py`. Only `stream_complete()` is used. Historical purpose unknown — investigate before removing. May have been for a prior streaming architecture where thinking/tool deltas were separate from `stream_complete`. | --- diff --git a/navi/api/routes/admin.py b/navi/api/routes/admin.py index ff92885..dbac8cd 100644 --- a/navi/api/routes/admin.py +++ b/navi/api/routes/admin.py @@ -229,6 +229,17 @@ return {"ok": True} +@router.post("/ollama/clear-blacklists", status_code=204) +async def admin_clear_ollama_blacklists( + user: Annotated[User, Depends(require_admin)], +) -> None: + """Clear dead-server and dead-model blacklists for the Ollama fallback backend.""" + from navi.llm.fallback import clear_blacklists + + clear_blacklists() + log.info("admin.ollama_blacklists_cleared", admin_id=user.id) + + @router.get("/profiles") async def admin_list_profiles( user: Annotated[User, Depends(require_permission("navi.profiles.manage"))], diff --git a/navi/core/agent.py b/navi/core/agent.py index 9112ae2..99980f9 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -258,6 +258,7 @@ tools=tool_schemas if tools else None, temperature=profile.temperature, model=profile.model, + think=profile.think_enabled, top_k=profile.top_k, top_p=profile.top_p, num_thread=profile.num_thread, diff --git a/navi/core/compressor.py b/navi/core/compressor.py index efd460b..6cb9874 100644 --- a/navi/core/compressor.py +++ b/navi/core/compressor.py @@ -186,7 +186,7 @@ async def compress_context( context: list[Message], llm: LLMBackend, - model: str, + model: "list[str] | str | None", temperature: float, keep_recent: int, max_tokens: int | None = None, diff --git a/navi/core/registry.py b/navi/core/registry.py index 04b0d71..a7b24d5 100644 --- a/navi/core/registry.py +++ b/navi/core/registry.py @@ -3,8 +3,7 @@ from navi.config import settings from navi.exceptions import ProfileNotFound, ToolNotFound from navi.llm.base import LLMBackend -from navi.llm.ollama import OllamaBackend -from navi.llm.fallback import FallbackOllamaBackend, load_servers_from_file +from navi.llm.fallback import FallbackOllamaBackend, ServerEntry, load_servers_from_file from navi.profiles import ALL_PROFILES from navi.profiles.base import AgentProfile from navi.tools import ( @@ -137,7 +136,6 @@ def _discover_backends() -> list[tuple[str, LLMBackend]]: """Auto-discover LLM backends from navi/llm/ modules.""" discovered: list[tuple[str, LLMBackend]] = [] - from navi.llm.ollama import OllamaBackend from navi.llm.fallback import FallbackOllamaBackend from navi.llm.openai_backend import OpenAIBackend ollama_http_timeout = max( @@ -146,17 +144,16 @@ settings.llm_stream_first_chunk_timeout, ) - # Ollama backend (primary) + # Ollama backend (primary) — always use FallbackOllamaBackend so model + # priority lists work regardless of multi-server vs single-server config. if settings.ollama_backends_file: servers = load_servers_from_file(settings.ollama_backends_file) - discovered.append(("ollama", FallbackOllamaBackend(servers))) else: - discovered.append(("ollama", OllamaBackend( - model=settings.ollama_default_model, + servers = [ServerEntry( host=settings.ollama_host, api_key=settings.ollama_api_key, - timeout=ollama_http_timeout, - ))) + )] + discovered.append(("ollama", FallbackOllamaBackend(servers))) # OpenAI backend (if configured) if settings.openai_api_key: diff --git a/navi/llm/fallback.py b/navi/llm/fallback.py index 9bb8106..e79c086 100644 --- a/navi/llm/fallback.py +++ b/navi/llm/fallback.py @@ -62,10 +62,35 @@ return True +def clear_blacklists() -> None: + """Manually clear all dead-server and dead-model blacklists.""" + _dead_servers.clear() + _dead_models.clear() + log.info("fallback.blacklists_cleared") + + def load_servers_from_file(path: str) -> list[ServerEntry]: - """Load server list from a JSON file: [{host, api_key?}, ...]""" - data = json.loads(Path(path).read_text(encoding="utf-8")) - return [ServerEntry(host=e["host"], api_key=e.get("api_key", "")) for e in data] + """Load server list from a JSON file: [{host, api_key?}, ...] + + Returns an empty list on missing file, bad JSON, or entries without a host. + """ + try: + data = json.loads(Path(path).read_text(encoding="utf-8")) + except FileNotFoundError: + log.warning("fallback.servers_file_missing", path=path) + return [] + except json.JSONDecodeError: + log.warning("fallback.servers_file_bad_json", path=path) + return [] + + servers: list[ServerEntry] = [] + for i, entry in enumerate(data): + host = entry.get("host") + if not host: + log.warning("fallback.servers_file_missing_host", index=i, entry=entry) + continue + servers.append(ServerEntry(host=host, api_key=entry.get("api_key", ""))) + return servers class FallbackOllamaBackend(LLMBackend): diff --git a/tests/unit/core/test_registry.py b/tests/unit/core/test_registry.py index bed9e98..908e07b 100644 --- a/tests/unit/core/test_registry.py +++ b/tests/unit/core/test_registry.py @@ -83,6 +83,7 @@ class TestBackendDiscovery: def test_primary_ollama_uses_expanded_timeout(self, monkeypatch): import navi.core.registry as registry_mod + import navi.llm.fallback as fallback_mod captured = {} @@ -95,14 +96,14 @@ monkeypatch.setattr(registry_mod.settings, "ollama_request_timeout", 30) monkeypatch.setattr(registry_mod.settings, "llm_complete_timeout", 120) monkeypatch.setattr(registry_mod.settings, "llm_stream_first_chunk_timeout", 180) - - import navi.llm.ollama as ollama_mod - - monkeypatch.setattr(ollama_mod, "OllamaBackend", FakeOllamaBackend) + monkeypatch.setattr(fallback_mod, "OllamaBackend", FakeOllamaBackend) discovered = registry_mod._discover_backends() assert discovered[0][0] == "ollama" + assert isinstance(discovered[0][1], fallback_mod.FallbackOllamaBackend) + # _get_client lazily creates the inner OllamaBackend + discovered[0][1]._get_client(fallback_mod.ServerEntry(host="http://test")) assert captured["timeout"] == 180 diff --git a/tests/unit/llm/test_ollama.py b/tests/unit/llm/test_ollama.py index 1524ac3..3906f2a 100644 --- a/tests/unit/llm/test_ollama.py +++ b/tests/unit/llm/test_ollama.py @@ -65,3 +65,57 @@ "top_k": 20, "top_p": 0.8, } + + +def test_fallback_with_single_server(monkeypatch): + import navi.llm.fallback as fallback_mod + import navi.config as config_mod + + captured = {} + + class FakeOllamaBackend: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr(fallback_mod, "OllamaBackend", FakeOllamaBackend) + + backend = FallbackOllamaBackend([ServerEntry(host="http://localhost:11434", api_key="")]) + client = backend._get_client(ServerEntry(host="http://localhost:11434")) + + assert client is not None + + +def test_load_servers_missing_file_returns_empty(tmp_path): + from navi.llm.fallback import load_servers_from_file + + missing = tmp_path / "no_such_file.json" + assert load_servers_from_file(str(missing)) == [] + + +def test_load_servers_bad_json_returns_empty(tmp_path): + from navi.llm.fallback import load_servers_from_file + + bad = tmp_path / "bad.json" + bad.write_text("not json", encoding="utf-8") + assert load_servers_from_file(str(bad)) == [] + + +def test_load_servers_missing_host_skipped(tmp_path): + from navi.llm.fallback import load_servers_from_file, ServerEntry + + f = tmp_path / "servers.json" + f.write_text('[{"host": "http://a"}, {"api_key": "k"}]', encoding="utf-8") + result = load_servers_from_file(str(f)) + assert result == [ServerEntry(host="http://a", api_key="")] + + +def test_clear_blacklists_resets_state(monkeypatch): + import navi.llm.fallback as fallback_mod + + fallback_mod._dead_servers["http://dead"] = 0.0 + fallback_mod._dead_models[("http://dead", "model")] = 0.0 + + fallback_mod.clear_blacklists() + + assert fallback_mod._dead_servers == {} + assert fallback_mod._dead_models == {}