diff --git a/navi/core/registry.py b/navi/core/registry.py index e6e340c..84af31c 100644 --- a/navi/core/registry.py +++ b/navi/core/registry.py @@ -143,12 +143,19 @@ # priority lists work regardless of multi-server vs single-server config. if settings.ollama_backends_file: servers = load_servers_from_file(settings.ollama_backends_file) + if not servers and settings.ollama_host: + log.warning("fallback.backends_file_empty", path=settings.ollama_backends_file) else: + servers = [] + + if not servers and settings.ollama_host: servers = [ServerEntry( host=settings.ollama_host, api_key=settings.ollama_api_key, )] - discovered.append(("ollama", FallbackOllamaBackend(servers))) + + if servers: + discovered.append(("ollama", FallbackOllamaBackend(servers))) # OpenAI backend (if configured) if settings.openai_api_key: diff --git a/navi/llm/fallback.py b/navi/llm/fallback.py index f54bf87..f3197c5 100644 --- a/navi/llm/fallback.py +++ b/navi/llm/fallback.py @@ -135,6 +135,7 @@ ) -> LLMResponse: models = self._model_list(model) last_err: Exception = LLMBackendError("No backends configured") + single_server = len(self._servers) <= 1 for server in self._servers: if _is_dead_server(server.host): @@ -150,12 +151,18 @@ ) except LLMConnectionError as e: log.warning("fallback.server_dead", host=server.host, error=str(e)) - _dead_servers[server.host] = time.monotonic() last_err = e + if single_server: + # Do not blacklist the only server — the next request + # should retry immediately instead of being blocked + # for _TTL seconds. 
+ break + _dead_servers[server.host] = time.monotonic() break # Skip remaining models — server is gone except LLMModelNotFoundError as e: log.warning("fallback.model_dead", host=server.host, model=m, error=str(e)) - _dead_models[(server.host, m)] = time.monotonic() + if not single_server: + _dead_models[(server.host, m)] = time.monotonic() last_err = e # Continue to next model on the same server @@ -168,6 +175,7 @@ ) -> list[list[float]]: models = self._model_list(model) last_err: Exception = LLMBackendError("No backends configured") + single_server = len(self._servers) <= 1 for server in self._servers: if _is_dead_server(server.host): @@ -179,12 +187,15 @@ return await self._get_client(server).embed(texts, model=m) except LLMConnectionError as e: log.warning("fallback.server_dead", host=server.host, error=str(e)) - _dead_servers[server.host] = time.monotonic() last_err = e + if single_server: + break + _dead_servers[server.host] = time.monotonic() break except LLMModelNotFoundError as e: log.warning("fallback.model_dead", host=server.host, model=m, error=str(e)) - _dead_models[(server.host, m)] = time.monotonic() + if not single_server: + _dead_models[(server.host, m)] = time.monotonic() last_err = e continue @@ -203,6 +214,7 @@ ) -> AsyncGenerator[LLMChunk, None]: models = self._model_list(model) last_err: Exception = LLMBackendError("No backends configured") + single_server = len(self._servers) <= 1 for server in self._servers: if _is_dead_server(server.host): @@ -221,12 +233,15 @@ continue except LLMConnectionError as e: log.warning("fallback.server_dead", host=server.host, error=str(e)) - _dead_servers[server.host] = time.monotonic() last_err = e + if single_server: + break + _dead_servers[server.host] = time.monotonic() break except LLMModelNotFoundError as e: log.warning("fallback.model_dead", host=server.host, model=m, error=str(e)) - _dead_models[(server.host, m)] = time.monotonic() + if not single_server: + _dead_models[(server.host, m)] = 
time.monotonic() last_err = e continue else: diff --git a/tests/unit/llm/test_ollama.py b/tests/unit/llm/test_ollama.py index a8d36e9..bb3b49e 100644 --- a/tests/unit/llm/test_ollama.py +++ b/tests/unit/llm/test_ollama.py @@ -1,6 +1,7 @@ """Unit tests for Ollama backend helpers.""" import httpx +import pytest from navi.exceptions import LLMConnectionError from navi.llm.fallback import FallbackOllamaBackend, ServerEntry @@ -127,3 +128,101 @@ assert fallback_mod._dead_servers == {} assert fallback_mod._dead_models == {} + + +def test_single_server_not_blacklisted_on_connection_error(monkeypatch): + import navi.llm.fallback as fallback_mod + + fallback_mod.clear_blacklists() + + class FailingOllamaBackend: + def __init__(self, **kwargs): + pass + + async def complete(self, *args, **kwargs): + raise fallback_mod.LLMConnectionError("connection refused") + + monkeypatch.setattr(fallback_mod, "OllamaBackend", FailingOllamaBackend) + + backend = FallbackOllamaBackend([ServerEntry(host="http://localhost:11434")]) + + import asyncio + with pytest.raises(fallback_mod.LLMBackendError): + asyncio.run(backend.complete([])) + + # The single server must NOT be blacklisted so the next request retries immediately + assert fallback_mod._dead_servers == {} + assert fallback_mod._dead_models == {} + + +def test_multi_server_blacklists_dead_server(monkeypatch): + import navi.llm.fallback as fallback_mod + + fallback_mod.clear_blacklists() + + class FailingOllamaBackend: + def __init__(self, **kwargs): + pass + + async def complete(self, *args, **kwargs): + raise fallback_mod.LLMConnectionError("connection refused") + + monkeypatch.setattr(fallback_mod, "OllamaBackend", FailingOllamaBackend) + + backend = FallbackOllamaBackend([ + ServerEntry(host="http://dead:11434"), + ServerEntry(host="http://alive:11434"), + ]) + + import asyncio + with pytest.raises(fallback_mod.LLMBackendError): + asyncio.run(backend.complete([])) + + # OllamaBackend is patched to the failing stub, so both hosts are expected 
to raise LLMConnectionError. + # The `break` after a connection error only skips the remaining models on that + # server; the next server is still tried, so both hosts end up in _dead_servers. + assert "http://dead:11434" in fallback_mod._dead_servers + assert "http://alive:11434" in fallback_mod._dead_servers + + +def test_single_server_model_not_blacklisted_on_model_not_found(monkeypatch): + import navi.llm.fallback as fallback_mod + + fallback_mod.clear_blacklists() + + class FailingOllamaBackend: + def __init__(self, **kwargs): + pass + + async def complete(self, *args, **kwargs): + raise fallback_mod.LLMModelNotFoundError("model missing") + + monkeypatch.setattr(fallback_mod, "OllamaBackend", FailingOllamaBackend) + + backend = FallbackOllamaBackend([ServerEntry(host="http://localhost:11434")]) + + import asyncio + with pytest.raises(fallback_mod.LLMBackendError): + asyncio.run(backend.complete([])) + + # Single server should not blacklist the model either + assert fallback_mod._dead_models == {} + + +def test_discover_backends_empty_file_fallback_to_host(monkeypatch): + import navi.core.registry as registry_mod + import navi.config as config_mod + from navi.config import Settings + + monkeypatch.setattr( + config_mod, "settings", + Settings( + ollama_host="http://fallback-ollama:11434", + ollama_backends_file="/nonexistent/backends.json", + ) + ) + + backends = registry_mod._discover_backends() + keys = [k for k, _ in backends] + + assert "ollama" in keys