diff --git a/navi/api/deps.py b/navi/api/deps.py index 9aed576..0881e00 100644 --- a/navi/api/deps.py +++ b/navi/api/deps.py @@ -18,20 +18,20 @@ @lru_cache -def _registries() -> tuple[ToolRegistry, ProfileRegistry, BackendRegistry]: +def get_registries() -> tuple[ToolRegistry, ProfileRegistry, BackendRegistry]: return build_default_registries() def get_tool_registry() -> ToolRegistry: - return _registries()[0] + return get_registries()[0] def get_profile_registry() -> ProfileRegistry: - return _registries()[1] + return get_registries()[1] def get_backend_registry() -> BackendRegistry: - return _registries()[2] + return get_registries()[2] _session_store = SqliteSessionStore(settings.db_path) diff --git a/navi/api/websocket.py b/navi/api/websocket.py index e00d272..3a5b1cb 100644 --- a/navi/api/websocket.py +++ b/navi/api/websocket.py @@ -41,8 +41,8 @@ log.info("ws.connected", session_id=session_id) # Build agent (can't use FastAPI Depends inside WebSocket directly) - from navi.api.deps import _registries - tools, profiles, backends = _registries() + from navi.api.deps import get_registries + tools, profiles, backends = get_registries() agent = Agent(session_store, profiles, tools, backends) try: diff --git a/navi/core/agent.py b/navi/core/agent.py index ef36fa7..d5af9f1 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -108,7 +108,7 @@ profile = self._profiles.get(session.profile_id) tools = self._tool_list(profile.enabled_tools) tool_schemas = [t.schema() for t in tools] - llm = self._get_backend(profile.llm_backend, profile.model) + llm = self._get_backend(profile.llm_backend) # Inject system prompt on first message if not session.messages: @@ -122,6 +122,7 @@ session.messages, tools=tool_schemas if tools else None, temperature=profile.temperature, + model=profile.model, ) if response.finish_reason == "stop" or not response.tool_calls: @@ -161,7 +162,7 @@ profile = self._profiles.get(session.profile_id) tools = self._tool_list(profile.enabled_tools) tool_schemas = [t.schema() for t in tools] - llm = self._get_backend(profile.llm_backend, profile.model) + llm = self._get_backend(profile.llm_backend) if not session.messages: session.messages.append(Message(role="system", content=self._build_system_prompt(profile.system_prompt))) @@ -174,6 +175,7 @@ session.messages, tools=tool_schemas if tools else None, temperature=profile.temperature, + model=profile.model, ) if response.finish_reason == "stop" or not response.tool_calls: @@ -182,7 +184,7 @@ accumulated = "" thinking_active = False - async for chunk in llm.stream(final_messages, temperature=profile.temperature): + async for chunk in llm.stream(final_messages, temperature=profile.temperature, model=profile.model): if chunk.thinking: if not thinking_active: thinking_active = True @@ -247,8 +249,8 @@ pass return result - def _get_backend(self, backend_key: str, model: str) -> LLMBackend: - return self._backends.get(backend_key, model) + def _get_backend(self, backend_key: str) -> LLMBackend: + return self._backends.get(backend_key) async def _execute_tool_calls( self, tool_calls: list[ToolCallRequest], tools: list[Tool] diff --git a/navi/core/registry.py b/navi/core/registry.py index 725bddd..9cd133d 100644 --- a/navi/core/registry.py +++ b/navi/core/registry.py @@ -80,7 +80,7 @@ def register(self, key: str, backend: LLMBackend) -> None: self._backends[key] = backend - def get(self, key: str, model: str | None = None) -> LLMBackend: + def get(self, key: str) -> LLMBackend: backend = self._backends.get(key) if backend is None: raise KeyError(f"LLM backend '{key}' not registered") diff --git a/navi/core/session.py b/navi/core/session.py index 69004a0..4588e22 100644 --- a/navi/core/session.py +++ b/navi/core/session.py @@ -2,7 +2,7 @@ import uuid from abc import ABC, abstractmethod -from datetime import datetime +from datetime import datetime, timezone from pydantic import BaseModel, Field @@ -14,8 +14,8 @@ profile_id: str messages: list[Message] = Field(default_factory=list) pinned: bool = False - created_at: datetime = Field(default_factory=datetime.utcnow) - last_active: datetime = Field(default_factory=datetime.utcnow) + created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) + last_active: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) class SessionStore(ABC): @@ -51,14 +51,14 @@ return self._sessions.get(session_id) async def save(self, session: Session) -> None: - session.last_active = datetime.utcnow() + session.last_active = datetime.now(timezone.utc) self._sessions[session.id] = session async def list_all(self) -> list[Session]: return sorted( self._sessions.values(), - key=lambda s: (not s.pinned, s.last_active), - reverse=False, + key=lambda s: (s.pinned, s.last_active), + reverse=True, ) async def delete(self, session_id: str) -> bool: diff --git a/navi/core/sqlite_session_store.py b/navi/core/sqlite_session_store.py index 736af6b..a28022f 100644 --- a/navi/core/sqlite_session_store.py +++ b/navi/core/sqlite_session_store.py @@ -2,7 +2,7 @@ import json import sqlite3 -from datetime import datetime +from datetime import datetime, timezone import aiosqlite @@ -57,7 +57,7 @@ return self._row_to_session(row) if row else None async def save(self, session: Session) -> None: - session.last_active = datetime.utcnow() + session.last_active = datetime.now(timezone.utc) messages_json = json.dumps( [m.model_dump(mode='json', exclude_none=True) for m in session.messages], ensure_ascii=False, diff --git a/navi/llm/base.py b/navi/llm/base.py index 4f2c765..f4d4a71 100644 --- a/navi/llm/base.py +++ b/navi/llm/base.py @@ -68,6 +68,7 @@ messages: list[Message], tools: list[ToolSchema] | None = None, temperature: float = 0.7, + model: str | None = None, ) -> LLMResponse: """Single-shot completion. Used in the agent tool-calling loop.""" @@ -76,5 +77,6 @@ self, messages: list[Message], temperature: float = 0.7, + model: str | None = None, ) -> AsyncGenerator[LLMChunk, None]: """Streaming text completion (no tool calling). Used for final response streaming.""" diff --git a/navi/llm/ollama.py b/navi/llm/ollama.py index cf85016..ccefd79 100644 --- a/navi/llm/ollama.py +++ b/navi/llm/ollama.py @@ -30,8 +30,8 @@ return [t.model_dump() for t in tools] -def _base_options() -> dict: - opts: dict = {"temperature": 0.7, "num_ctx": settings.ollama_num_ctx} +def _base_options(temperature: float) -> dict: + opts: dict = {"temperature": temperature, "num_ctx": settings.ollama_num_ctx} if settings.ollama_think: opts["think"] = True return opts @@ -47,15 +47,13 @@ messages: list[Message], tools: list[ToolSchema] | None = None, temperature: float = 0.7, + model: str | None = None, ) -> LLMResponse: try: - opts = _base_options() - opts["temperature"] = temperature - kwargs: dict = { - "model": self.model, + "model": model or self.model, "messages": _to_ollama_messages(messages), - "options": opts, + "options": _base_options(temperature), "stream": False, } if tools: @@ -89,15 +87,13 @@ self, messages: list[Message], temperature: float = 0.7, + model: str | None = None, ) -> AsyncGenerator[LLMChunk, None]: try: - opts = _base_options() - opts["temperature"] = temperature - async for chunk in await self._client.chat( - model=self.model, + model=model or self.model, messages=_to_ollama_messages(messages), - options=opts, + options=_base_options(temperature), stream=True, ): thinking = getattr(chunk.message, "thinking", None) or None diff --git a/navi/llm/openai_backend.py b/navi/llm/openai_backend.py index 72dc338..0b4ba77 100644 --- a/navi/llm/openai_backend.py +++ b/navi/llm/openai_backend.py @@ -21,6 +21,7 @@ messages: list[Message], tools: list[ToolSchema] | None = None, temperature: float = 0.7, + model: str | None = None, ) -> LLMResponse: raise NotImplementedError("OpenAI backend not yet implemented") @@ -28,6 +29,7 @@ self, messages: list[Message], temperature: float = 0.7, + model: str | None = None, ) -> AsyncGenerator[LLMChunk, None]: raise NotImplementedError("OpenAI backend not yet implemented") yield # makes this a generator diff --git a/navi/profiles/secretary.py b/navi/profiles/secretary.py index d9f65b6..0611b1e 100644 --- a/navi/profiles/secretary.py +++ b/navi/profiles/secretary.py @@ -16,6 +16,6 @@ Output style: concise, structured. When researching, include sources. Match tone and format to what was asked.""", enabled_tools=["web_search", "http_request", "filesystem", "code_exec", "terminal", "image_view", "reload_tools", "write_tool", "list_tools", "tool_manual"], - model="gemma4:e4b-it-q8_0", + model="gemma4:e2b-it-q8_0", temperature=0.7, ) diff --git a/navi/profiles/server_admin.py b/navi/profiles/server_admin.py index 05bd752..aee9a39 100644 --- a/navi/profiles/server_admin.py +++ b/navi/profiles/server_admin.py @@ -18,6 +18,6 @@ Workflow: gather data first (logs, status, metrics), diagnose, then act. Before destructive or irreversible operations, state what you're about to do and why.""", enabled_tools=["terminal", "filesystem", "http_request", "web_search", "ssh_exec", "image_view", "reload_tools", "write_tool", "list_tools", "tool_manual"], - model="gemma4:e4b-it-q8_0", + model="gemma4:e2b-it-q8_0", temperature=0.2, ) diff --git a/navi/profiles/smart_home.py b/navi/profiles/smart_home.py index 4bc0770..9e4f507 100644 --- a/navi/profiles/smart_home.py +++ b/navi/profiles/smart_home.py @@ -18,6 +18,6 @@ Before writing any HA config to disk, validate structure in code_exec. Before toggling devices or triggering automations, confirm if the action is irreversible.""", enabled_tools=["http_request", "filesystem", "code_exec", "terminal", "ssh_exec", "image_view", "reload_tools", "write_tool", "list_tools", "tool_manual"], - model="gemma4:e4b-it-q8_0", + model="gemma4:e2b-it-q8_0", temperature=0.3, )