diff --git a/navi/api/websocket.py b/navi/api/websocket.py index fa7541b..e00d272 100644 --- a/navi/api/websocket.py +++ b/navi/api/websocket.py @@ -6,6 +6,8 @@ Protocol (server -> client): {"type": "stream_start"} + {"type": "thinking_delta", "delta": "..."} # reasoning chunk + {"type": "thinking_end"} # reasoning done {"type": "stream_delta", "delta": "..."} # text chunk {"type": "tool_call", "tool": "...", "args": {...}, "result": "...", "success": bool} {"type": "stream_end", "content": "..."} # full assembled response @@ -18,7 +20,7 @@ from fastapi import APIRouter, WebSocket, WebSocketDisconnect from navi.api.deps import get_agent, get_session_store -from navi.core import Agent, InMemorySessionStore, StreamEnd, TextDelta, ToolEvent +from navi.core import Agent, InMemorySessionStore, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent from navi.exceptions import MaxIterationsReached, NaviError, SessionNotFound router = APIRouter(tags=["websocket"]) @@ -72,7 +74,11 @@ try: async for event in agent.run_stream(session_id, user_content, images=raw_images): - if isinstance(event, TextDelta): + if isinstance(event, ThinkingDelta): + await websocket.send_json({"type": "thinking_delta", "delta": event.delta}) + elif isinstance(event, ThinkingEnd): + await websocket.send_json({"type": "thinking_end"}) + elif isinstance(event, TextDelta): await websocket.send_json({"type": "stream_delta", "delta": event.delta}) elif isinstance(event, ToolEvent): await websocket.send_json({ diff --git a/navi/config.py b/navi/config.py index d6b761f..6fc37d2 100644 --- a/navi/config.py +++ b/navi/config.py @@ -5,7 +5,9 @@ model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore") ollama_host: str = "http://localhost:11434" - ollama_default_model: str = "gemma4:e2b-it-q4_K_M" + ollama_default_model: str = "gemma4:e2b-it-q8_0" + ollama_num_ctx: int = 8192 + ollama_think: bool = True openai_api_key: str = "" anthropic_api_key: str = "" @@ -25,6 +27,13 @@ log_level: str = "INFO" + # Directory for user-defined tools (auto-discovered at startup) + tools_dir: str = "tools" + + # Global personality prompt prepended to every agent's system prompt. + # Override via NAVI_PERSONA env var or .env file. + navi_persona: str = "" + @property def fs_allowed_paths_list(self) -> list[str]: return [p.strip() for p in self.fs_allowed_paths.split(",") if p.strip()] diff --git a/navi/core/__init__.py b/navi/core/__init__.py index 2601b6d..db502b5 100644 --- a/navi/core/__init__.py +++ b/navi/core/__init__.py @@ -1,4 +1,4 @@ -from .agent import Agent, AgentEvent, StreamEnd, TextDelta, ToolEvent +from .agent import Agent, AgentEvent, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent from .registry import BackendRegistry, ProfileRegistry, ToolRegistry, build_default_registries from .session import InMemorySessionStore, Session, SessionStore from .sqlite_session_store import SqliteSessionStore @@ -8,6 +8,8 @@ "AgentEvent", "StreamEnd", "TextDelta", + "ThinkingDelta", + "ThinkingEnd", "ToolEvent", "BackendRegistry", "ProfileRegistry", diff --git a/navi/core/agent.py b/navi/core/agent.py index 336b793..ef36fa7 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -18,10 +18,12 @@ import json from dataclasses import dataclass from datetime import datetime, timezone +from pathlib import Path from typing import AsyncGenerator import structlog +from navi.config import settings from navi.exceptions import MaxIterationsReached, SessionNotFound from navi.llm.base import LLMBackend, Message, ToolCallRequest from navi.tools.base import Tool @@ -29,6 +31,15 @@ from .registry import BackendRegistry, ProfileRegistry, ToolRegistry from .session import SessionStore +_USER_ENABLED_FILE = Path(settings.tools_dir) / "enabled.json" + + +def _load_user_enabled_tools() -> list[str]: + try: + return json.loads(_USER_ENABLED_FILE.read_text()) + except Exception: + return [] + log = structlog.get_logger() @@ -50,13 +61,25 @@ @dataclass +class ThinkingDelta: + """A chunk of thinking/reasoning text from the streaming LLM response.""" + + delta: str + + +@dataclass +class ThinkingEnd: + """Marks the end of the thinking phase.""" + + +@dataclass class StreamEnd: """Marks the end of the streaming response.""" full_content: str -AgentEvent = ToolEvent | TextDelta | StreamEnd +AgentEvent = ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd class Agent: @@ -89,7 +112,7 @@ # Inject system prompt on first message if not session.messages: - session.messages.append(Message(role="system", content=profile.system_prompt)) + session.messages.append(Message(role="system", content=self._build_system_prompt(profile.system_prompt))) session.messages.append(Message(role="user", content=user_message, images=images or None, created_at=datetime.now(timezone.utc))) @@ -141,7 +164,7 @@ llm = self._get_backend(profile.llm_backend, profile.model) if not session.messages: - session.messages.append(Message(role="system", content=profile.system_prompt)) + session.messages.append(Message(role="system", content=self._build_system_prompt(profile.system_prompt))) session.messages.append(Message(role="user", content=user_message, images=images or None, created_at=datetime.now(timezone.utc))) @@ -155,15 +178,25 @@ if response.finish_reason == "stop" or not response.tool_calls: # Switch to streaming for the final text response - # Re-use the already-received content, stream it as one delta final_messages = session.messages.copy() accumulated = "" + thinking_active = False async for chunk in llm.stream(final_messages, temperature=profile.temperature): - if chunk.delta: + if chunk.thinking: + if not thinking_active: + thinking_active = True + yield ThinkingDelta(delta=chunk.thinking) + elif chunk.delta: + if thinking_active: + thinking_active = False + yield ThinkingEnd() accumulated += chunk.delta yield TextDelta(delta=chunk.delta) + if thinking_active: + yield ThinkingEnd() + session.messages.append(Message(role="assistant", content=accumulated, created_at=datetime.now(timezone.utc))) await self._sessions.save(session) yield StreamEnd(full_content=accumulated) @@ -192,8 +225,27 @@ # Internal helpers # ------------------------------------------------------------------ + def _build_system_prompt(self, profile_prompt: str) -> str: + persona = settings.navi_persona.strip() + if persona: + return f"{persona}\n\n---\n\n{profile_prompt}" + return profile_prompt + def _tool_list(self, enabled: list[str]) -> list[Tool]: - return self._tools.resolve(enabled) + names = list(enabled) + # Merge in user-created tools from tools/enabled.json + extra = _load_user_enabled_tools() + for name in extra: + if name not in names: + names.append(name) + # Silently skip any names not registered (e.g. tool was deleted) + result = [] + for name in names: + try: + result.append(self._tools.get(name)) + except Exception: + pass + return result def _get_backend(self, backend_key: str, model: str) -> LLMBackend: return self._backends.get(backend_key, model) diff --git a/navi/llm/base.py b/navi/llm/base.py index fea77d0..4f2c765 100644 --- a/navi/llm/base.py +++ b/navi/llm/base.py @@ -48,12 +48,14 @@ content: str | None tool_calls: list[ToolCallRequest] | None finish_reason: str # "stop" | "tool_calls" | "length" + thinking: str | None = None class LLMChunk(BaseModel): """A single chunk from a streaming LLM response.""" delta: str | None = None + thinking: str | None = None finish_reason: str | None = None # "stop" | "length"; None while streaming diff --git a/navi/llm/ollama.py b/navi/llm/ollama.py index 281ec87..cf85016 100644 --- a/navi/llm/ollama.py +++ b/navi/llm/ollama.py @@ -5,6 +5,7 @@ import ollama as ollama_client +from navi.config import settings from navi.exceptions import LLMBackendError from .base import LLMBackend, LLMChunk, LLMResponse, Message, ToolCallRequest, ToolSchema @@ -29,6 +30,13 @@ return [t.model_dump() for t in tools] +def _base_options() -> dict: + opts: dict = {"temperature": 0.7, "num_ctx": settings.ollama_num_ctx} + if settings.ollama_think: + opts["think"] = True + return opts + + class OllamaBackend(LLMBackend): def __init__(self, model: str, host: str = "http://localhost:11434"): self.model = model @@ -41,10 +49,13 @@ temperature: float = 0.7, ) -> LLMResponse: try: + opts = _base_options() + opts["temperature"] = temperature + kwargs: dict = { "model": self.model, "messages": _to_ollama_messages(messages), - "options": {"temperature": temperature}, + "options": opts, "stream": False, } if tools: @@ -69,6 +80,7 @@ content=msg.content or None, tool_calls=tool_calls, finish_reason=finish_reason, + thinking=getattr(msg, "thinking", None) or None, ) except Exception as e: raise LLMBackendError(str(e)) from e @@ -79,14 +91,18 @@ temperature: float = 0.7, ) -> AsyncGenerator[LLMChunk, None]: try: + opts = _base_options() + opts["temperature"] = temperature + async for chunk in await self._client.chat( model=self.model, messages=_to_ollama_messages(messages), - options={"temperature": temperature}, + options=opts, stream=True, ): + thinking = getattr(chunk.message, "thinking", None) or None delta = chunk.message.content or None finish_reason = "stop" if chunk.done else None - yield LLMChunk(delta=delta, finish_reason=finish_reason) + yield LLMChunk(delta=delta, thinking=thinking, finish_reason=finish_reason) except Exception as e: raise LLMBackendError(str(e)) from e