diff --git a/client/js/app.js b/client/js/app.js index 1d345f4..8b5e659 100644 --- a/client/js/app.js +++ b/client/js/app.js @@ -119,6 +119,9 @@ for (const msg of data.messages) { if (msg.role === 'system') continue; + // is_summary messages exist only in session.messages (display history), + // never in session.context. They are injected when the server loads a + // session whose context was compressed — display them as collapsible cards. if (msg.is_summary) { appendSummaryCard(messagesEl, msg.content ?? ''); continue; diff --git a/navi/api/deps.py b/navi/api/deps.py index 1381243..b58917d 100644 --- a/navi/api/deps.py +++ b/navi/api/deps.py @@ -1,6 +1,5 @@ """FastAPI dependency injection — provides shared singletons to route handlers.""" -from functools import lru_cache from typing import Annotated from fastapi import Depends @@ -18,9 +17,14 @@ from navi.workers import Worker, build_default_workers -@lru_cache +_registries: tuple[ToolRegistry, ProfileRegistry, BackendRegistry] | None = None + + def get_registries() -> tuple[ToolRegistry, ProfileRegistry, BackendRegistry]: - return build_default_registries() + global _registries + if _registries is None: + _registries = build_default_registries() + return _registries def get_tool_registry() -> ToolRegistry: @@ -36,15 +40,15 @@ _session_store = SqliteSessionStore(settings.db_path) +_workers: list[Worker] = build_default_workers() def get_session_store() -> SessionStore: return _session_store -@lru_cache def get_workers() -> list[Worker]: - return build_default_workers() + return _workers def get_agent( diff --git a/navi/core/__init__.py b/navi/core/__init__.py index 57b6759..0ada2bd 100644 --- a/navi/core/__init__.py +++ b/navi/core/__init__.py @@ -1,4 +1,5 @@ -from .agent import Agent, AgentEvent, ContextCompressed, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent +from .agent import Agent +from .events import AgentEvent, ContextCompressed, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent from .registry import BackendRegistry, ProfileRegistry, ToolRegistry, build_default_registries from .session import InMemorySessionStore, Session, SessionStore from .sqlite_session_store import SqliteSessionStore diff --git a/navi/core/agent.py b/navi/core/agent.py index 74ac68c..9ff833a 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -17,7 +17,6 @@ import asyncio import json -from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path from typing import TYPE_CHECKING, AsyncGenerator @@ -29,6 +28,14 @@ from navi.llm.base import LLMBackend, Message, ToolCallRequest from navi.tools.base import Tool +from .events import ( + AgentEvent, + StreamEnd, + TextDelta, + ThinkingDelta, + ThinkingEnd, + ToolEvent, +) from .registry import BackendRegistry, ProfileRegistry, ToolRegistry from .session import SessionStore @@ -48,55 +55,6 @@ log = structlog.get_logger() -@dataclass -class ToolEvent: - """Emitted during streaming to inform the client about tool activity.""" - - tool_name: str - arguments: dict - result: str - success: bool - - -@dataclass -class TextDelta: - """A chunk of text from the streaming LLM response.""" - - delta: str - - -@dataclass -class ThinkingDelta: - """A chunk of thinking/reasoning text from the streaming LLM response.""" - - delta: str - - -@dataclass -class ThinkingEnd: - """Marks the end of the thinking phase.""" - - -@dataclass -class StreamEnd: - """Marks the end of the streaming response.""" - - full_content: str - context_tokens: int | None = None # total tokens used in this turn - max_context_tokens: int = 0 # ollama_num_ctx from config - - -@dataclass -class ContextCompressed: - """Emitted after compression runs successfully.""" - - messages_before: int - messages_after: int - - -AgentEvent = ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd | ContextCompressed - - class Agent: def __init__( self, diff --git a/navi/core/compressor.py b/navi/core/compressor.py index 7f9f501..44adbcd 100644 --- a/navi/core/compressor.py +++ b/navi/core/compressor.py @@ -64,9 +64,16 @@ return to_summarize, to_keep -def _format_for_summary(messages: list[Message]) -> str: - """Render messages as plain text for the summarization prompt.""" +def _format_for_summary(messages: list[Message]) -> tuple[str, list[str]]: + """ + Render messages as plain text for the summarization prompt. + + Returns (text, images) where images is a flat list of base64 strings + collected from all user messages. Vision-capable models will receive + the images alongside the text; non-vision models silently ignore them. + """ lines: list[str] = [] + images: list[str] = [] i = 0 while i < len(messages): m = messages[i] @@ -77,8 +84,15 @@ i += 1 elif m.role == "user": + if m.images: + images.extend(m.images) + img_note = f" [+ {len(m.images)} image(s)]" + else: + img_note = "" if m.content: - lines.append(f"User: {m.content}") + lines.append(f"User: {m.content}{img_note}") + elif img_note: + lines.append(f"User:{img_note}") i += 1 elif m.role == "assistant" and m.tool_calls: @@ -100,7 +114,7 @@ else: i += 1 # skip orphan tool messages - return "\n".join(lines) + return "\n".join(lines), images async def compress_context( @@ -114,7 +128,15 @@ Summarize old messages in the LLM context and return a shorter context list. Only operates on `context` — the full display history (session.messages) is never touched. Returns None if there is nothing to compress. - Raises LLMBackendError on LLM failure — caller decides how to handle. + + Images from old user messages are passed to the summarization model. + Vision-capable models will incorporate image descriptions into the summary; + non-vision models silently ignore the images field. + + Uses the same model already loaded in memory (profile.model passed via WorkerContext) — + no model swap, no extra loading overhead. + + Exceptions propagate to the caller (CompressionWorker catches them). """ system_msgs = [m for m in context if m.role == "system"] to_summarize, to_keep = partition_messages(context, keep_recent) @@ -122,9 +144,10 @@ if len(to_summarize) < 2: return None # nothing substantial to compress + summary_text_input, images = _format_for_summary(to_summarize) prompt = [ Message(role="system", content=_SUMMARIZE_SYSTEM), - Message(role="user", content=_format_for_summary(to_summarize)), + Message(role="user", content=summary_text_input, images=images or None), ] response = await llm.complete(prompt, tools=None, temperature=temperature, model=model) diff --git a/navi/core/events.py b/navi/core/events.py new file mode 100644 index 0000000..96d4efc --- /dev/null +++ b/navi/core/events.py @@ -0,0 +1,52 @@ +"""Agent event dataclasses — emitted during run_stream() and forwarded to WebSocket clients.""" + +from dataclasses import dataclass + + +@dataclass +class ToolEvent: + """Emitted during streaming to inform the client about tool activity.""" + + tool_name: str + arguments: dict + result: str + success: bool + + +@dataclass +class TextDelta: + """A chunk of text from the streaming LLM response.""" + + delta: str + + +@dataclass +class ThinkingDelta: + """A chunk of thinking/reasoning text from the streaming LLM response.""" + + delta: str + + +@dataclass +class ThinkingEnd: + """Marks the end of the thinking phase.""" + + +@dataclass +class StreamEnd: + """Marks the end of the streaming response.""" + + full_content: str + context_tokens: int | None = None # total tokens used in this turn + max_context_tokens: int = 0 # ollama_num_ctx from config + + +@dataclass +class ContextCompressed: + """Emitted after context compression runs successfully.""" + + messages_before: int + messages_after: int + + +AgentEvent = ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd | ContextCompressed diff --git a/navi/workers/base.py b/navi/workers/base.py index 658f17e..0b206a9 100644 --- a/navi/workers/base.py +++ b/navi/workers/base.py @@ -4,7 +4,9 @@ from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING + +from navi.core.events import AgentEvent if TYPE_CHECKING: from navi.core.session import Session, SessionStore @@ -28,7 +30,7 @@ class WorkerResult: """Returned by a worker. `events` will be yielded to the WebSocket client.""" - events: list[Any] = field(default_factory=list) # list[AgentEvent] + events: list[AgentEvent] = field(default_factory=list) class Worker(ABC): diff --git a/navi/workers/compressor.py b/navi/workers/compressor.py index abd299e..8db9062 100644 --- a/navi/workers/compressor.py +++ b/navi/workers/compressor.py @@ -4,6 +4,7 @@ from navi.config import settings from navi.core.compressor import compress_context, should_compress +from navi.core.events import ContextCompressed from .base import Worker, WorkerContext, WorkerResult @@ -51,8 +52,6 @@ after=len(session.context), ) - # Import here to avoid circular dependency - from navi.core.agent import ContextCompressed return WorkerResult(events=[ContextCompressed( messages_before=count_before, messages_after=len(session.context),