diff --git a/client/js/app.js b/client/js/app.js
index 1d345f4..8b5e659 100644
--- a/client/js/app.js
+++ b/client/js/app.js
@@ -119,6 +119,9 @@
     for (const msg of data.messages) {
       if (msg.role === 'system') continue;
 
+      // is_summary messages exist only in session.messages (display history),
+      // never in session.context. They are injected when the server loads a
+      // session whose context was compressed — display them as collapsible cards.
       if (msg.is_summary) {
         appendSummaryCard(messagesEl, msg.content ?? '');
         continue;
diff --git a/navi/api/deps.py b/navi/api/deps.py
index 1381243..b58917d 100644
--- a/navi/api/deps.py
+++ b/navi/api/deps.py
@@ -1,6 +1,5 @@
 """FastAPI dependency injection — provides shared singletons to route handlers."""
 
-from functools import lru_cache
 from typing import Annotated
 
 from fastapi import Depends
@@ -18,9 +17,14 @@
 from navi.workers import Worker, build_default_workers
 
 
-@lru_cache
+_registries: tuple[ToolRegistry, ProfileRegistry, BackendRegistry] | None = None
+
+
 def get_registries() -> tuple[ToolRegistry, ProfileRegistry, BackendRegistry]:
-    return build_default_registries()
+    global _registries  # module-level cache, None until the first call
+    if _registries is None:  # first call: build the registries exactly once
+        _registries = build_default_registries()
+    return _registries  # later calls return the same cached tuple
 
 
 def get_tool_registry() -> ToolRegistry:
@@ -36,15 +40,15 @@
 
 
 _session_store = SqliteSessionStore(settings.db_path)
+_workers: list[Worker] = build_default_workers()
 
 
 def get_session_store() -> SessionStore:
     return _session_store
 
 
-@lru_cache
 def get_workers() -> list[Worker]:
-    return build_default_workers()
+    return _workers  # shared module-level list, built once when this module is imported
 
 
 def get_agent(
diff --git a/navi/core/__init__.py b/navi/core/__init__.py
index 57b6759..0ada2bd 100644
--- a/navi/core/__init__.py
+++ b/navi/core/__init__.py
@@ -1,4 +1,5 @@
-from .agent import Agent, AgentEvent, ContextCompressed, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent
+from .agent import Agent
+from .events import AgentEvent, ContextCompressed, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent
 from .registry import BackendRegistry, ProfileRegistry, ToolRegistry, build_default_registries
 from .session import InMemorySessionStore, Session, SessionStore
 from .sqlite_session_store import SqliteSessionStore
diff --git a/navi/core/agent.py b/navi/core/agent.py
index 74ac68c..9ff833a 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -17,7 +17,6 @@
 
 import asyncio
 import json
-from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import TYPE_CHECKING, AsyncGenerator
@@ -29,6 +28,14 @@
 from navi.llm.base import LLMBackend, Message, ToolCallRequest
 from navi.tools.base import Tool
 
+from .events import (
+    AgentEvent,
+    StreamEnd,
+    TextDelta,
+    ThinkingDelta,
+    ThinkingEnd,
+    ToolEvent,
+)
 from .registry import BackendRegistry, ProfileRegistry, ToolRegistry
 from .session import SessionStore
 
@@ -48,55 +55,6 @@
 log = structlog.get_logger()
 
 
-@dataclass
-class ToolEvent:
-    """Emitted during streaming to inform the client about tool activity."""
-
-    tool_name: str
-    arguments: dict
-    result: str
-    success: bool
-
-
-@dataclass
-class TextDelta:
-    """A chunk of text from the streaming LLM response."""
-
-    delta: str
-
-
-@dataclass
-class ThinkingDelta:
-    """A chunk of thinking/reasoning text from the streaming LLM response."""
-
-    delta: str
-
-
-@dataclass
-class ThinkingEnd:
-    """Marks the end of the thinking phase."""
-
-
-@dataclass
-class StreamEnd:
-    """Marks the end of the streaming response."""
-
-    full_content: str
-    context_tokens: int | None = None   # total tokens used in this turn
-    max_context_tokens: int = 0         # ollama_num_ctx from config
-
-
-@dataclass
-class ContextCompressed:
-    """Emitted after compression runs successfully."""
-
-    messages_before: int
-    messages_after: int
-
-
-AgentEvent = ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd | ContextCompressed
-
-
 class Agent:
     def __init__(
         self,
diff --git a/navi/core/compressor.py b/navi/core/compressor.py
index 7f9f501..44adbcd 100644
--- a/navi/core/compressor.py
+++ b/navi/core/compressor.py
@@ -64,9 +64,16 @@
     return to_summarize, to_keep
 
 
-def _format_for_summary(messages: list[Message]) -> str:
-    """Render messages as plain text for the summarization prompt."""
+def _format_for_summary(messages: list[Message]) -> tuple[str, list[str]]:
+    """
+    Render messages as plain text for the summarization prompt.
+
+    Returns (text, images) where images is a flat list of base64 strings
+    collected from all user messages. Vision-capable models will receive
+    the images alongside the text; non-vision models silently ignore them.
+    """
     lines: list[str] = []
+    images: list[str] = []
     i = 0
     while i < len(messages):
         m = messages[i]
@@ -77,8 +84,15 @@
             i += 1
 
         elif m.role == "user":
+            if m.images:
+                images.extend(m.images)
+                img_note = f" [+ {len(m.images)} image(s)]"
+            else:
+                img_note = ""
             if m.content:
-                lines.append(f"User: {m.content}")
+                lines.append(f"User: {m.content}{img_note}")
+            elif img_note:
+                lines.append(f"User:{img_note}")
             i += 1
 
         elif m.role == "assistant" and m.tool_calls:
@@ -100,7 +114,7 @@
         else:
             i += 1  # skip orphan tool messages
 
-    return "\n".join(lines)
+    return "\n".join(lines), images
 
 
 async def compress_context(
@@ -114,7 +128,15 @@
     Summarize old messages in the LLM context and return a shorter context list.
     Only operates on `context` — the full display history (session.messages) is never touched.
     Returns None if there is nothing to compress.
-    Raises LLMBackendError on LLM failure — caller decides how to handle.
+
+    Images from old user messages are passed to the summarization model.
+    Vision-capable models will incorporate image descriptions into the summary;
+    non-vision models silently ignore the images field.
+
+    Uses the same model already loaded in memory (profile.model passed via WorkerContext) —
+    no model swap, no extra loading overhead.
+
+    Exceptions propagate to the caller (CompressionWorker catches them).
     """
     system_msgs = [m for m in context if m.role == "system"]
     to_summarize, to_keep = partition_messages(context, keep_recent)
@@ -122,9 +144,10 @@
     if len(to_summarize) < 2:
         return None  # nothing substantial to compress
 
+    summary_text_input, images = _format_for_summary(to_summarize)
     prompt = [
         Message(role="system", content=_SUMMARIZE_SYSTEM),
-        Message(role="user", content=_format_for_summary(to_summarize)),
+        Message(role="user", content=summary_text_input, images=images or None),
     ]
 
     response = await llm.complete(prompt, tools=None, temperature=temperature, model=model)
diff --git a/navi/core/events.py b/navi/core/events.py
new file mode 100644
index 0000000..96d4efc
--- /dev/null
+++ b/navi/core/events.py
@@ -0,0 +1,52 @@
+"""Agent event dataclasses — emitted during run_stream() and forwarded to WebSocket clients."""
+
+from dataclasses import dataclass
+
+
+@dataclass
+class ToolEvent:
+    """Emitted during streaming to inform the client about tool activity."""
+
+    tool_name: str   # name of the tool this event reports on
+    arguments: dict  # presumably the arguments the tool was called with — confirm at emit site
+    result: str      # presumably the tool's output rendered as text — confirm at emit site
+    success: bool    # presumably True when the tool call succeeded — confirm at emit site
+
+
+@dataclass
+class TextDelta:
+    """A chunk of text from the streaming LLM response."""
+
+    delta: str  # the text chunk carried by this event
+
+
+@dataclass
+class ThinkingDelta:
+    """A chunk of thinking/reasoning text from the streaming LLM response."""
+
+    delta: str  # the thinking/reasoning text chunk carried by this event
+
+
+@dataclass
+class ThinkingEnd:
+    """Marks the end of the thinking phase; a pure marker event that carries no fields."""
+
+
+@dataclass
+class StreamEnd:
+    """Marks the end of the streaming response."""
+
+    full_content: str  # presumably the complete response text — confirm at emit site
+    context_tokens: int | None = None   # total tokens used in this turn; defaults to None
+    max_context_tokens: int = 0         # ollama_num_ctx from config; defaults to 0
+
+
+@dataclass
+class ContextCompressed:
+    """Emitted after context compression runs successfully."""
+
+    messages_before: int  # CompressionWorker passes its count_before here (presumably the pre-compression context length)
+    messages_after: int   # CompressionWorker passes len(session.context) measured after compression
+
+
+AgentEvent = ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd | ContextCompressed
diff --git a/navi/workers/base.py b/navi/workers/base.py
index 658f17e..0b206a9 100644
--- a/navi/workers/base.py
+++ b/navi/workers/base.py
@@ -4,7 +4,9 @@
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
+
+from navi.core.events import AgentEvent
 
 if TYPE_CHECKING:
     from navi.core.session import Session, SessionStore
@@ -28,7 +30,7 @@
 class WorkerResult:
     """Returned by a worker. `events` will be yielded to the WebSocket client."""
 
-    events: list[Any] = field(default_factory=list)  # list[AgentEvent]
+    events: list[AgentEvent] = field(default_factory=list)
 
 
 class Worker(ABC):
diff --git a/navi/workers/compressor.py b/navi/workers/compressor.py
index abd299e..8db9062 100644
--- a/navi/workers/compressor.py
+++ b/navi/workers/compressor.py
@@ -4,6 +4,7 @@
 
 from navi.config import settings
 from navi.core.compressor import compress_context, should_compress
+from navi.core.events import ContextCompressed
 
 from .base import Worker, WorkerContext, WorkerResult
 
@@ -51,8 +52,6 @@
             after=len(session.context),
         )
 
-        # Import here to avoid circular dependency
-        from navi.core.agent import ContextCompressed
         return WorkerResult(events=[ContextCompressed(
             messages_before=count_before,
             messages_after=len(session.context),