diff --git a/client/js/app.js b/client/js/app.js
index b6d7311..1d345f4 100644
--- a/client/js/app.js
+++ b/client/js/app.js
@@ -3,7 +3,8 @@
 import { appendMessage, appendStreamBubble, finalizeStreamBubble,
          appendToolCall, appendThinkingCard, finalizeThinkingCard,
          appendTypingIndicator, removeTypingIndicator,
-         appendError, showEmptyState, scrollToBottom }       from './chat.js';
+         appendError, showEmptyState, scrollToBottom,
+         appendSummaryCard, appendCompressionNotice }         from './chat.js';
 import { renderProfiles, renderSessions, updateChatHeader }  from './sidebar.js';
 
 // ── DOM refs ─────────────────────────────────────────────────────────────────
@@ -118,6 +119,11 @@
     for (const msg of data.messages) {
       if (msg.role === 'system') continue;
 
+      if (msg.is_summary) {
+        appendSummaryCard(messagesEl, msg.content ?? '');
+        continue;
+      }
+
       if (msg.role === 'tool') {
         const tc = toolCallMap[msg.tool_call_id] ?? { name: msg.name ?? '?', args: {} };
         const success = !msg.content?.startsWith('Error:');
@@ -218,6 +224,11 @@
       setInputEnabled(true);
       break;
 
+    case 'context_compressed':
+      appendCompressionNotice(messagesEl);
+      scrollToBottom(messagesEl);
+      break;
+
     case 'error':
       finishStream();
       appendError(messagesEl, event.message);
diff --git a/client/js/chat.js b/client/js/chat.js
index 5d2eb47..1675c74 100644
--- a/client/js/chat.js
+++ b/client/js/chat.js
@@ -207,6 +207,38 @@
     </div>`;
 }
 
+/**
+ * Append a collapsible "Context Summary" card; clicking its header toggles the `open` class.
+ * @param {Element} el - container that receives the card
+ * @param {string} content - summary text; a leading "[Context Summary]" tag line is stripped
+ */
+export function appendSummaryCard(el, content) {
+  const makeDiv = (className) => {
+    const node = document.createElement('div');
+    node.className = className;
+    return node;
+  };
+  const summaryCard = makeDiv('summary-card');
+  const titleBar = makeDiv('summary-header');
+  const details = makeDiv('summary-body');
+
+  titleBar.innerHTML = '<span class="summary-icon">📋</span><span>Context Summary</span>';
+  titleBar.addEventListener('click', () => summaryCard.classList.toggle('open'));
+  details.appendChild(renderMarkdown(content.replace(/^\[Context Summary\]\n?/, '')));
+  summaryCard.append(titleBar, details);
+  el.appendChild(summaryCard);
+}
+
+/** Append the inline notice shown after context compression to the message list `el`. */
+export function appendCompressionNotice(el) {
+  const notice = document.createElement('div');
+  Object.assign(notice, {
+    className: 'compression-notice',
+    textContent: '↑ Older messages summarized to free context space',
+  });
+  el.appendChild(notice);
+}
+
 export function scrollToBottom(el) {
   el.scrollTop = el.scrollHeight;
 }
diff --git a/client/style.css b/client/style.css
index 8009bfc..9bb8dc4 100644
--- a/client/style.css
+++ b/client/style.css
@@ -329,6 +329,57 @@
   color: var(--text);
 }
 
+/* ── Context summary card (compressed history) ───────── */
+
+.summary-card {
+  align-self: flex-start; /* assumes the parent message list is a flex container — TODO confirm */
+  max-width: 84%;
+  background: #161b22;
+  border: 1px solid #30363d;
+  border-radius: var(--radius);
+  font-size: 12px;
+  color: var(--text-muted);
+}
+
+.summary-header {
+  display: flex;
+  align-items: center;
+  gap: 7px;
+  padding: 8px 12px;
+  cursor: pointer; /* the header is the click target; appendSummaryCard toggles .open on the card */
+  user-select: none; /* keep repeated clicks from selecting the title text */
+  font-weight: 600;
+  border-radius: var(--radius);
+}
+.summary-header:hover { background: rgba(255,255,255,0.03); }
+.summary-icon { font-size: 13px; }
+.summary-card:not(.open) .summary-header::after { content: '›'; font-size: 16px; opacity: 0.5; } /* chevron while collapsed */
+.summary-card.open       .summary-header::after { content: '‹'; font-size: 16px; opacity: 0.5; } /* chevron while expanded */
+
+.summary-body {
+  border-top: 1px solid #30363d;
+  padding: 8px 12px;
+  display: none; /* collapsed by default; revealed by the .open rule below */
+  color: var(--text-muted);
+}
+.summary-card.open .summary-body {
+  display: block;
+  animation: fadeSlide 0.18s ease; /* fadeSlide keyframes are not in this hunk — presumably defined elsewhere in style.css */
+}
+.summary-body .prose { font-size: 12px; line-height: 1.55; } /* .prose is presumably the renderMarkdown wrapper — verify */
+
+/* Compression notice — inline divider */
+.compression-notice {
+  align-self: center;
+  font-size: 11px;
+  color: var(--text-muted);
+  padding: 2px 12px;
+  border-radius: 99px; /* large radius gives the notice a pill shape */
+  border: 1px solid #30363d;
+  background: #161b22;
+  opacity: 0.7;
+}
+
 /* ── Thinking card ───────────────────────────────────── */
 
 .thinking-card {
diff --git a/navi/api/websocket.py b/navi/api/websocket.py
index 321ddbf..2721d0e 100644
--- a/navi/api/websocket.py
+++ b/navi/api/websocket.py
@@ -20,7 +20,7 @@
 from fastapi import APIRouter, WebSocket, WebSocketDisconnect
 
 from navi.api.deps import get_agent, get_session_store
-from navi.core import Agent, InMemorySessionStore, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent
+from navi.core import Agent, ContextCompressed, InMemorySessionStore, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent
 from navi.exceptions import MaxIterationsReached, NaviError, SessionNotFound
 
 router = APIRouter(tags=["websocket"])
@@ -95,6 +95,12 @@
                             "context_tokens": event.context_tokens,
                             "max_context_tokens": event.max_context_tokens,
                         })
+                    elif isinstance(event, ContextCompressed):
+                        await websocket.send_json({
+                            "type": "context_compressed",
+                            "messages_before": event.messages_before,
+                            "messages_after": event.messages_after,
+                        })
 
             except SessionNotFound:
                 await websocket.send_json({"type": "error", "message": "Session not found"})
diff --git a/navi/config.py b/navi/config.py
index 18a5db8..6c676fc 100644
--- a/navi/config.py
+++ b/navi/config.py
@@ -5,7 +5,7 @@
     model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
 
     ollama_host: str = "http://localhost:11434"
-    ollama_default_model: str = "gemma4:e2b-it-q8_0"
+    ollama_default_model: str = "gemma4:e4b-it-q8_0"
     ollama_num_ctx: int = 65536
     ollama_think: bool = True
 
@@ -30,6 +30,12 @@
     # Directory for user-defined tools (auto-discovered at startup)
     tools_dir: str = "tools"
 
+    # Context compression
+    context_compression_enabled: bool = True
+    context_compression_threshold: float = 0.80   # trigger at 80% of ollama_num_ctx
+    context_keep_recent: int = 6                   # conversational turns to keep verbatim
+    context_summary_temperature: float = 0.3
+
     # Global personality prompt prepended to every agent's system prompt.
     # Override via NAVI_PERSONA env var or .env file.
     navi_persona: str = ""
diff --git a/navi/core/__init__.py b/navi/core/__init__.py
index db502b5..57b6759 100644
--- a/navi/core/__init__.py
+++ b/navi/core/__init__.py
@@ -1,4 +1,4 @@
-from .agent import Agent, AgentEvent, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent
+from .agent import Agent, AgentEvent, ContextCompressed, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent
 from .registry import BackendRegistry, ProfileRegistry, ToolRegistry, build_default_registries
 from .session import InMemorySessionStore, Session, SessionStore
 from .sqlite_session_store import SqliteSessionStore
@@ -11,6 +11,7 @@
     "ThinkingDelta",
     "ThinkingEnd",
     "ToolEvent",
+    "ContextCompressed",
     "BackendRegistry",
     "ProfileRegistry",
     "ToolRegistry",
diff --git a/navi/core/agent.py b/navi/core/agent.py
index ec5690b..60547b4 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -28,6 +28,7 @@
 from navi.llm.base import LLMBackend, Message, ToolCallRequest
 from navi.tools.base import Tool
 
+from .compressor import compress_session, should_compress
 from .registry import BackendRegistry, ProfileRegistry, ToolRegistry
 from .session import SessionStore
 
@@ -81,7 +82,15 @@
     max_context_tokens: int = 0         # ollama_num_ctx from config
 
 
-AgentEvent = ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd
+@dataclass
+class ContextCompressed:
+    """Event yielded once the compressed message list has replaced the session history and been saved."""
+
+    messages_before: int  # len(session.messages) before compression ran
+    messages_after: int  # len(session.messages) after the summary replaced the old turns
+
+
+AgentEvent = ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd | StreamEnd | ContextCompressed
 
 
 class Agent:
@@ -206,11 +215,44 @@
 
                 session.messages.append(Message(role="assistant", content=accumulated, created_at=datetime.now(timezone.utc)))
                 await self._sessions.save(session)
+
                 yield StreamEnd(
                     full_content=accumulated,
                     context_tokens=context_tokens,
                     max_context_tokens=settings.ollama_num_ctx,
                 )
+
+                # Post-response compression — runs after client receives StreamEnd
+                if (
+                    settings.context_compression_enabled
+                    and context_tokens is not None
+                    and should_compress(context_tokens, settings.ollama_num_ctx, settings.context_compression_threshold)
+                ):
+                    count_before = len(session.messages)
+                    try:
+                        new_messages = await compress_session(
+                            messages=session.messages,
+                            llm=llm,
+                            model=profile.model,
+                            temperature=settings.context_summary_temperature,
+                            keep_recent=settings.context_keep_recent,
+                        )
+                        if new_messages is not None:
+                            session.messages = new_messages
+                            await self._sessions.save(session)
+                            log.info(
+                                "agent.compressed",
+                                session_id=session_id,
+                                before=count_before,
+                                after=len(session.messages),
+                            )
+                            yield ContextCompressed(
+                                messages_before=count_before,
+                                messages_after=len(session.messages),
+                            )
+                    except Exception:
+                        log.warning("agent.compress_failed", session_id=session_id, exc_info=True)
+
                 return
 
             # Tool calls: emit events, execute, continue loop
diff --git a/navi/core/compressor.py b/navi/core/compressor.py
new file mode 100644
index 0000000..492dced
--- /dev/null
+++ b/navi/core/compressor.py
@@ -0,0 +1,140 @@
+"""
+Context compressor — summarizes old messages to stay within the token limit.
+
+Flow:
+1. Partition session messages into "to_summarize" (old turns) and "to_keep" (recent turns).
+2. Call the LLM to produce a concise bullet-point summary of the old turns.
+3. Replace the old turns with a single summary message (role=user, is_summary=True).
+
+A "turn" is one user message plus all following assistant/tool messages up to the
+next user message. Tool call groups (assistant + tool results) are never split.
+Existing summary messages are always folded into the next compression pass.
+"""
+
+import json
+from datetime import datetime, timezone
+
+from navi.llm.base import LLMBackend, Message
+
+_SUMMARIZE_SYSTEM = (
+    "You are summarizing a conversation history to free up context space. "
+    "Produce a concise factual summary covering: key facts the user shared, "
+    "decisions made, tasks completed or in progress, important outputs or findings. "
+    "Use bullet points. Be brief — this summary replaces the conversation in context. "
+    "Do not include greetings or filler."
+)
+
+
+def should_compress(context_tokens: int, max_context_tokens: int, threshold: float) -> bool:
+    return int(threshold * max_context_tokens) <= context_tokens  # usage reached the (truncated) trigger point
+
+
+def partition_messages(
+    messages: list[Message], keep_recent: int
+) -> tuple[list[Message], list[Message]]:
+    """
+    Split non-system messages into (to_summarize, to_keep).
+
+    A turn is a user message plus everything up to the next user message, so an
+    assistant tool call is never separated from its tool results. The last
+    `keep_recent` turns are kept verbatim and every older turn is summarized;
+    `keep_recent <= 0` keeps nothing verbatim.
+
+    System messages are in neither list; with at most `keep_recent` turns the result is ([], non-system).
+    """
+    non_system = [m for m in messages if m.role != "system"]
+
+    # Group into turns: a user message opens a new turn
+    turns: list[list[Message]] = []
+    for msg in non_system:
+        if msg.role == "user" or not turns:
+            turns.append([msg])
+        else:
+            turns[-1].append(msg)
+
+    keep = max(keep_recent, 0)
+    if len(turns) <= keep:
+        return [], non_system  # nothing old enough to compress
+
+    # Split by a front index: turns[:-0] is empty and turns[-0:] is everything,
+    # so negative-index slicing would never compress when keep_recent is 0.
+    split = len(turns) - keep
+    to_summarize = [m for turn in turns[:split] for m in turn]
+    to_keep = [m for turn in turns[split:] for m in turn]
+    return to_summarize, to_keep
+
+
+def _format_for_summary(messages: list[Message]) -> str:
+    """
+    Render messages as a plain-text transcript for the summarization prompt.
+
+    Earlier summaries pass through unchanged; tool arguments are cut to 120 characters
+    and tool results to 300. Tool messages outside a tool-call group are skipped.
+    """
+    lines: list[str] = []
+    i = 0
+    while i < len(messages):
+        m = messages[i]
+        i += 1
+        if m.is_summary:
+            # Existing summary — include as-is (already compressed)
+            lines.append(m.content or "")
+
+        elif m.role == "user":
+            if m.content:
+                lines.append(f"User: {m.content}")
+
+        elif m.role == "assistant" and m.tool_calls:
+            # Text sent alongside the tool calls is part of the turn — keep it in the transcript
+            if m.content:
+                lines.append(f"Assistant: {m.content}")
+            for tc in m.tool_calls:
+                args_preview = json.dumps(tc.arguments)[:120]
+                lines.append(f"[Called tool `{tc.name}` with {args_preview}]")
+            while i < len(messages) and messages[i].role == "tool":
+                result = messages[i].content or ""
+                preview = result[:300] + ("…" if len(result) > 300 else "")
+                lines.append(f"[Tool `{messages[i].name}` returned: {preview}]")
+                i += 1
+
+        elif m.role == "assistant" and m.content:
+            lines.append(f"Assistant: {m.content}")
+
+    return "\n".join(lines)
+
+
+async def compress_session(
+    messages: list[Message],
+    llm: LLMBackend,
+    model: str,
+    temperature: float,
+    keep_recent: int,
+) -> list[Message] | None:
+    """
+    Summarize old messages and return a new (shorter) message list.
+
+    Result: system messages, one summary message (role=user, is_summary=True), then
+    the recent turns verbatim. Returns None (history untouched) when fewer than two old
+    messages exist or the summary is empty. `llm.complete` errors propagate to the caller.
+    """
+    system_msgs = [m for m in messages if m.role == "system"]
+    to_summarize, to_keep = partition_messages(messages, keep_recent)
+    if len(to_summarize) < 2:
+        return None  # nothing substantial to compress
+
+    prompt = [
+        Message(role="system", content=_SUMMARIZE_SYSTEM),
+        Message(role="user", content=_format_for_summary(to_summarize)),
+    ]
+    response = await llm.complete(prompt, tools=None, temperature=temperature, model=model)
+    summary_text = (response.content or "").strip()
+    if not summary_text:
+        return None  # never trade real history for an empty placeholder
+
+    summary_msg = Message(
+        role="user",
+        content=f"[Context Summary]\n{summary_text}",
+        is_summary=True,
+        created_at=datetime.now(timezone.utc),
+    )
+    return system_msgs + [summary_msg] + to_keep
diff --git a/navi/llm/base.py b/navi/llm/base.py
index 88e8946..6c865b1 100644
--- a/navi/llm/base.py
+++ b/navi/llm/base.py
@@ -40,6 +40,8 @@
     tool_call_id: str | None = None
     name: str | None = None  # tool name on tool result messages
     created_at: datetime | None = None
+    # marks a compressed history block injected by the context compressor
+    is_summary: bool = False
 
 
 class LLMResponse(BaseModel):