diff --git a/docs/config.md b/docs/config.md
index 73ffb71..cca852a 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -133,7 +133,7 @@
 | `CONTEXT_COMPRESSION_THRESHOLD` | float | `0.70` | Trigger compression at this fraction of `OLLAMA_NUM_CTX` |
 | `CONTEXT_KEEP_RECENT` | int | `8` | Number of recent conversation turns to keep verbatim |
 | `CONTEXT_SUMMARY_TEMPERATURE` | float | `0.3` | Temperature for the summarization LLM call |
-| `CONTEXT_SUMMARY_MAX_TOKENS` | int | `3000` | Max output tokens for the summary LLM call |
+| `CONTEXT_SUMMARY_MAX_TOKENS` | int | `4000` | Max output tokens for the summary LLM call |
 | `OUTPUT_RESERVE_TOKENS` | int | `2048` | Headroom reserved for model response in context size checks |
 
 ## Gmail
diff --git a/navi/config.py b/navi/config.py
index 53ee5f9..53c2411 100644
--- a/navi/config.py
+++ b/navi/config.py
@@ -122,7 +122,7 @@
     context_compression_threshold: float = 0.70   # trigger at 70% of ollama_num_ctx
     context_keep_recent: int = 8                   # conversational turns to keep verbatim
     context_summary_temperature: float = 0.3
-    context_summary_max_tokens: int = 3000         # max output tokens for the summary LLM call
+    context_summary_max_tokens: int = 4000         # max output tokens for the summary LLM call
     output_reserve_tokens: int = 2048              # headroom reserved for model response in context checks
 
     # Global personality prompt prepended to every agent's system prompt.
diff --git a/navi/core/agent.py b/navi/core/agent.py
index 12090d8..978e67d 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -178,6 +178,11 @@
             mcp_manager=mcp_manager,
         )
 
+    def _set_active_profile(self, profile) -> None:
+        """Propagate the active profile to self._compressor and self._subagent (nothing is cached here)."""
+        self._compressor.set_profile(profile)
+        self._subagent.set_profile(profile)
+
     # ------------------------------------------------------------------
     # Public interface
     # ------------------------------------------------------------------
@@ -510,6 +515,7 @@
             if fresh and fresh.profile_id != session.profile_id:
                 session.profile_id = fresh.profile_id
                 profile = self._profiles.get(session.profile_id)
+                self._set_active_profile(profile)
                 tools = self._tool_list(profile.get_agent_tools())
                 tool_schemas = [t.schema() for t in tools]
                 llm = self._get_backend(profile.llm_backend)
diff --git a/navi/core/compressor.py b/navi/core/compressor.py
index bcffeaa..03a9a12 100644
--- a/navi/core/compressor.py
+++ b/navi/core/compressor.py
@@ -3,63 +3,107 @@
 
 Flow:
 1. Partition session messages into "to_summarize" (old turns) and "to_keep" (recent turns).
-2. Call the LLM to produce a concise bullet-point summary of the old turns.
-3. Replace the old turns with a single summary message (role=user, is_summary=True).
+2. Call the LLM to produce a structured summary of the old messages.
+3. Replace the old turns with a structured summary message (role=user, is_summary=True).
 
 A "turn" is one user message plus all following assistant/tool messages up to the
 next user message. Tool call groups (assistant + tool results) are never split.
 Existing summary messages are always folded into the next compression pass.
+
+Compression is profile-aware: AgentProfile can provide compression_keep_recent,
+compression_max_tokens, and a compression_prompt_file to specialize summaries.
 """
 
 import json
+import re
 from datetime import datetime, timezone
+from pathlib import Path
+from typing import TYPE_CHECKING
 
 from navi.llm.base import LLMBackend, Message
 from navi.config import settings
 from .events import ContextCompressed
 
-_SUMMARIZE_SYSTEM = (
+if TYPE_CHECKING:
+    from navi.profiles.base import AgentProfile
+
+
+_SUMMARY_SECTIONS = [
+    ("Goal", "One clear sentence describing what the user is trying to accomplish in this session. Include deadlines or constraints if stated."),
+    ("Active Files", "Every file or directory the assistant touched, with absolute or project-relative path and status: created / modified / read / deleted. For modified files, note the purpose of the change."),
+    ("Decisions & User Preferences", "Explicit choices, architecture decisions, style preferences, or corrections stated by the user. Things the user said NOT to do."),
+    ("Completed Work", "Concrete finished steps — include file/function names and verification outcome if available."),
+    ("Pending Work / Todo", "Open tasks, in-progress items, or follow-ups that still need action."),
+    ("Errors & Blockers", "Failures, exceptions, or unresolved issues. Include exact error snippets when short and diagnostic."),
+    ("Key Values", "Exact constants the assistant should remember: ports, config keys, versions, dependency names, important paths, IDs."),
+]
+
+_SUMMARY_TEMPLATE_INSTRUCTIONS = (
     "You are summarizing a conversation history to free up context space. "
     "The assistant will continue working using ONLY this summary — it will have no access "
     "to the original messages. Be thorough and precise. Prefer specifics over generalities. "
     "This summary is historical context, not a new user request.\n\n"
-    "## Current goal\n"
-    "What the user is trying to accomplish in this session. Include any stated deadlines, "
-    "constraints, or acceptance criteria.\n\n"
-    "## Work state\n"
-    "What has been completed (be specific — name files, functions, endpoints, steps). "
-    "What is still in progress or pending. Any blockers or open questions.\n\n"
-    "## Key facts\n"
-    "Everything the user told the assistant: preferences, system details, environment info, "
-    "decisions made, constraints discovered, explicit instructions. Include exact values "
-    "(port numbers, file paths, config keys, IDs) — do not paraphrase if precision matters.\n\n"
-    "## Outputs\n"
-    "Every file created or modified (full paths). Every config value set. "
-    "Commands run and their outcome. Preserve exact command output only when it is short "
-    "and needed to prove a result or diagnose an error. "
-    "Do not preserve tool-call-like examples with parenthesized arguments; describe tool "
-    "usage in words or as key/value facts instead.\n\n"
-    "## Errors\n"
-    "Failures, exceptions, or unexpected results encountered. "
-    "How each was resolved — or that it remains unresolved.\n\n"
-    "## User preferences and feedback\n"
-    "Corrections the user made to the assistant's approach. Explicit style or behavior "
-    "preferences stated during this session. Things the user said not to do.\n\n"
-    "Do not include greetings, filler, transitions, or meta-commentary about the summary itself. "
-    "Do not use Markdown code fences or inline-code backticks unless preserving a user-authored "
-    "literal value is essential. "
-    "Write in tight prose or bullet points — whatever preserves more information per token."
+    "Use EXACTLY the Markdown structure below. Every section must be present. "
+    "If a section has no relevant information, write its header and the literal word NONE. "
+    "Keep bullet points tight and information-dense. "
+    "Do not include greetings, filler, transitions, or meta-commentary.\n\n"
+    + "\n\n".join(f"## {title}\n{desc}" for title, desc in _SUMMARY_SECTIONS)
+    + "\n\n"
+    "Output rules:\n"
+    "- Preserve exact file paths, function names, config keys, and short error snippets verbatim.\n"
+    "- Do not paraphrase values that must stay precise.\n"
+    "- Do not write implementation code, patches, or long command output.\n"
+    "- Use Markdown headers exactly as shown."
 )
 
 
+# Tools whose full output is often needed later and should not be aggressively truncated.
+_CRITICAL_TOOL_NAMES = frozenset({
+    "filesystem",
+    "code_exec",
+    "terminal",
+    "ssh_exec",
+})
+
+
+# Content markers that make a turn worth preserving verbatim longer.
+_CRITICAL_PATTERNS = [
+    re.compile(r"\b(error|exception|traceback|failed|failure)\b", re.IGNORECASE),
+    re.compile(r"\b(user\s+said|no,\s+|don't\s+|do\s+not\s+|never\s+|instead\s+|wrong\s+|incorrect\s+|fix\s+|correct\s+)\b", re.IGNORECASE),
+    re.compile(r"\b(edited|modified|created|deleted|wrote|added)\s+(file|function|class|method)\b", re.IGNORECASE),
+]
+
+
 def should_compress(context_tokens: int, max_context_tokens: int, threshold: float) -> bool:
     return context_tokens >= int(max_context_tokens * threshold)
 
 
+def _turn_importance(turn: list[Message]) -> int:
+    """Score a turn for adaptive keep_recent. Higher = more important to keep; never negative."""
+    score = 0
+    text = "\n".join((m.content or "") for m in turn)
+    # Strong signals: user corrections and explicit negatives. Matched as whole words so
+    # "prefix"/"suffix"/"fixture" in tool output no longer count as a "fix" correction.
+    if re.search(r"\b(wrong|incorrect|fix|don't use|do not use|instead use|change to)\b", text, re.IGNORECASE):
+        score += 3
+    for pattern in _CRITICAL_PATTERNS:
+        score += len(pattern.findall(text))
+    for m in turn:
+        if getattr(m, "is_compression_critical", False):
+            score += 3
+        if m.role == "tool" and m.name in _CRITICAL_TOOL_NAMES:
+            score += 1
+        if m.role == "user" and len((m.content or "").strip()) <= 20:
+            # Very short user messages are usually social/filler; deprioritize
+            score -= 2
+    return max(0, score)
+
+
 def partition_messages(
     messages: list[Message],
     keep_recent: int,
     keep_recent_messages: int | None = None,
+    adaptive: bool = True,
 ) -> tuple[list[Message], list[Message]]:
     """
     Returns (to_summarize, to_keep).
@@ -67,6 +111,8 @@
     Keeps the system message and the last `keep_recent` conversational turns verbatim.
     Everything older goes into to_summarize.
     Tool call groups (assistant + tool results) always stay together.
+    When adaptive=True, important turns (user corrections, errors, critical tools)
+    are kept longer and social/filler turns are compressed sooner.
     """
     non_system = [m for m in messages if m.role != "system"]
 
@@ -89,8 +135,24 @@
                 return intra_turn
         return [], non_system  # nothing old enough to compress
 
-    old_turns = turns[:-keep_recent]
-    recent_turns = turns[-keep_recent:]
+    # Adaptive: pull important older turns into the kept region and push
+    # unimportant recent/filler turns out for summarization.
+    base_keep = keep_recent
+    recent_turns = turns[-base_keep:]
+    old_turns = turns[:-base_keep]
+    if adaptive:
+        # Pair the oldest fillers with the newest important old turns; zip() stops at the shorter list.
+        important_old = [t for t in reversed(old_turns) if _turn_importance(t) > 0]
+        # Never evict the newest turn, even if it scores as filler (e.g. a short "yes"):
+        # it is the live end of the conversation and must stay verbatim.
+        filler_recent = [t for t in recent_turns[:-1] if _turn_importance(t) == 0]
+        swapped = list(zip(filler_recent, important_old))
+        evicted = {id(filler) for filler, _ in swapped}
+        promoted = {id(old) for _, old in swapped}
+        # Track turns by identity (not ==) and rebuild in original order so kept context stays chronological.
+        kept_set = ({id(t) for t in recent_turns} - evicted) | promoted
+        recent_turns = [t for t in turns if id(t) in kept_set]
+        old_turns = [t for t in turns if id(t) not in kept_set]
 
     to_summarize = [m for turn in old_turns for m in turn]
     to_keep = [m for turn in recent_turns for m in turn]
@@ -167,9 +229,19 @@
                 lines.append(f"[Tool call: {tc.name}; arguments preview: {args_preview}]")
             i += 1
             while i < len(messages) and messages[i].role == "tool":
-                result = messages[i].content or ""
-                preview = result[:300] + ("…" if len(result) > 300 else "")
-                lines.append(f"[Tool result: {messages[i].name}; preview: {preview}]")
+                tool_msg = messages[i]
+                result = tool_msg.content or ""
+                # Critical tool results and explicit critical flag get a larger
+                # verbatim budget, so exact errors/file contents are preserved.
+                critical = (
+                    getattr(tool_msg, "is_compression_critical", False)
+                    or tool_msg.name in _CRITICAL_TOOL_NAMES
+                )
+                # Cut at the budget rather than dropping an over-budget critical
+                # result all the way back to the 300-char preview.
+                limit = 4000 if critical else 300
+                preview = result[:limit] + ("…" if len(result) > limit else "")
+                lines.append(f"[Tool result: {tool_msg.name}; preview: {preview}]")
                 i += 1
 
         elif m.role == "assistant" and m.content:
@@ -229,6 +301,24 @@
     )
 
 
+def _build_summary_system_prompt(profile: "AgentProfile | None") -> str:
+    """Build the system prompt used by the summarization LLM.
+
+    Appends the profile's compression_prompt_file, if configured and readable.
+    """
+    prompt_file = getattr(profile, "compression_prompt_file", None)
+    if not prompt_file:
+        return _SUMMARY_TEMPLATE_INSTRUCTIONS
+    # Anchor on this module (navi/core/) so the lookup does not depend on the working directory.
+    prompt_path = Path(__file__).resolve().parent.parent / "profiles" / profile.id / prompt_file
+    try:
+        extra = prompt_path.read_text(encoding="utf-8").strip()
+    except (OSError, UnicodeDecodeError):
+        # Best effort: a missing or unreadable file falls back to the base prompt.
+        return _SUMMARY_TEMPLATE_INSTRUCTIONS
+    return _SUMMARY_TEMPLATE_INSTRUCTIONS + "\n\n---\n\n[Profile-specific compression instructions]\n\n" + extra
+
+
 async def compress_context(
     context: list[Message],
     llm: LLMBackend,
@@ -237,6 +327,7 @@
     keep_recent: int,
     max_tokens: int | None = None,
     keep_recent_messages: int | None = None,
+    profile: "AgentProfile | None" = None,
 ) -> tuple[list[Message], str] | None:
     """
     Summarize old messages in the LLM context and return a shorter context list.
@@ -250,12 +341,20 @@
     Uses the same model already loaded in memory (profile.model passed via WorkerContext) —
     no model swap, no extra loading overhead.
 
+    Profile settings override global defaults when provided:
+      - compression_keep_recent -> keep_recent
+      - compression_max_tokens -> max_tokens
+      - compression_prompt_file -> appended to summary system prompt
+
     Exceptions propagate to the caller (CompressionWorker catches them).
     """
+    effective_keep_recent = getattr(profile, "compression_keep_recent", None) or keep_recent
+    effective_max_tokens = getattr(profile, "compression_max_tokens", None) or max_tokens
+
     system_msgs = [m for m in context if m.role == "system"]
     to_summarize, to_keep = partition_messages(
         context,
-        keep_recent,
+        effective_keep_recent,
         keep_recent_messages=keep_recent_messages,
     )
 
@@ -265,7 +364,7 @@
     if len(to_summarize) < 2 and keep_recent_messages is not None and keep_recent_messages > 2:
         to_summarize, to_keep = partition_messages(
             context,
-            keep_recent,
+            effective_keep_recent,
             keep_recent_messages=2,
         )
 
@@ -294,14 +393,15 @@
     if len(summary_text_input) > _MAX_SUMMARY_INPUT_CHARS:
         summary_text_input = summary_text_input[:_MAX_SUMMARY_INPUT_CHARS] + "\n…[truncated]"
 
+    system_prompt = _build_summary_system_prompt(profile)
     prompt = [
-        Message(role="system", content=_SUMMARIZE_SYSTEM),
+        Message(role="system", content=system_prompt),
         Message(role="user", content=summary_text_input, images=images or None),
     ]
 
     # think=False: compression must be fast — extended reasoning wastes context and hangs
     response = await llm.complete(
-        prompt, tools=None, temperature=temperature, model=model, think=False, max_tokens=max_tokens
+        prompt, tools=None, temperature=temperature, model=model, think=False, max_tokens=effective_max_tokens
     )
     summary_text = (response.content or "").strip() or "(summary unavailable)"
 
@@ -336,6 +436,13 @@
         imgs = sum(500 for m in context if m.images)
         return chars // 3 + imgs
 
+    def __init__(self) -> None:
+        self._profile: "AgentProfile | None" = None
+
+    def set_profile(self, profile: "AgentProfile | None") -> None:
+        """Tell the compressor which profile is active so it can use profile-specific settings."""
+        self._profile = profile
+
     async def compress_session(
         self,
         context: list[Message],
@@ -352,6 +459,9 @@
         Does NOT mutate the session — the caller is responsible for updating
         session.context, session.context_token_count, and persisting.
         """
+        effective_keep_recent = getattr(self._profile, "compression_keep_recent", None) or keep_recent
+        effective_max_tokens = getattr(self._profile, "compression_max_tokens", None) or max_tokens
+        # NOTE(review): compress_context() re-resolves keep_recent from the profile, so the "+ 4" retry below has no effect when the profile sets compression_keep_recent — confirm and resolve in one place.
         # Attempt 1: normal compression
         try:
             result = await compress_context(
@@ -359,9 +469,10 @@
                 llm=llm,
                 model=model,
                 temperature=temperature,
-                keep_recent=keep_recent,
-                max_tokens=max_tokens,
+                keep_recent=effective_keep_recent,
+                max_tokens=effective_max_tokens,
                 keep_recent_messages=keep_recent_messages,
+                profile=self._profile,
             )
         except Exception:
             # Attempt 2: keep more recent turns verbatim
@@ -371,11 +482,12 @@
                     llm=llm,
                     model=model,
                     temperature=temperature,
-                    keep_recent=keep_recent + 4,
-                    max_tokens=max_tokens,
+                    keep_recent=effective_keep_recent + 4,
+                    max_tokens=effective_max_tokens,
                     keep_recent_messages=(keep_recent_messages + 4)
                     if keep_recent_messages is not None
                     else None,
+                    profile=self._profile,
                 )
             except Exception:
                 # Attempt 3: hard-truncate fallback
diff --git a/navi/core/subagent_runner.py b/navi/core/subagent_runner.py
index 23dc34a..3ec7c23 100644
--- a/navi/core/subagent_runner.py
+++ b/navi/core/subagent_runner.py
@@ -60,6 +60,10 @@
         self._sessions = session_store
         self._mcp_manager = mcp_manager
 
+    def set_profile(self, profile) -> None:
+        """Propagate active profile to compressor so subagent uses profile-specific compression settings."""
+        self._compressor.set_profile(profile)
+
     async def run(
         self,
         user_message: str,
diff --git a/navi/llm/base.py b/navi/llm/base.py
index 10fb8b9..67e2aa8 100644
--- a/navi/llm/base.py
+++ b/navi/llm/base.py
@@ -64,6 +64,8 @@
     # DB sequence number — set by PgSessionStore on load, used for delta-save.
     # -1 means "not yet persisted" (new messages created by the agent).
     sequence_number: int = Field(default=-1, exclude=True)
+    # Marks messages that must survive compression verbatim (user corrections, exact tool output)
+    is_compression_critical: bool = False
 
 
 class LLMResponse(BaseModel):
diff --git a/navi/profiles/base.py b/navi/profiles/base.py
index d030d75..dfead92 100644
--- a/navi/profiles/base.py
+++ b/navi/profiles/base.py
@@ -123,6 +123,14 @@
     # Global providers (global_provider=True) are always injected regardless of this list.
     context_providers: list[str] = Field(default_factory=list)
 
+    # Optional compression tuning for this profile. When provided, the context
+    # compressor uses these values as defaults instead of global settings.
+    compression_keep_recent: int | None = None
+    compression_max_tokens: int | None = None
+    # Optional path (relative to profile dir) of a plain-text file containing an
+    # additional system prompt appended to the compressor's summary instructions.
+    compression_prompt_file: str | None = None
+
     @field_validator("model", mode="before")
     @classmethod
     def _coerce_model(cls, v):
diff --git a/navi/profiles/loader.py b/navi/profiles/loader.py
index b988918..4857438 100644
--- a/navi/profiles/loader.py
+++ b/navi/profiles/loader.py
@@ -108,6 +108,9 @@
                 subagent_think_enabled=config.get("subagent_think_enabled", None),
                 subagent_system_prompt=subagent_system_prompt,
                 context_providers=config.get("context_providers", []),
+                compression_keep_recent=config.get("compression_keep_recent", None),
+                compression_max_tokens=config.get("compression_max_tokens", None),
+                compression_prompt_file=config.get("compression_prompt_file", None),
             ))
             log.debug("profile.loader.loaded", profile_id=config["id"])
 
@@ -158,6 +161,9 @@
         "is_subagent_only": profile.is_subagent_only,
         "tools": profile.tools.model_dump(mode="json"),
         "context_providers": profile.context_providers,
+        "compression_keep_recent": profile.compression_keep_recent,
+        "compression_max_tokens": profile.compression_max_tokens,
+        "compression_prompt_file": profile.compression_prompt_file,
     }
 
     config_file = profile_dir / "config.json"
diff --git a/navi/profiles/navi_code/compression_prompt.txt b/navi/profiles/navi_code/compression_prompt.txt
new file mode 100644
index 0000000..ab59d85
--- /dev/null
+++ b/navi/profiles/navi_code/compression_prompt.txt
@@ -0,0 +1,14 @@
+You are summarizing a local-terminal coding session. Preserve information that is essential for continuing implementation and verification.
+
+Priority rules:
+- Keep every file path the assistant read, created, or modified, with the action taken.
+- Keep exact code signatures the user explicitly approved or that were final (function/class names, important config keys, exact command-line flags).
+- Keep the outcome of the last test/build/verification run (pass/fail and the final error snippet if it failed).
+- Keep the current todo list state and any pending sub-tasks.
+- Keep user corrections about style, approach, or things the user said must or must not be done.
+- Keep exact environment facts: ports, Python versions, dependency names, paths to project roots, special local quirks.
+
+Do not preserve:
+- Long terminal output, full stack traces, or verbose directory listings.
+- Social greetings, filler, or commentary about the summary itself.
+- Intermediate reasoning or tool-call argument previews.
diff --git a/navi/profiles/navi_code/config.json b/navi/profiles/navi_code/config.json
index 4ddd862..b7f837d 100644
--- a/navi/profiles/navi_code/config.json
+++ b/navi/profiles/navi_code/config.json
@@ -32,6 +32,9 @@
   "top_k": 40,
   "top_p": 0.88,
   "num_thread": 11,
+  "compression_keep_recent": 12,
+  "compression_max_tokens": 4000,
+  "compression_prompt_file": "compression_prompt.txt",
   "tools": {
     "agent": {
       "native": [
diff --git a/tests/unit/core/test_compressor.py b/tests/unit/core/test_compressor.py
index 07c59a2..b17bc11 100644
--- a/tests/unit/core/test_compressor.py
+++ b/tests/unit/core/test_compressor.py
@@ -4,13 +4,15 @@
 
 from navi.core.compressor import (
     ContextCompressor,
+    _build_summary_system_prompt,
     _format_for_summary,
+    _turn_importance,
     compress_context,
     partition_messages,
     should_compress,
 )
 from navi.llm.base import Message, ToolCallRequest
-from tests.conftest_factory import FakeLLMBackend
+from tests.conftest_factory import FakeLLMBackend, make_profile
 
 
 class TestShouldCompress:
@@ -471,3 +473,78 @@
             keep_recent=5,
         )
         assert result is None
+
+    def test_turn_importance(self):
+        important = [
+            Message(role="user", content="that is wrong, use json instead"),
+        ]
+        casual = [
+            Message(role="user", content="hi"),
+            Message(role="assistant", content="hello"),
+        ]
+        assert _turn_importance(important) > _turn_importance(casual)
+
+    def test_adaptive_partition_keeps_important_turns(self):
+        msgs = []
+        for i in range(6):
+            msgs.append(Message(role="user", content=f"task {i}"))
+            msgs.append(Message(role="assistant", content=f"answer {i}"))
+        # Mark one old turn as important; it should survive even with keep_recent=2
+        msgs[6].content = "this is wrong, fix it"
+        old, recent = partition_messages(msgs, keep_recent=2)
+        assert msgs[6] in recent  # user correction stays
+
+    def test_format_keeps_critical_tool_result(self):
+        long_result = "exact error output\n" * 100
+        msgs = [
+            Message(
+                role="assistant",
+                tool_calls=[ToolCallRequest(id="1", name="filesystem", arguments={})],
+            ),
+            Message(role="tool", content=long_result, name="filesystem", tool_call_id="1", is_compression_critical=True),
+        ]
+        text, _ = _format_for_summary(msgs)
+        assert long_result in text
+
+    def test_format_truncates_noncritical_tool_result(self):
+        long_result = "x" * 1000
+        msgs = [
+            Message(
+                role="assistant",
+                tool_calls=[ToolCallRequest(id="1", name="web_search", arguments={})],
+            ),
+            Message(role="tool", content=long_result, name="web_search", tool_call_id="1"),
+        ]
+        text, _ = _format_for_summary(msgs)
+        assert long_result not in text
+        assert "…" in text
+
+    def test_summary_prompt_uses_profile_compression_prompt_file(self):
+        profile = make_profile("navi_code", compression_prompt_file="compression_prompt.txt")
+        prompt = _build_summary_system_prompt(profile)
+        assert "## Goal" in prompt
+        assert "## Active Files" in prompt
+
+    async def test_profile_overrides_compression_max_tokens(self):
+        profile = make_profile("test", compression_max_tokens=1234)
+        backend = FakeLLMBackend(responses=["short"])
+        seen: dict = {}
+        real_complete = backend.complete
+
+        async def spy(*args, **kwargs):
+            seen.update(kwargs)
+            return await real_complete(*args, **kwargs)
+
+        backend.complete = spy  # record the kwargs compress_context passes to the backend
+        context = [
+            Message(role=role, content=f"{prefix}{i}")
+            for i in (1, 2, 3)
+            for role, prefix in (("user", ""), ("assistant", "a"))
+        ]
+        result = await compress_context(
+            context=context, llm=backend, model="test", temperature=0.3,
+            keep_recent=1, max_tokens=9999, profile=profile,
+        )
+        assert result is not None
+        # The profile value must win over the max_tokens argument.
+        assert seen["max_tokens"] == 1234