"""
Context compressor — summarizes old messages to stay within the token limit.
Flow:
1. Partition session messages into "to_summarize" (old turns) and "to_keep" (recent turns).
2. Call the LLM to produce a concise bullet-point summary of the old turns.
3. Replace the old turns with a single summary message (role=user, is_summary=True).
A "turn" is one user message plus all following assistant/tool messages up to the
next user message. Tool call groups (assistant + tool results) are never split.
Existing summary messages are always folded into the next compression pass.
"""
import json
from datetime import datetime, timezone
from navi.llm.base import LLMBackend, Message
# System prompt for the summarization request; compress_context sends it as
# the system message ahead of the rendered transcript. The adjacent string
# literals are concatenated by Python into a single line of text.
_SUMMARIZE_SYSTEM = (
"You are summarizing a conversation history to free up context space. "
"Produce a concise factual summary covering: key facts the user shared, "
"decisions made, tasks completed or in progress, important outputs or findings. "
"Use bullet points. Be brief — this summary replaces the conversation in context. "
"Do not include greetings or filler."
)
def should_compress(context_tokens: int, max_context_tokens: int, threshold: float) -> bool:
    """Return True once the context has reached the compression trigger point.

    The trigger point is ``max_context_tokens * threshold`` truncated to an
    integer with ``int()``; reaching it exactly counts as needing compression.
    """
    trigger_at = int(max_context_tokens * threshold)
    return trigger_at <= context_tokens
def partition_messages(
    messages: list[Message], keep_recent: int
) -> tuple[list[Message], list[Message]]:
    """
    Split the conversation into (to_summarize, to_keep).

    System messages are excluded from both returned lists. The remaining
    messages are grouped into turns: every user message opens a new turn, and
    all following non-user messages (assistant replies, tool results) belong to
    it. Messages that precede the first user message form a leading turn of
    their own. Because the split only ever happens on a turn boundary, an
    assistant tool call and its tool results always stay together.

    Args:
        messages: Conversation messages in order; only ``role`` is inspected.
        keep_recent: Number of most recent turns to keep verbatim. Values of
            zero or less keep no turn, so every turn is summarized.

    Returns:
        ``(to_summarize, to_keep)`` as flat message lists. When there are no
        more than ``keep_recent`` turns, ``to_summarize`` is empty and
        ``to_keep`` holds every non-system message.
    """
    non_system = [m for m in messages if m.role != "system"]

    # Group into turns: a user message starts a new turn; anything else is
    # appended to the turn currently being built.
    turns: list[list[Message]] = []
    for msg in non_system:
        if msg.role == "user" or not turns:
            turns.append([msg])
        else:
            turns[-1].append(msg)

    # Clamp so that a zero/negative value means "keep nothing".
    keep_recent = max(keep_recent, 0)
    if len(turns) <= keep_recent:
        return [], non_system  # nothing old enough to compress

    # Use an explicit split index: negative slicing breaks for zero, because
    # turns[:-0] is empty and turns[-0:] is the whole list.
    split = len(turns) - keep_recent
    to_summarize = [m for turn in turns[:split] for m in turn]
    to_keep = [m for turn in turns[split:] for m in turn]
    return to_summarize, to_keep
def _format_for_summary(messages: list[Message]) -> str:
    """Render messages as plain text for the summarization prompt.

    Each message becomes zero or more lines, joined with newlines:

    * a message flagged ``is_summary`` is included verbatim (it is already
      compressed);
    * a user message with content becomes ``User: ...``;
    * an assistant message with tool calls becomes its text (if any) as
      ``Assistant: ...`` followed by one ``[Called tool ...]`` line per call;
      the tool messages that directly follow it are rendered as
      ``[Tool ... returned: ...]`` lines;
    * an assistant message with only content becomes ``Assistant: ...``;
    * anything else (empty messages, tool messages that do not directly follow
      a tool-calling assistant message) is skipped.

    Tool arguments are clipped to 120 characters and tool results to 300;
    clipped text is marked with a trailing ellipsis.
    """

    def clip(text: str, limit: int) -> str:
        # Shorten text to `limit` characters, marking the cut with an ellipsis.
        return text[:limit] + ("…" if len(text) > limit else "")

    lines: list[str] = []
    # True while the tool results belonging to the most recent tool-calling
    # assistant message are being consumed.
    in_tool_group = False
    for m in messages:
        if in_tool_group and m.role == "tool":
            lines.append(f"[Tool `{m.name}` returned: {clip(m.content or '', 300)}]")
            continue
        in_tool_group = False

        if m.is_summary:
            # Existing summary — include as-is (already compressed)
            lines.append(m.content or "")
        elif m.role == "user":
            if m.content:
                lines.append(f"User: {m.content}")
        elif m.role == "assistant" and m.tool_calls:
            # Keep any text the assistant wrote alongside its tool calls so it
            # is not lost from the summary input.
            if m.content:
                lines.append(f"Assistant: {m.content}")
            for tc in m.tool_calls:
                # ensure_ascii=False keeps non-ASCII arguments readable and
                # stops \uXXXX escapes from eating the 120-character budget.
                args_preview = clip(json.dumps(tc.arguments, ensure_ascii=False), 120)
                lines.append(f"[Called tool `{tc.name}` with {args_preview}]")
            in_tool_group = True
        elif m.role == "assistant" and m.content:
            lines.append(f"Assistant: {m.content}")
        # Anything else (e.g. an orphan tool message) is skipped.
    return "\n".join(lines)
async def compress_context(
    context: list[Message],
    llm: LLMBackend,
    model: str,
    temperature: float,
    keep_recent: int,
) -> list[Message] | None:
    """
    Replace the older part of an LLM context with a single summary message.

    The older turns (everything except the last ``keep_recent`` turns) are
    rendered to text, summarized by ``llm``, and swapped for one message with
    ``role="user"`` and ``is_summary=True``. The ``context`` list itself is not
    modified; a new list is built and returned.

    Args:
        context: Messages currently sent to the LLM.
        llm: Backend used for the summarization call.
        model: Model identifier forwarded to ``llm.complete``.
        temperature: Sampling temperature forwarded to ``llm.complete``.
        keep_recent: Number of most recent turns to keep verbatim.

    Returns:
        A new list of system messages, then the summary message, then the kept
        recent messages; or ``None`` when fewer than two messages are old
        enough to summarize.

    Raises:
        Whatever ``llm.complete`` raises (documented by the original author as
        LLMBackendError); nothing is caught here, the caller decides how to
        handle it.
    """
    old_messages, recent_messages = partition_messages(context, keep_recent)
    if len(old_messages) < 2:
        # nothing substantial to compress
        return None

    # Collected before the LLM call, from the context as it was passed in.
    preserved_system = [m for m in context if m.role == "system"]

    request = [
        Message(role="system", content=_SUMMARIZE_SYSTEM),
        Message(role="user", content=_format_for_summary(old_messages)),
    ]
    reply = await llm.complete(request, tools=None, temperature=temperature, model=model)

    # An empty or whitespace-only reply falls back to a fixed placeholder.
    summary_text = (reply.content or "").strip()
    if not summary_text:
        summary_text = "(summary unavailable)"

    summary_msg = Message(
        role="user",
        content=f"[Context Summary]\n{summary_text}",
        is_summary=True,
        created_at=datetime.now(timezone.utc),
    )
    return [*preserved_system, summary_msg, *recent_messages]