# Prevents the summarizer from receiving near-context-sized input it can't fit alongside output.
_MAX_SUMMARY_INPUT_CHARS = 24_000

# When existing summaries in to_summarize exceed this many chars combined,
# run a quick meta-summary to consolidate them before the main compression pass.
_META_SUMMARY_THRESHOLD = _MAX_SUMMARY_INPUT_CHARS // 3  # 8_000

# Default output budget (in tokens) for the consolidation call.
_META_SUMMARY_MAX_TOKENS = 1500

_META_SUMMARY_SYSTEM = (
    "You are condensing multiple conversation summaries into a single compact summary. "
    "Preserve all key facts, decisions, file paths, config values, errors, and user preferences. "
    "Eliminate redundancy between overlapping summaries. "
    "Write tight bullet points. Do not include filler or meta-commentary."
)


async def _meta_summarize(
    summaries: list[Message],
    llm: LLMBackend,
    model,
    temperature: float,
    *,
    max_tokens: int = _META_SUMMARY_MAX_TOKENS,
) -> Message:
    """Consolidate multiple existing summary messages into one compact meta-summary.

    The non-empty ``content`` of every message in ``summaries`` is joined with a
    ``---`` separator and sent to ``llm.complete`` as a single user message,
    preceded by the ``_META_SUMMARY_SYSTEM`` system prompt.

    Args:
        summaries: Existing summary messages to merge.
        llm: Backend whose ``complete`` coroutine performs the call.
        model: Passed through unchanged to ``llm.complete``.
        temperature: Passed through unchanged to ``llm.complete``.
        max_tokens: Output budget for the consolidation call.

    Returns:
        A new ``user`` message flagged ``is_summary=True``, ``is_display=False``
        and ``is_context=True`` whose content is the consolidated text prefixed
        with ``[Consolidated Context Summary]``.

    Raises:
        ValueError: If none of ``summaries`` carries any text.
        RuntimeError: If the backend returns empty or whitespace-only content.
            Raising (instead of returning a placeholder) lets the caller keep
            the original summaries rather than replace them with a stub.
    """
    # ``content`` may be None; ``str.join`` would raise TypeError on it, so
    # only messages that actually carry text take part in the prompt.
    parts = [m.content for m in summaries if m.content]
    if not parts:
        raise ValueError("no summary text to consolidate")

    prompt = [
        Message(role="system", content=_META_SUMMARY_SYSTEM),
        Message(role="user", content="\n\n---\n\n".join(parts)),
    ]
    response = await llm.complete(
        prompt,
        tools=None,
        temperature=temperature,
        model=model,
        think=False,
        max_tokens=max_tokens,
    )

    text = (response.content or "").strip()
    if not text:
        # An empty answer is a failed consolidation: returning a placeholder
        # here would make the caller discard every real summary in its favour.
        raise RuntimeError("meta-summary returned no content")

    return Message(
        role="user",
        content=f"[Consolidated Context Summary]\n{text}",
        is_summary=True,
        is_display=False,
        is_context=True,
        created_at=datetime.now(timezone.utc),
    )
+ summary_msgs = [m for m in to_summarize if m.is_summary] + if len(summary_msgs) > 1: + total_summary_len = sum(len(m.content or "") for m in summary_msgs) + if total_summary_len > _META_SUMMARY_THRESHOLD: + try: + meta = await _meta_summarize( + summary_msgs, llm, model, temperature + ) + to_summarize = [m for m in to_summarize if not m.is_summary] + to_summarize.insert(0, meta) + except Exception: + # If meta-summary fails, continue with raw summaries as-is + pass + summary_text_input, images = _format_for_summary(to_summarize) # Truncate oversized input so the summarizer LLM has room to generate output diff --git a/tests/unit/core/test_compressor.py b/tests/unit/core/test_compressor.py index c905125..07c59a2 100644 --- a/tests/unit/core/test_compressor.py +++ b/tests/unit/core/test_compressor.py @@ -213,6 +213,100 @@ assert new_context[1].content == "build a model" assert len(new_context) == 6 # summary + user + 4 recent messages + async def test_meta_summary_consolidates_multiple_summaries(self): + """When to_summarize contains multiple long existing summaries, a meta-summary + pass runs first (consolidating them) before the main compression.""" + # First response = meta-summary, second = main compression + backend = FakeLLMBackend(responses=["Meta summary", "Final summary"]) + # Build a context with two existing summaries (each > 4000 chars to cross threshold) + big_summary_1 = "A" * 5000 + big_summary_2 = "B" * 5000 + context = [ + Message(role="system", content="sys"), + Message(role="user", content=big_summary_1, is_summary=True, is_display=False), + Message(role="user", content=big_summary_2, is_summary=True, is_display=False), + Message(role="user", content="recent question"), + Message(role="assistant", content="recent answer"), + Message(role="user", content="new question"), + Message(role="assistant", content="new answer"), + ] + new_context, summary = await compress_context( + context=context, + llm=backend, + model="test", + temperature=0.3, + 
keep_recent=2, + ) + assert summary == "Final summary" + # system + 1 consolidated summary + 2 recent turns = 6 + assert len(new_context) == 6 + assert new_context[1].is_summary is True + # Exactly one summary in final context (meta + raw folded into one) + assert sum(1 for m in new_context if m.is_summary) == 1 + # Two LLM calls happened (meta + main) + assert backend._call_idx == 2 + + async def test_meta_summary_skipped_when_summaries_are_short(self): + """Short existing summaries should not trigger an extra meta-summary pass.""" + backend = FakeLLMBackend(responses=["Final summary"]) + context = [ + Message(role="system", content="sys"), + Message(role="user", content="short summary 1", is_summary=True, is_display=False), + Message(role="user", content="short summary 2", is_summary=True, is_display=False), + Message(role="user", content="recent question"), + Message(role="assistant", content="recent answer"), + Message(role="user", content="new question"), + Message(role="assistant", content="new answer"), + ] + new_context, summary = await compress_context( + context=context, + llm=backend, + model="test", + temperature=0.3, + keep_recent=2, + ) + assert summary == "Final summary" + assert len(new_context) == 6 + # Only one LLM call (main compression) because summaries were too short + # to trigger meta-summary + assert backend._call_idx == 1 + + async def test_meta_summary_graceful_on_failure(self): + """If meta-summary fails, compression continues with raw summaries.""" + import navi.core.compressor as compressor_module + + backend = FakeLLMBackend(responses=["Final summary"]) + original_meta = compressor_module._meta_summarize + + async def _failing_meta(*args, **kwargs): + raise RuntimeError("meta boom") + + compressor_module._meta_summarize = _failing_meta + try: + big = "X" * 5000 + context = [ + Message(role="system", content="sys"), + Message(role="user", content=big, is_summary=True, is_display=False), + Message(role="user", content=big, 
is_summary=True, is_display=False), + Message(role="user", content="q1"), + Message(role="assistant", content="a1"), + Message(role="user", content="q2"), + Message(role="assistant", content="a2"), + ] + new_context, summary = await compress_context( + context=context, + llm=backend, + model="test", + temperature=0.3, + keep_recent=2, + ) + assert summary == "Final summary" + assert len(new_context) == 6 + # Meta failed, so only one call to the backend + assert backend._call_idx == 1 + finally: + compressor_module._meta_summarize = original_meta + async def test_intra_turn_fallback_aggressive(self): """When turn-based partition has nothing to compress but keep_recent_messages is set, an aggressive fallback (keep_recent_messages=2) should still find