# Newer
# Older
# navi-1 / navi / memory / extractor.py
# @Eugene Sukhodolskiy Eugene Sukhodolskiy on 25 Apr 5 KB Improve compression and memory prompts
"""
Fact extraction and summary generation for the memory system.

Flow (triggered when a session is considered complete):
1. Format session.messages as plain text
2. Ask LLM to extract stable facts about the user → upsert into memory_facts
3. If new facts were found → regenerate summary from all facts
"""

import json
import structlog

from navi.llm.base import LLMBackend, Message

from .store import MemoryStore

# Module-level structlog logger; the functions below emit "memory.*" events on it.
log = structlog.get_logger()

# System prompt used by _extract_facts. It instructs the model to reply with a
# bare JSON array of {"category", "key", "value"} objects. The category list on
# the last line matches the set that _extract_facts accepts; any other category
# is coerced to "other" there.
_EXTRACT_SYSTEM = """\
You extract stable facts about the user from a conversation transcript.

Extract ONLY facts that are:
- Persistent characteristics: name, age, location, occupation, family situation
- Technical environment: OS, tools, languages, home servers, devices
- Preferences: communication style, coding habits, things they like or dislike
- Ongoing projects or goals
- Any other stable, reusable facts about this specific person

Do NOT extract:
- Topics that were discussed or questions that were asked
- Temporary states ("was tired", "was busy today")
- Information about third parties that isn't about the user
- Directory-specific project notes, one-off commands, file paths, task progress, or local quirks
  that belong in NAVI.md instead of long-term user memory
- Duplicate facts already present in the transcript

Return ONLY a valid JSON array. No markdown, no prose, no comments.
Return empty [] if nothing new should be extracted.
Schema:
[
  {"category": "profile", "key": "name", "value": "Eugene"},
  {"category": "technical", "key": "primary_os", "value": "Arch Linux"},
  {"category": "preferences", "key": "response_language", "value": "Russian"}
]

Valid categories: profile, preferences, technical, projects, other"""

# System prompt used by _regenerate_summary. The accompanying user message is
# the full fact list rendered as "[category]" headers followed by indented
# "key: value" lines.
_SUMMARY_SYSTEM = """\
You are writing a memory summary for an AI assistant about its user.
Summarize the facts below in 2-4 short paragraphs (max 400 words).
Write from the assistant's perspective: what you know about the user.
Be specific and concrete. Cover the most important identifying details first,
then preferences and ongoing context.
Do not add facts not present below. Do not include task progress, local directory notes,
or one-off commands; those belong in NAVI.md, not user memory."""


async def extract_and_update(
    session,
    llm: LLMBackend,
    model: str,
    memory_store: MemoryStore,
) -> None:
    """
    Run fact extraction for one session, then refresh the summary if needed.

    Steps, in order:
    1. Extract facts from the session and upsert them into ``memory_store``.
    2. Mark the session as extracted in ``memory_store``.
    3. Regenerate the stored summary, but only when step 1 reported at least
       one fact.

    Repeated calls are presumably idempotent because facts are written through
    ``upsert_fact`` — confirm against MemoryStore.

    NOTE(review): ``_extract_facts`` also returns 0 when the LLM call fails or
    its reply cannot be parsed, so such a session is still marked as extracted
    here — confirm this is intended.
    """
    new_fact_count = await _extract_facts(session, llm, model, memory_store)
    log.info("memory.extracted", session_id=session.id, facts_added=new_fact_count)

    await memory_store.mark_session_extracted(session.id)

    # Nothing new was stored, so the existing summary is left untouched.
    if new_fact_count <= 0:
        return

    await _regenerate_summary(llm, model, memory_store)

async def _extract_facts(session, llm: LLMBackend, model: str, store: MemoryStore) -> int:
    """
    Ask the LLM for stable user facts in a session and upsert them into the store.

    The transcript sent to the model contains only user and assistant messages
    with non-empty content. The reply is expected to contain a JSON array of
    ``{"category", "key", "value"}`` objects; text around the array is tolerated.

    Args:
        session: Object exposing ``messages`` (items with ``role`` and
            ``content``) and ``id``.
        llm: Backend used for the completion call.
        model: Model name forwarded to ``llm.complete``.
        store: Memory store receiving ``upsert_fact`` calls.

    Returns:
        The number of facts passed to ``store.upsert_fact``. Returns 0 when the
        session has no usable messages, the LLM call fails, or the reply holds
        no parseable JSON array. Those failures are logged, not raised.
    """
    valid_categories = {"profile", "preferences", "technical", "projects", "other"}
    speakers = {"user": "User", "assistant": "Assistant"}

    lines = [
        f"{speakers[msg.role]}: {msg.content}"
        for msg in session.messages
        if msg.role in speakers and msg.content
    ]
    if not lines:
        return 0

    prompt = [
        Message(role="system", content=_EXTRACT_SYSTEM),
        Message(role="user", content="\n".join(lines)),
    ]

    try:
        response = await llm.complete(prompt, tools=None, temperature=0.1, model=model)
        raw = (response.content or "").strip()
    except Exception:
        # Best-effort: extraction must never break the caller's flow.
        log.warning("memory.extract_llm_error", session_id=session.id, exc_info=True)
        return 0

    # Find the JSON array in the response (the model may add surrounding text
    # or a markdown fence despite the prompt).
    start = raw.find("[")
    end = raw.rfind("]") + 1
    if start == -1 or end <= start:
        log.warning("memory.extract_no_json", session_id=session.id, raw=raw[:300])
        return 0

    try:
        facts = json.loads(raw[start:end])
    except json.JSONDecodeError:
        log.warning("memory.extract_parse_error", session_id=session.id, raw=raw[:300])
        return 0

    count = 0
    for fact in facts:
        if not isinstance(fact, dict):
            continue

        raw_key = fact.get("key")
        raw_value = fact.get("value")
        # JSON null decodes to None; str(None) would store the literal text
        # "None" as a fact, so such entries are skipped instead.
        if raw_key is None or raw_value is None:
            continue

        category = str(fact.get("category", "other")).strip().lower()
        if category not in valid_categories:
            category = "other"

        key = str(raw_key).strip()
        value = str(raw_value).strip()
        if key and value:
            await store.upsert_fact(category, key, value, session.id)
            count += 1

    return count


async def _regenerate_summary(llm: LLMBackend, model: str, store: MemoryStore) -> None:
    """
    Rebuild the stored memory summary from every fact in the store.

    Facts are rendered as one ``[category]`` header per category (categories in
    sorted order) followed by indented ``key: value`` lines. Within a category
    the facts keep the order in which ``store.get_all_facts()`` returned them;
    no sorting is applied here.

    Does nothing when the store has no facts. An LLM failure is logged and the
    existing summary is left as is; an empty reply is likewise ignored.
    """
    all_facts = await store.get_all_facts()
    if not all_facts:
        return

    # Bucket facts per category, preserving the store's ordering inside each bucket.
    grouped: dict[str, list] = {}
    for fact in all_facts:
        grouped.setdefault(fact["category"], []).append(fact)

    rendered: list[str] = []
    for category in sorted(grouped):
        rendered.append(f"[{category}]")
        rendered.extend(f"  {item['key']}: {item['value']}" for item in grouped[category])

    system_msg = Message(role="system", content=_SUMMARY_SYSTEM)
    user_msg = Message(role="user", content="\n".join(rendered))

    try:
        reply = await llm.complete(
            [system_msg, user_msg], tools=None, temperature=0.3, model=model
        )
        text = (reply.content or "").strip()
    except Exception:
        log.warning("memory.summary_llm_error", exc_info=True)
        return

    # An empty reply must not overwrite a previously stored summary.
    if not text:
        return

    await store.set_summary(text)
    log.info("memory.summary_updated", fact_count=len(all_facts))