diff --git a/navi/memory/extractor.py b/navi/memory/extractor.py index 9fbfbc0..5069941 100644 --- a/navi/memory/extractor.py +++ b/navi/memory/extractor.py @@ -17,30 +17,41 @@ log = structlog.get_logger() _EXTRACT_SYSTEM = """\ -You extract stable facts about the user from a conversation transcript. +You extract stable facts about the user from a session transcript. -Extract ONLY facts that are: +The transcript contains four types of entries: +1. User messages — what the user explicitly said +2. Assistant messages — Navi's own responses +3. Tool calls — which tools Navi executed and their arguments +4. Tool results — the raw output those tools returned (may be truncated) + +Facts discovered through tool results are MORE RELIABLE than conversation. +If a tool result contradicts something said in chat — trust the tool result. + +Extract facts that are: - Persistent characteristics: name, age, location, occupation, family situation -- Technical environment: OS, tools, languages, home servers, devices -- Preferences: communication style, coding habits, things they like or dislike +- Technical environment: OS, tools, servers, devices, IPs, running services +- Preferences: communication style, coding habits, likes/dislikes, workflow patterns - Ongoing projects or goals -- Any other stable, reusable facts about this specific person Do NOT extract: - Topics that were discussed or questions that were asked - Temporary states ("was tired", "was busy today") - Information about third parties that isn't about the user -- Directory-specific project notes, one-off commands, file paths, task progress, or local quirks - that belong in NAVI.md instead of long-term user memory -- Duplicate facts already present in the transcript +- Directory-specific project notes, one-off commands, file paths, task progress +- NAVI.md content or local operational notes +- Already-known facts that appear in the transcript + +For each fact, indicate its source: +- "conversation" — the user explicitly stated it or it was inferred from chat +- "tool_call" — discovered through a tool execution result Return ONLY a valid JSON array. No markdown, no prose, no comments. Return empty [] if nothing new should be extracted. Schema: [ - {"category": "profile", "key": "name", "value": "Eugene"}, - {"category": "technical", "key": "primary_os", "value": "Arch Linux"}, - {"category": "preferences", "key": "response_language", "value": "Russian"} + {"category": "profile", "key": "name", "value": "Eugene", "source": "conversation", "source_context": "user introduced themselves"}, + {"category": "technical", "key": "host_ip", "value": "192.168.1.168", "source": "tool_call", "source_context": "found via terminal ip addr"} ] Valid categories: profile, preferences, technical, projects, other""" @@ -74,20 +85,49 @@ await _regenerate_summary(llm, model, memory_store) +_MAX_TOOL_RESULT_LEN = 500 +_MAX_TRANSCRIPT_CHARS = 12_000 + + async def _extract_facts(session, llm: LLMBackend, model: str, store: MemoryStore) -> int: lines: list[str] = [] + + # Map tool_call_id -> tool_name so we can label tool results + tool_call_map: dict[str, str] = {} + for msg in session.messages: + if msg.role == "assistant" and msg.tool_calls: + for tc in msg.tool_calls: + tool_call_map[tc.id] = tc.name + for msg in session.messages: if msg.role == "user" and msg.content: lines.append(f"User: {msg.content}") - elif msg.role == "assistant" and msg.content: - lines.append(f"Assistant: {msg.content}") + elif msg.role == "assistant": + if msg.content: + lines.append(f"Assistant: {msg.content}") + if msg.tool_calls: + for tc in msg.tool_calls: + args = str(tc.arguments)[:200] + lines.append(f"[Tool call] {tc.name}({args})") + elif msg.role == "tool" and msg.content: + tool_name = tool_call_map.get(msg.tool_call_id or "", "unknown") + content = msg.content + if len(content) > _MAX_TOOL_RESULT_LEN: + content = content[:_MAX_TOOL_RESULT_LEN] + " ... [truncated]" + lines.append(f"[Tool result] {tool_name}: {content}") if not lines: return 0 + transcript = "\n".join(lines) + if len(transcript) > _MAX_TRANSCRIPT_CHARS: + # Keep early context + recent tail; drop the middle + half = _MAX_TRANSCRIPT_CHARS // 2 + transcript = transcript[:half] + "\n\n... [transcript truncated] ...\n\n" + transcript[-half:] + prompt = [ Message(role="system", content=_EXTRACT_SYSTEM), - Message(role="user", content="\n".join(lines)), + Message(role="user", content=transcript), ] try: @@ -118,14 +158,27 @@ category = "other" key = str(fact.get("key", "")).strip() value = str(fact.get("value", "")).strip() + source = str(fact.get("source", "conversation")).strip().lower() + if source not in {"conversation", "tool_call", "auto_discovery", "user_explicit"}: + source = "conversation" + source_context = str(fact.get("source_context", "")).strip() + + # Confidence mapping based on source reliability + confidence = {"tool_call": 95, "auto_discovery": 95, "user_explicit": 90}.get(source, 70) + if key and value: + # TODO: Semantic deduplication — before upsert, search for semantically + # similar facts and merge/update instead of creating duplicates. + # Problem: LLM generates different keys for the same fact across sessions. + # Solution: vector search + similarity threshold before upsert. await store.upsert_fact( category=category, key=key, value=value, source_session_id=session.id, - source="conversation", - confidence=70, + source=source, + confidence=confidence, + source_context=source_context, ) count += 1