navi-1/webclient/src/stores/chat.js at 3ef005d08c7bc94a2c8e1ad562b511be161fdf7e

Fork: 0
root / navi-1
Find file
Newer
Older
navi-1 / webclient / src / stores / chat.js
Eugene Sukhodolskiy on 25 May 25 KB Fix streaming message orphaning and duplicate message keys
Raw Blame History
import { defineStore } from 'pinia'
import { ref, shallowRef } from 'vue'
import * as api from '@/api/index.js'
import { useSessionsStore } from '@/stores/sessions.js'

// Guard against late completion of stale loadSession calls
let _lastLoadId = null

/**
 * Best-effort background naming of a session.
 *
 * Looks the session up in the sessions store; when it has no name yet, asks
 * the API to generate one and stores a non-empty result via `updateName`.
 * Every failure (lookup, request, store update) is swallowed on purpose —
 * callers fire this without awaiting it.
 *
 * @param sessionId - Identifier of the session to name.
 * @returns {Promise<void>} Always resolves.
 */
async function _tryGenerateName(sessionId) {
  try {
    const store = useSessionsStore()
    const existing = store.sessions.find((entry) => entry.session_id === sessionId)
    const alreadyNamed = Boolean(existing?.name)
    if (alreadyNamed) {
      return
    }
    const response = await api.generateSessionName(sessionId)
    const generated = response.name
    if (generated) {
      store.updateName(sessionId, generated)
    }
  } catch {
    // Deliberately ignored: naming is cosmetic and must never surface an error.
  }
}

export const useChatStore = defineStore('chat', () => {
  // ─── Reactive state ─────────────────────────────────────────────────────
  // Id of the active session; loadSession() writes it last, clearSession() nulls it
  const currentId = ref(null)
  // profile_id of the active session (from the session payload or a profile switch event)
  const currentProfileId = ref(null)
  // Display messages: output of buildMessageList() plus locally appended/streamed entries
  const messages = ref([])
  // True between onStreamStart() and the end/stop/error of a stream
  const streaming = ref(false)
  const pendingImages = ref([])   // base64 strings
  const pendingFiles = ref([])    // { name, path, size, content_type }
  const artifacts = ref([])       // published session content
  const files = ref([])           // session directory tree
  // Context usage counters, updated from the session payload and stream/compression events
  const contextTokens = ref(0)
  const maxContextTokens = ref(0)
  // True while loadSession() is in flight
  const loading = ref(false)

  // The in-progress streaming message (direct ref for perf)
  const streamingMsg = shallowRef(null)
  // True while replaying buffered events from a reconnect — suppresses animations
  const replayMode = ref(false)
  // Index of the message to scroll to after loading a session from search results
  const scrollToMessageIndex = ref(null)
  // Current scheduled recall for the active session (null if none)
  const recall = ref(null)

  // Archive (older messages) pagination state
  const archiveLoading = ref(false)
  const archiveHasMore = ref(false)
  // Cursor passed to api.getArchivedMessages(); null requests the first archive page
  const archiveNextBefore = ref(null)

  /**
   * Load a session over REST and make it the active one.
   *
   * Per-session state is reset immediately; the session, its feedback,
   * artifacts and files are then fetched. `currentId` and `location.hash` are
   * written last so the WebSocket connect (triggered by a watcher on
   * `currentId`) starts only once messages are populated.
   *
   * A call is superseded as soon as a newer `loadSession()` starts. A
   * superseded call stops after whichever request it was awaiting and writes
   * nothing further — in particular it never overwrites `currentId`,
   * `location.hash` or the `loading` flag owned by the newer call.
   *
   * @param id - Session identifier to load.
   * @param [targetIndex=null] - Message index to scroll to after loading; a
   *   non-null value forces a reload even when `id` is already active.
   * @returns {Promise<void>} Request failures are logged, not rethrown.
   */
  async function loadSession(id, targetIndex = null) {
    if (currentId.value === id && targetIndex == null) return
    _lastLoadId = id
    loading.value = true
    messages.value = []
    artifacts.value = []
    files.value = []
    streaming.value = false
    streamingMsg.value = null
    contextTokens.value = 0
    maxContextTokens.value = 0
    scrollToMessageIndex.value = targetIndex

    // True once another loadSession() call has replaced this one
    const isSuperseded = () => _lastLoadId !== id

    try {
      const session = await api.getSession(id)
      // Ignore stale responses — user may have switched to a different session while this loaded
      if (isSuperseded()) return
      currentProfileId.value = session.profile_id ?? null
      messages.value = buildMessageList(session.messages ?? [])
      // Re-check after every await: each one is a window for a session switch
      await mergeFeedback(id)
      if (isSuperseded()) return
      await fetchArtifacts(id)
      if (isSuperseded()) return
      await fetchFiles(id)
      if (isSuperseded()) return
      if (session.context_token_count) contextTokens.value = session.context_token_count
      if (session.max_context_tokens)  maxContextTokens.value = session.max_context_tokens
      // Reset archive state for the new session
      archiveLoading.value = false
      archiveHasMore.value = (session.archive_threshold || 0) > 0
      archiveNextBefore.value = null
      // Set currentId and hash AFTER REST completes — this triggers ws.connect() via the
      // watch in ChatArea. Doing it last guarantees messages are populated before WS replay
      // starts, eliminating the race where WS replay arrives before REST and gets overwritten.
      currentId.value = id
      location.hash = id
    } catch (err) {
      console.error('[chat] loadSession failed', err)
    } finally {
      // A superseded call must not clear the spinner of the load that replaced it
      if (!isSuperseded()) loading.value = false
    }
  }

  /**
   * Leave the current session and reset all per-session state.
   *
   * Also invalidates any `loadSession()` that is still in flight (by clearing
   * the module-level load marker) so a late response cannot re-open the
   * session the user just left, and resets the loading flag, the recall
   * banner and the archive pagination that belonged to that session.
   */
  function clearSession() {
    // Any pending loadSession() now sees itself as superseded and bails out
    _lastLoadId = null
    loading.value = false
    currentId.value = null
    currentProfileId.value = null
    messages.value = []
    artifacts.value = []
    files.value = []
    streaming.value = false
    streamingMsg.value = null
    contextTokens.value = 0
    maxContextTokens.value = 0
    // No active session → no recall and nothing to paginate
    recall.value = null
    archiveLoading.value = false
    archiveHasMore.value = false
    archiveNextBefore.value = null
    location.hash = ''
  }

  // Force-reload session history from the server, bypassing the same-id guard.
  // Called after WebSocket reconnect so the client sees the saved response
  // even if streaming finished while the client was disconnected.
  /**
   * Re-fetch a session's history and replace `messages` with it.
   *
   * Unlike `loadSession()` there is no same-id shortcut and no state reset.
   * An in-progress streaming message is carried over into the rebuilt list:
   *  - while streaming, it replaces the persisted assistant block of the
   *    current turn if the server already stored one (the last user/assistant
   *    entry is an assistant); otherwise it is appended, so an answer from an
   *    earlier turn is never overwritten;
   *  - if the stream flag is already off, it is appended only when neither its
   *    id nor its text is already represented in the rebuilt list.
   *
   * The response is discarded when the active session or the pending load
   * changed while the request was in flight.
   *
   * @param id - Session identifier; falsy values are ignored.
   * @returns {Promise<void>} Failures are logged and otherwise ignored.
   */
  async function reloadSession(id) {
    if (!id) return
    // Snapshot of "what is current" — compared after the request returns
    const activeAtStart = currentId.value
    const loadAtStart = _lastLoadId
    try {
      const session = await api.getSession(id)
      // The user switched (or started switching) sessions meanwhile: stale response
      if (currentId.value !== activeAtStart || _lastLoadId !== loadAtStart) return
      currentProfileId.value = session.profile_id ?? null
      const built = buildMessageList(session.messages ?? [])
      // Preserve an active streaming message that hasn't been persisted yet
      // (e.g. during a headless recall). Otherwise a late session_sync wipes
      // the in-progress assistant bubble before the stream ends.
      const liveMsg = streamingMsg.value
      if (liveMsg) {
        if (streaming.value) {
          // Locate the last conversational entry (skip system notices). Manual
          // loop instead of findLastIndex, matching _lastSpawnCard's
          // compatibility stance.
          let tail = built.length - 1
          while (tail >= 0 && built[tail].role !== 'assistant' && built[tail].role !== 'user') {
            tail--
          }
          if (tail >= 0 && built[tail].role === 'assistant') {
            // Persisted partial of the current turn → swap in the live object
            // so Vue continues to observe mutations.
            built[tail] = liveMsg
          } else {
            // Latest entry is a user message (or nothing): the reply is not
            // persisted yet, so the live message belongs after it.
            built.push(liveMsg)
          }
        } else if (!built.find(m => m.id === liveMsg.id)) {
          // Stream finished but msg not yet cleared — only add if not already present
          let lastAssistant
          for (let i = built.length - 1; i >= 0; i--) {
            if (built[i].role === 'assistant') { lastAssistant = built[i]; break }
          }
          if (!lastAssistant || lastAssistant.text !== liveMsg.text) {
            built.push(liveMsg)
          }
        }
      }
      messages.value = built
      await mergeFeedback(id)
      await fetchArtifacts(id)
      await fetchFiles(id)
      if (session.context_token_count) contextTokens.value = session.context_token_count
      if (session.max_context_tokens) maxContextTokens.value = session.max_context_tokens
      // No live message left → make sure the streaming flag is not stuck on
      if (!streamingMsg.value) {
        streaming.value = false
      }
    } catch (err) {
      // Stale data is better than a crash — keep going, but leave a trace
      console.warn('[chat] reloadSession failed', err)
    }
  }

  // Pull stored ratings for this session and stamp them onto built messages.
  // Each assistant block has id `h_<index>` where index is its first raw position;
  // that index is also the feedback key (session_id, message_index).
  /**
   * Fetch stored ratings for a session and stamp them onto its assistant
   * messages (`m.rating`), matching on the numeric part of the `h_<index>` id.
   *
   * The message list is captured BEFORE the request is awaited. If
   * `messages` is replaced while the request is in flight (session switch,
   * reload), ratings are applied to the captured list only — never to
   * whatever list happens to be current afterwards, whose `h_<n>` ids may
   * belong to a different session.
   *
   * @param id - Session whose feedback is requested.
   * @returns {Promise<void>} Failures are ignored; feedback is non-critical.
   */
  async function mergeFeedback(id) {
    // List that belongs to `id` at call time
    const targets = messages.value
    try {
      const { feedback = [] } = await api.getFeedback(id)
      if (!feedback.length) return
      const ratingByIndex = new Map(feedback.map(f => [f.message_index, f.rating]))
      for (const m of targets) {
        if (m.role !== 'assistant') continue
        const idx = m.id?.startsWith?.('h_') ? Number(m.id.slice(2)) : NaN
        if (Number.isInteger(idx) && ratingByIndex.has(idx)) {
          m.rating = ratingByIndex.get(idx)
        }
      }
    } catch {
      // Feedback is non-critical — ignore failures so chat still works.
    }
  }

  /**
   * Refresh `artifacts` from the session-content endpoint.
   *
   * Without an id, or when the request fails, the list is emptied. A response
   * without a `content` field also yields an empty list.
   *
   * @param [id=currentId.value] - Session to query.
   * @returns {Promise<void>}
   */
  async function fetchArtifacts(id = currentId.value) {
    let published = []
    if (id) {
      try {
        const response = await api.getSessionContent(id)
        // Only a missing field falls back to [] (mirrors a destructuring default)
        if (response.content !== undefined) {
          published = response.content
        }
      } catch {
        published = []
      }
    }
    artifacts.value = published
  }

  /**
   * Refresh `files` from the session file-listing endpoint.
   *
   * Without an id, or when the request fails, the list is emptied. A response
   * without a `files` field also yields an empty list.
   *
   * @param [id=currentId.value] - Session to query.
   * @returns {Promise<void>}
   */
  async function fetchFiles(id = currentId.value) {
    let listing = []
    if (id) {
      try {
        const response = await api.listSessionFiles(id)
        // Only a missing field falls back to [] (mirrors a destructuring default)
        if (response.files !== undefined) {
          listing = response.files
        }
      } catch {
        listing = []
      }
    }
    files.value = listing
  }

  /**
   * Insert or update an artifact in place.
   *
   * Artifacts are identified by `id`, falling back to `filename`. A new entry
   * is put at the front of the list; an existing one is replaced by a merge
   * of the stored entry and `item` (fields of `item` win). Items without a
   * `filename` are ignored. The stored object is always a copy of `item`.
   *
   * @param item - Artifact metadata.
   */
  function upsertArtifact(item) {
    if (!item?.filename) return
    const identityOf = (entry) => entry.id || entry.filename
    const wanted = identityOf(item)
    const list = artifacts.value
    const position = list.findIndex((entry) => identityOf(entry) === wanted)
    if (position < 0) {
      list.unshift({ ...item })
      return
    }
    list.splice(position, 1, { ...list[position], ...item })
  }

  // Toggle / set feedback on an assistant block. Optimistic — local state
  // updates first, server call follows; rolls back on failure.
  /**
   * Optimistically set (or toggle off) the rating of an assistant block.
   *
   * Only messages whose id is `h_<integer>` can be rated; the integer is the
   * feedback key sent to the server. Choosing the rating that is already
   * active resets it to 0. The local value changes first; if the request
   * fails, the previous value is restored and the error is rethrown.
   *
   * @param msg - Message object to rate (mutated).
   * @param rating - Requested rating value.
   * @returns {Promise<void>}
   * @throws Whatever `api.setFeedback` rejects with.
   */
  async function rateMessage(msg, rating) {
    if (!msg || !currentId.value) return

    const hasHistoryId = msg.id?.startsWith?.('h_')
    const messageIndex = hasHistoryId ? Number(msg.id.slice(2)) : NaN
    if (!Number.isInteger(messageIndex)) return

    const previous = msg.rating ?? 0
    // Clicking the active rating again clears it
    const requested = msg.rating === rating ? 0 : rating

    msg.rating = requested
    try {
      await api.setFeedback(currentId.value, messageIndex, requested)
    } catch (failure) {
      msg.rating = previous
      throw failure
    }
  }

  /**
   * Persist the composer draft of the active session in localStorage under
   * `draft:<sessionId>`. An empty draft removes the entry. Does nothing when
   * no session is active.
   *
   * @param text - Draft text; any falsy value clears the stored draft.
   */
  function saveDraft(text) {
    const sessionId = currentId.value
    if (!sessionId) return
    const storageKey = `draft:${sessionId}`
    if (!text) {
      localStorage.removeItem(storageKey)
      return
    }
    localStorage.setItem(storageKey, text)
  }

  /**
   * Read the stored composer draft for a session.
   *
   * @param id - Session identifier.
   * @returns The value stored under `draft:<id>`, or '' when there is none.
   */
  function loadDraft(id) {
    const stored = localStorage.getItem(`draft:${id}`)
    return stored ?? ''
  }

  // ─── WS event handlers ──────────────────────────────────────────────────

  /**
   * Begin a new assistant stream.
   *
   * If a previous in-progress message is still referenced (reconnect case) it
   * is removed from the list first so replayed events rebuild it from
   * scratch. A fresh, empty stream message is then appended and
   * `streamingMsg` is pointed at the list's own entry for it.
   */
  function onStreamStart() {
    const leftover = streamingMsg.value
    if (leftover) {
      const leftoverAt = messages.value.indexOf(leftover)
      if (leftoverAt !== -1) {
        messages.value.splice(leftoverAt, 1)
      }
      streamingMsg.value = null
    }

    streaming.value = true

    const list = messages.value
    list.push({
      id: `stream_${Date.now()}`,
      role: 'assistant',
      type: 'stream',
      thinking: null,    // { text, done } | null
      tools: [],         // tool card objects
      text: '',
      done: false,
      time: new Date().toISOString(),
      animate: !replayMode.value,  // no cursor animation during replay
      statusLabel: null,
    })
    // Read the entry back from the (reactive) array rather than keeping the
    // literal, so later mutations go through the array's proxy.
    streamingMsg.value = list[list.length - 1]
  }

  /**
   * Mark the start of a buffered-event replay. While the flag is set,
   * `onStreamStart()` creates its message with `animate: false`.
   */
  function onReplayStart() {
    replayMode.value = true
  }

  /**
   * Mark the end of a buffered-event replay and switch animation back on for
   * the message that is still streaming, if there is one.
   */
  function onReplayEnd() {
    replayMode.value = false
    const live = streamingMsg.value
    if (!live) return
    // Live events follow from here on, so the cursor may animate again
    live.animate = true
  }

  /**
   * Append a chunk to the thinking text of the streaming message, creating
   * the `{ text, done }` holder on first use. Ignored when nothing is
   * streaming.
   *
   * @param delta - Text fragment to append.
   */
  function onThinkingDelta(delta) {
    const live = streamingMsg.value
    if (!live) return
    if (!live.thinking) {
      live.thinking = { text: '', done: false }
    }
    // Always go through live.thinking so the write hits the reactive object
    live.thinking.text = live.thinking.text + delta
  }

  /**
   * Flag the thinking section of the streaming message as finished. No-op
   * when nothing is streaming or no thinking text was received.
   */
  function onThinkingEnd() {
    const thinking = streamingMsg.value?.thinking
    if (thinking) {
      thinking.done = true
    }
  }

  // Find the most recent spawn_agent tool card on a message (pending or finished) — subagent events attach to it
  /**
   * Return the most recently added `spawn_agent` tool card of a message.
   * The card's `pending` state is not considered.
   *
   * @param msg - Message with a `tools` array.
   * @returns The matching card, or undefined when there is none.
   */
  function _lastSpawnCard(msg) {
    // Hand-written reverse scan: avoids depending on Array.prototype.findLast
    let cursor = msg.tools.length
    while (cursor-- > 0) {
      const candidate = msg.tools[cursor]
      const isSpawn = candidate.kind === 'tool' && candidate.name === 'spawn_agent'
      if (isSpawn) return candidate
    }
    return undefined
  }

  /**
   * Record a completed thinking block for one turn.
   *
   * Sub-agent thinking is attached to the steps of the latest spawn_agent
   * card when one exists; everything else (including sub-agent thinking with
   * no spawn card) goes to the message's own tool list.
   *
   * @param data - Event payload; reads `thinking` and `is_subagent`.
   */
  function onTurnThinking(data) {
    const live = streamingMsg.value
    if (!live) return

    // Shaped so ThinkingCard can take the entry directly as :msg (stable reference)
    const entry = {
      kind: 'turn_thinking',
      isSubagent: data.is_subagent ?? false,
      thinking: { text: data.thinking ?? '', done: true },
    }

    const host = data.is_subagent ? _lastSpawnCard(live) : undefined
    const destination = host ? host.steps : live.tools
    destination.push(entry)
  }

  /**
   * Show a planning status label.
   *
   * Parent-agent labels go to the streaming message's `statusLabel`.
   * Sub-agent labels go to the latest spawn_agent card's `planningLabel` and
   * are dropped when no such card exists — they never reach the parent UI.
   *
   * @param data - Event payload; reads `label` and `is_subagent`.
   */
  function onPlanningStatus(data) {
    const live = streamingMsg.value
    if (!live) return

    const label = data.label ?? ''
    if (!data.is_subagent) {
      live.statusLabel = label
      return
    }

    const host = _lastSpawnCard(live)
    if (host) {
      host.planningLabel = label
    }
  }

  /**
   * Replace the planning status with the finished plan card.
   *
   * Parent-agent plans clear `statusLabel` and are appended to the message's
   * tool list. Sub-agent plans clear `planningLabel` on the latest
   * spawn_agent card and are appended to its steps; without such a card they
   * are dropped — they never reach the parent UI.
   *
   * @param data - Event payload; reads `plan` and `is_subagent`.
   */
  function onPlanReady(data) {
    const live = streamingMsg.value
    if (!live) return

    const planCard = { kind: 'plan', text: data.plan ?? '' }

    if (!data.is_subagent) {
      live.statusLabel = null
      live.tools.push(planCard)
      return
    }

    const host = _lastSpawnCard(live)
    if (!host) return
    host.planningLabel = null
    host.steps.push(planCard)
  }

  /**
   * Add a pending tool card for a tool invocation that just started.
   *
   * Sub-agent tools are attached to the steps of the latest spawn_agent card
   * when one exists; otherwise the card goes to the message's own tool list.
   *
   * The card id is `tool_<timestamp>_<suffix>`, where the suffix is the tool
   * call id when the event carries one and otherwise the card's position in
   * its destination list. A bare timestamp is not enough: several
   * tool-start events are routinely handled within the same millisecond
   * (parallel calls, reconnect replay) and would share one id.
   *
   * @param data - Event payload; reads `tool`, `args`, `tool_call_id` and
   *   `is_subagent`.
   */
  function onToolStarted(data) {
    const msg = streamingMsg.value
    if (!msg) return

    const spawn = data.is_subagent ? _lastSpawnCard(msg) : undefined
    const destination = spawn ? spawn.steps : msg.tools

    const now = Date.now()
    const uniqueSuffix = data.tool_call_id ?? destination.length
    destination.push({
      kind: 'tool',
      id: `tool_${now}_${uniqueSuffix}`,
      toolCallId: data.tool_call_id ?? null,
      name: data.tool,
      args: data.args,
      result: null,
      success: null,
      pending: true,
      startedAt: now,
      isSubagent: data.is_subagent ?? false,
      steps: []
    })
  }

  /**
   * Apply a tool result to the card created by `onToolStarted()`.
   *
   * Card lookup runs newest-first. With a `tool_call_id` the card's
   * `toolCallId` must match; without one, the newest pending tool card with
   * the same name is used. Sub-agent results are looked up in the steps of
   * the latest spawn_agent card first and fall back to the top-level cards
   * when nothing matches there.
   *
   * A successful top-level result additionally triggers follow-up refreshes
   * depending on the tool: published content is upserted into `artifacts`,
   * filesystem/terminal/code_exec results refetch the file tree (and
   * artifacts for write/edit), and recall tools update the recall banner and
   * the sessions store.
   *
   * @param data - Event payload; reads `tool`, `tool_call_id`, `result`,
   *   `success`, `metadata` and `is_subagent`.
   */
  function onToolCall(data) {
    const live = streamingMsg.value
    if (!live) return

    // Newest-first search for the card this result belongs to
    const locate = (cards) => {
      for (let pos = cards.length - 1; pos >= 0; pos--) {
        const candidate = cards[pos]
        const isHit = data.tool_call_id
          ? candidate.toolCallId === data.tool_call_id
          : candidate.kind === 'tool' && candidate.name === data.tool && candidate.pending
        if (isHit) return candidate
      }
      return null
    }
    // Anything but an explicit `success: false` counts as success
    const settle = (target) => {
      target.result = data.result
      target.success = data.success !== false
      target.pending = false
    }

    // Sub-agent tool call → update inside spawn_agent steps
    if (data.is_subagent) {
      const host = _lastSpawnCard(live)
      const step = host ? locate(host.steps) : null
      if (step) {
        settle(step)
        return
      }
    }

    // Top-level tool call
    const card = locate(live.tools)
    if (!card) return
    settle(card)
    if (data.metadata) card.metadata = data.metadata

    // All follow-up refreshes only apply to successful calls
    if (!card.success) return

    switch (data.tool) {
      case 'content_publish':
        if (data.metadata) upsertArtifact(data.metadata)
        break

      case 'filesystem': {
        if (!currentId.value) break
        const action = typeof card.args === 'object' ? card.args?.action : null
        if (['write', 'edit', 'append', 'mkdir', 'rm', 'cp', 'mv'].includes(action)) {
          fetchFiles(currentId.value)
        }
        if (['write', 'edit'].includes(action)) {
          fetchArtifacts(currentId.value)
        }
        break
      }

      case 'terminal':
      case 'code_exec':
        if (currentId.value) fetchFiles(currentId.value)
        break

      // Update recall banner live when scheduling or managing recalls
      case 'schedule_recall':
        if (!currentId.value) break
        loadRecall(currentId.value)
        useSessionsStore().updateRecallStatus(currentId.value, true)
        break

      case 'manage_recall': {
        if (!currentId.value) break
        const action = card.args?.action
        if (action === 'cancel') {
          recall.value = null
          useSessionsStore().updateRecallStatus(currentId.value, false)
        } else if (action === 'skip') {
          loadRecall(currentId.value)
        }
        break
      }

      default:
        break
    }
  }

  /**
   * Append a text chunk to the streaming message. The first chunk after a
   * status label was shown clears that label. Ignored when nothing is
   * streaming.
   *
   * @param delta - Text fragment to append.
   */
  function onStreamDelta(delta) {
    const live = streamingMsg.value
    if (!live) return
    if (live.statusLabel) {
      live.statusLabel = null
    }
    live.text = live.text + delta
  }

  /**
   * Finalize the streaming message when the stream completes.
   *
   * Marks the message done, copies the run metrics onto it, gives it an
   * `h_<n>` id, and drops it from the list again if it never received
   * thinking, tools or text. Then clears the streaming flags, updates the
   * context counters, pushes an 80-character preview to the sessions store
   * and kicks off background session naming.
   *
   * @param data - Event payload (may be missing); reads `elapsed_seconds`,
   *   `tool_call_count`, `token_count`, `context_tokens`,
   *   `max_context_tokens`.
   */
  function onStreamEnd(data) {
    const finished = streamingMsg.value
    if (finished) {
      finished.done = true
      finished.elapsed_seconds = data?.elapsed_seconds ?? null
      finished.tool_call_count = data?.tool_call_count ?? null
      finished.token_count = data?.token_count ?? null

      // Assign an h_ id so the rating UI appears and feedback can be mapped.
      // NOTE(review): n is the ordinal among assistant messages in the list,
      // while ids produced by buildMessageList() use the raw message index —
      // the two schemes look like they can collide/mismatch; confirm against
      // the feedback API's message_index.
      let assistantCount = 0
      for (const entry of messages.value) {
        if (entry.role === 'assistant') assistantCount += 1
      }
      finished.id = `h_${assistantCount - 1}`
      streamingMsg.value = null

      // Purge if nothing was ever written (avoids empty msg-assistant divs)
      const isEmpty = !finished.thinking && !finished.tools.length && !finished.text
      if (isEmpty) {
        messages.value = messages.value.filter((entry) => entry !== finished)
      }
    }
    streaming.value = false

    const usedTokens = data?.context_tokens
    if (usedTokens != null) contextTokens.value = usedTokens
    const tokenLimit = data?.max_context_tokens
    if (tokenLimit != null) maxContextTokens.value = tokenLimit

    const sessionId = currentId.value
    if (!sessionId) return

    // Update session preview
    if (finished?.text) {
      useSessionsStore().updatePreview(sessionId, finished.text.slice(0, 80))
    }
    // Try to generate session name in background (only if not named yet)
    _tryGenerateName(sessionId)
  }

  /**
   * Finalize the streaming message when the stream was stopped early.
   *
   * Marks the message done, gives it an `h_<n>` id (n = ordinal among the
   * assistant messages in the list) and drops it from the list again if it
   * never received thinking, tools or text. Unlike `onStreamEnd()` no
   * metrics, preview or naming are involved.
   */
  function onStreamStopped() {
    const halted = streamingMsg.value
    if (halted) {
      halted.done = true

      let assistantCount = 0
      for (const entry of messages.value) {
        if (entry.role === 'assistant') assistantCount += 1
      }
      halted.id = `h_${assistantCount - 1}`
      streamingMsg.value = null

      const isEmpty = !halted.thinking && !halted.tools.length && !halted.text
      if (isEmpty) {
        messages.value = messages.value.filter((entry) => entry !== halted)
      }
    }
    streaming.value = false
  }

  /**
   * Track a profile change reported by the server.
   *
   * @param data - Event payload; its `profile_id` becomes `currentProfileId`.
   */
  function onProfileSwitched(data) {
    const { profile_id: profileId } = data
    currentProfileId.value = profileId
  }

  /**
   * Context compression has begun: refresh the token counters (when the
   * event carries them) and append a temporary "compressing" system message.
   * `onContextCompressed()` removes it again.
   *
   * @param data - Event payload (may be missing); reads `context_tokens` and
   *   `max_context_tokens`.
   */
  function onCompressionStarted(data) {
    const usedTokens = data?.context_tokens
    if (usedTokens != null) contextTokens.value = usedTokens
    const tokenLimit = data?.max_context_tokens
    if (tokenLimit != null) maxContextTokens.value = tokenLimit

    // Transient notice, replaced by the real compression card later
    const placeholder = {
      id: `compress_pending_${Date.now()}`,
      role: 'system',
      type: 'compression_pending',
      text: 'Compressing context…'
    }
    messages.value.push(placeholder)
  }

  /**
   * Context compression finished: refresh the token counters, remove every
   * pending-compression placeholder and append the final compression notice.
   *
   * The payload is optional throughout. Previously only the token fields
   * were read null-safely, so a missing payload threw after the placeholders
   * had already been removed, leaving the list without any notice.
   *
   * @param data - Event payload (may be missing); reads `context_tokens`,
   *   `max_context_tokens`, `messages_before`, `messages_after`, `summary`.
   */
  function onContextCompressed(data) {
    if (data?.context_tokens != null) contextTokens.value = data.context_tokens
    if (data?.max_context_tokens != null) maxContextTokens.value = data.max_context_tokens
    // Remove any pending compression notices — the real one is here now
    messages.value = messages.value.filter(m => m.type !== 'compression_pending')
    messages.value.push({
      id: `compress_${Date.now()}`,
      role: 'system',
      type: 'compression_notice',
      before: data?.messages_before,
      after: data?.messages_after,
      summary: data?.summary ?? ''
    })
  }

  /**
   * Handle a server-side error event.
   *
   * Ends the current stream and appends an error notice. The interrupted
   * assistant message is finalized the same way `onStreamStopped()` does it:
   * it is marked done, and removed if it never received thinking, tools or
   * text. Without this it stays in the list with `done: false` forever,
   * because nothing references it once `streamingMsg` is cleared.
   *
   * @param data - Event payload (may be missing); `message` is shown to the
   *   user, with a generic fallback.
   */
  function onError(data) {
    const interrupted = streamingMsg.value
    streaming.value = false
    streamingMsg.value = null
    if (interrupted) {
      interrupted.done = true
      const isEmpty = !interrupted.thinking && !interrupted.tools?.length && !interrupted.text
      if (isEmpty) {
        messages.value = messages.value.filter(m => m !== interrupted)
      }
    }
    messages.value.push({
      id: `err_${Date.now()}`,
      role: 'system',
      type: 'error',
      text: data?.message ?? 'An error occurred'
    })
  }

  /**
   * Refresh `recall` for a session.
   *
   * The response is stored only when it is an object with a truthy `id`;
   * a missing id, an empty response, a failed request or a falsy session id
   * all reset `recall` to null.
   *
   * @param [id=currentId.value] - Session to query.
   * @returns {Promise<void>}
   */
  async function loadRecall(id = currentId.value) {
    let scheduled = null
    if (id) {
      try {
        const data = await api.getSessionRecall(id)
        scheduled = data && data.id ? data : null
      } catch {
        scheduled = null
      }
    }
    recall.value = scheduled
  }

  /**
   * Load the next page of archived (older) messages and prepend it to
   * `messages`.
   *
   * Does nothing without an active session, while a page is already loading,
   * or when no more pages are available. The response is discarded if the
   * active session (or the pending session load) changed while the request
   * was in flight — otherwise one session's archive would be prepended to
   * another session's messages and corrupt its pagination cursor.
   *
   * @returns {Promise<void>} Request failures are logged, not rethrown.
   */
  async function loadArchivedMessages() {
    const sessionId = currentId.value
    if (!sessionId || archiveLoading.value || !archiveHasMore.value) return
    const loadAtStart = _lastLoadId
    archiveLoading.value = true
    try {
      const data = await api.getArchivedMessages(sessionId, archiveNextBefore.value)
      // Session switched (or a switch is under way): this page is stale
      if (currentId.value !== sessionId || _lastLoadId !== loadAtStart) return
      const built = buildMessageList(data.items ?? [])
      // Prepend older messages to the beginning of the list
      messages.value = [...built, ...messages.value]
      archiveHasMore.value = data.has_more ?? false
      archiveNextBefore.value = data.next_before_seq ?? null
    } catch (err) {
      console.error('[chat] loadArchivedMessages failed', err)
    } finally {
      archiveLoading.value = false
    }
  }

  /**
   * Set the archive pagination state directly.
   *
   * @param [hasMore=false] - Whether older messages can still be loaded.
   * @param [nextBefore=null] - Cursor for the next archive request.
   */
  function initArchiveState(hasMore = false, nextBefore = null) {
    archiveHasMore.value = hasMore
    archiveNextBefore.value = nextBefore
  }

  /**
   * React to a recall status event for any session.
   *
   * The sessions store is always told whether the session has a pending
   * recall. When the event concerns the active session, the banner is either
   * refetched (pending — the event itself is only a partial update) or
   * cleared.
   *
   * @param data - Event payload; reads `session_id` and `status`. Events
   *   without a session id are ignored.
   */
  function onRecallUpdate(data) {
    const { session_id: sessionId, status } = data
    if (!sessionId) return

    const isPending = status === 'pending'
    useSessionsStore().updateRecallStatus(sessionId, isPending)

    if (sessionId !== currentId.value) return
    if (!isPending) {
      recall.value = null
      return
    }
    // Partial update from event — refetch for full details
    loadRecall(sessionId)
  }

  /**
   * Append a locally created user message to the list.
   *
   * The attachment arrays are copied, so the caller may clear or reuse its
   * own arrays afterwards. Both are optional: an omitted or null value is
   * treated as "no attachments" instead of throwing on the spread.
   *
   * @param text - Message text.
   * @param [images=[]] - Image attachments.
   * @param [files=[]] - File attachments.
   */
  function appendUserMessage(text, images = [], files = []) {
    messages.value.push({
      id: `user_${Date.now()}`,
      role: 'user',
      text,
      images: [...(images ?? [])],
      files: [...(files ?? [])],
      time: new Date().toISOString(),
      animate: true
    })
  }

  // ─── Helpers ────────────────────────────────────────────────────────────

  // The server returns a flat array:
  //   { role:'assistant', tool_calls:[{id,name,arguments}] }  ← tool request (no content)
  //   { role:'tool', tool_call_id, name, content }            ← tool result
  //   { role:'assistant', content }                           ← final text response
  //
  // We group them into single AssistantMessage objects with tools[] + text.
  /**
   * Convert the server's flat message array into display messages.
   *
   * Walks `raw` once with an index `i`. Every produced message gets the id
   * `h_<i>` where `i` is the raw index of the first raw message it was built
   * from, and (except compression notices) a `rawIndices` array.
   *
   *  - `is_compression`   → system `compression_notice` with the content as summary
   *  - other `system`     → skipped
   *  - `is_summary`       → assistant message of type `summary`
   *  - `user`             → user message; images that are not already `data:`
   *                         URIs are prefixed as base64 JPEG data URIs
   *  - run of `assistant` → merged into one assistant message of type
   *                         `history` (plan cards, tool cards with their
   *                         results, first thinking text, final text, metrics)
   *  - anything else      → skipped (e.g. a tool result with no preceding call)
   *
   * @param raw - Flat array of raw messages as returned by the API.
   * @returns Array of display message objects, in the original order.
   */
  function buildMessageList(raw) {
    const result = []
    let i = 0

    while (i < raw.length) {
      const m = raw[i]

      // Compression notice
      if (m.is_compression) {
        result.push({ id: `h_${i}`, role: 'system', type: 'compression_notice', summary: m.content ?? '' })
        i++; continue
      }

      // Skip bare system messages
      if (m.role === 'system') { i++; continue }

      // Summary messages are shown as their own assistant entry, whatever their role
      if (m.is_summary) {
        result.push({ id: `h_${i}`, role: 'assistant', type: 'summary', text: m.content ?? '', rawIndices: [i] })
        i++; continue
      }

      if (m.role === 'user') {
        // Images already given as data: URIs pass through; anything else is treated as base64 JPEG
        const imgs = (m.images ?? []).map(b =>
          (typeof b === 'string' && b.startsWith('data:')) ? b : `data:image/jpeg;base64,${String(b ?? '')}`
        )
        result.push({ id: `h_${i}`, role: 'user', text: m.content ?? '', images: imgs, files: m.files ?? [], time: m.created_at ?? null, rawIndices: [i], isRecall: m.is_recall ?? false })
        i++; continue
      }

      if (m.role === 'assistant') {
        // Collect all tool-call rounds + final content into one message
        const firstIdx = i
        const tools = []
        let thinking = null
        let text = ''
        let time = null

        // Consumes consecutive assistant messages (and the tool results that
        // follow each tool-call round) until a non-empty text is found or the
        // run of assistant messages ends.
        while (i < raw.length && raw[i].role === 'assistant') {
          const am = raw[i]

          // Plan card — stored as a separate is_plan message, inject into tools array
          if (am.is_plan) {
            tools.push({ kind: 'plan', text: am.content ?? '' })
            i++
            continue
          }

          // Accumulate thinking from any assistant turn (first one wins for display)
          if (am.thinking && !thinking) {
            thinking = { text: am.thinking, done: true }
          }

          if (am.tool_calls?.length) {
            // Build a lookup map for this round's tool calls
            const callMap = {}
            for (const tc of am.tool_calls) {
              const card = {
                kind: 'tool',
                id: tc.id,
                name: tc.name,
                args: tc.arguments ?? {},
                result: null,
                success: true,
                pending: false,
                isSubagent: false,
                steps: []
              }
              tools.push(card)
              callMap[tc.id] = card
            }
            i++
            // Collect matching tool results
            while (i < raw.length && raw[i].role === 'tool') {
              const tr = raw[i]
              const card = callMap[tr.tool_call_id]
              if (card) {
                card.result = tr.content ?? ''
                // A result whose content starts with "Error:" marks the call as failed
                card.success = !tr.content?.startsWith('Error:')
                card.metadata = tr.metadata ?? {}
              }
              i++
            }
          } else {
            const content = am.content ?? ''
            // Timestamp comes from the first text-bearing (non tool-call) assistant turn that has one
            if (!time && am.created_at) time = am.created_at
            i++
            if (content) {
              // Non-empty text → this is the final response
              text = content
              break
            }
            // Empty content: if no more assistant messages follow, we're done
            if (i >= raw.length || raw[i].role !== 'assistant') {
              break
            }
            // Otherwise skip this empty intermediate message and keep accumulating
          }
        }

        // Only add the message if there's something to show
        if (thinking || tools.length || text) {
          const msgId = `h_${firstIdx}`
          // Scan the group for metrics (saved on the final assistant turn)
          // — when several turns carry a value, the last one in the group wins
          let elapsed_seconds = null, tool_call_count = null, token_count = null
          for (let j = firstIdx; j < i; j++) {
            const am = raw[j]
            if (!am || am.role !== 'assistant') continue
            if (am.elapsed_seconds != null) elapsed_seconds = am.elapsed_seconds
            if (am.tool_call_count != null) tool_call_count = am.tool_call_count
            if (am.token_count     != null) token_count     = am.token_count
          }
          // rawIndices covers every raw message consumed by this group, tool results included
          result.push({ id: msgId, role: 'assistant', type: 'history', thinking, tools, text, done: true, time, elapsed_seconds, tool_call_count, token_count, rawIndices: Array.from({ length: i - firstIdx }, (_, j) => firstIdx + j) })
        }
        // i already points past the group (the inner loop always advances it)
        continue
      }

      // Orphan tool message (shouldn't happen)
      i++
    }

    return result
  }

  // Public surface of the store: state refs first, then actions, then the
  // WebSocket event handlers. `replayMode` and `mergeFeedback` are defined
  // above but are not part of the returned object.
  return {
    currentId,
    currentProfileId,
    messages,
    streaming,
    pendingImages,
    pendingFiles,
    artifacts,
    files,
    contextTokens,
    maxContextTokens,
    loading,
    streamingMsg,
    scrollToMessageIndex,
    recall,
    archiveLoading,
    archiveHasMore,
    archiveNextBefore,
    loadSession,
    clearSession,
    reloadSession,
    fetchArtifacts,
    fetchFiles,
    rateMessage,
    saveDraft,
    loadDraft,
    appendUserMessage,
    buildMessageList,
    onStreamStart,
    onReplayStart,
    onReplayEnd,
    onThinkingDelta,
    onThinkingEnd,
    onTurnThinking,
    onPlanningStatus,
    onPlanReady,
    onToolStarted,
    onToolCall,
    onStreamDelta,
    onStreamEnd,
    onStreamStopped,
    onProfileSwitched,
    onCompressionStarted,
    onContextCompressed,
    onError,
    loadRecall,
    onRecallUpdate,
    loadArchivedMessages,
    initArchiveState
  }
})