diff --git a/client/index.html b/client/index.html
index 3776b26..834febd 100644
--- a/client/index.html
+++ b/client/index.html
@@ -36,15 +36,18 @@
diff --git a/client/js/app.js b/client/js/app.js
index 33e0ec7..4aa27bb 100644
--- a/client/js/app.js
+++ b/client/js/app.js
@@ -7,23 +7,25 @@
// ββ DOM refs βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
-const profileSelect = document.getElementById('profile-select');
-const btnNew = document.getElementById('btn-new');
-const sessionListEl = document.getElementById('session-list');
-const chatHeaderEl = document.getElementById('chat-header');
-const messagesEl = document.getElementById('messages');
-const textarea = document.getElementById('input');
-const btnSend = document.getElementById('btn-send');
+const profileSelect = document.getElementById('profile-select');
+const btnNew = document.getElementById('btn-new');
+const sessionListEl = document.getElementById('session-list');
+const chatHeaderEl = document.getElementById('chat-header');
+const messagesEl = document.getElementById('messages');
+const textarea = document.getElementById('input');
+const btnSend = document.getElementById('btn-send');
+const btnAttach = document.getElementById('btn-attach');
+const fileInput = document.getElementById('file-input');
+const previewStrip = document.getElementById('image-preview-strip');
// ββ State βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
-const STORAGE_KEY = 'navi_current_session';
-
let profiles = [];
let sessions = [];
-let currentId = localStorage.getItem(STORAGE_KEY) ?? null;
+let currentId = null;
let streaming = false;
let currentBubble = null;
+let pendingImages = []; // array of full data URLs (data:image/...;base64,...)
const ws = new WsClient();
@@ -32,8 +34,11 @@
async function init() {
textarea.addEventListener('keydown', onKey);
textarea.addEventListener('input', autoResize);
+ textarea.addEventListener('paste', onPaste);
btnSend.addEventListener('click', sendMessage);
btnNew.addEventListener('click', newChat);
+ btnAttach.addEventListener('click', () => fileInput.click());
+ fileInput.addEventListener('change', onFileChange);
[profiles, sessions] = await Promise.all([api.getProfiles(), api.getSessions()]);
@@ -43,11 +48,15 @@
renderProfiles(profileSelect, profiles);
rerenderSidebar();
- // Restore last active session
- if (currentId && sessions.some(s => s.session_id === currentId)) {
- await openSession(currentId, false);
+ // Open session from URL hash, or fall back to most recently active
+ const hashId = location.hash.slice(1);
+ const targetId = hashId && sessions.some(s => s.session_id === hashId)
+ ? hashId
+ : sessions[0]?.session_id ?? null;
+
+ if (targetId) {
+ await openSession(targetId, false);
} else {
- currentId = null;
showEmptyState(messagesEl);
setInputEnabled(false);
}
@@ -72,7 +81,7 @@
async function openSession(sessionId, skipLoad = false) {
ws.disconnect();
currentId = sessionId;
- localStorage.setItem(STORAGE_KEY, sessionId);
+ history.replaceState(null, '', '#' + sessionId);
rerenderSidebar();
const s = sessions.find(s => s.session_id === sessionId);
@@ -92,12 +101,38 @@
messagesEl.innerHTML = '';
try {
const data = await api.getSession(sessionId);
+
+ // Build a lookup: tool_call_id → {name, args} from assistant tool_calls
+ const toolCallMap = {};
for (const msg of data.messages) {
- if (msg.role === 'system') continue;
- if (msg.role === 'user' || (msg.role === 'assistant' && msg.content)) {
- appendMessage(messagesEl, msg.role, msg.content);
+ if (msg.role === 'assistant' && msg.tool_calls) {
+ for (const tc of msg.tool_calls) {
+ toolCallMap[tc.id] = { name: tc.name, args: tc.arguments ?? {} };
+ }
}
}
+
+ for (const msg of data.messages) {
+ if (msg.role === 'system') continue;
+
+ if (msg.role === 'tool') {
+ const tc = toolCallMap[msg.tool_call_id] ?? { name: msg.name ?? '?', args: {} };
+ const success = !msg.content?.startsWith('Error:');
+ appendToolCall(messagesEl, {
+ tool: tc.name,
+ args: tc.args,
+ result: msg.content ?? '',
+ success,
+ });
+ continue;
+ }
+
+ if (msg.role === 'user' || (msg.role === 'assistant' && msg.content)) {
+ const imgs = msg.images?.map(b => b.startsWith('data:') ? b : `data:image/jpeg;base64,${b}`) ?? null;
+ appendMessage(messagesEl, msg.role, msg.content, imgs, msg.created_at ?? null);
+ }
+ }
+
scrollToBottom(messagesEl);
} catch (e) {
console.error('loadHistory', e);
@@ -110,7 +145,7 @@
if (currentId === sessionId) {
ws.disconnect();
currentId = null;
- localStorage.removeItem(STORAGE_KEY);
+ history.replaceState(null, '', location.pathname);
showEmptyState(messagesEl);
updateChatHeader(chatHeaderEl, null);
setInputEnabled(false);
@@ -140,15 +175,16 @@
switch (event.type) {
case 'stream_start':
streaming = true;
- removeTypingIndicator(messagesEl);
- currentBubble = appendStreamBubble(messagesEl);
+ currentBubble = null; // bubble created lazily on first delta, so tool cards appear first
break;
case 'stream_delta':
- if (currentBubble) {
- currentBubble.textContent += event.delta;
- scrollToBottom(messagesEl);
+ if (!currentBubble) {
+ removeTypingIndicator(messagesEl); // remove only when text actually starts
+ currentBubble = appendStreamBubble(messagesEl);
}
+ currentBubble.textContent += event.delta;
+ scrollToBottom(messagesEl);
break;
case 'tool_call':
@@ -171,16 +207,17 @@
+/**
+ * Finish the current streaming turn.
+ *
+ * Clears the `streaming` flag and removes the typing indicator. When
+ * `finalContent` is provided, the stream bubble is finalized with it (the
+ * bubble is created first if none exists yet) and `updatePreview` is called
+ * for the current session. Otherwise only the `cursor` class is removed from
+ * an existing bubble. `currentBubble` is reset and the view scrolled to the
+ * bottom in every case.
+ *
+ * @param {string} [finalContent] - Complete assistant text, if available.
+ */
function finishStream(finalContent) {
streaming = false;
- removeTypingIndicator(messagesEl);
- if (currentBubble) {
- if (finalContent !== undefined) {
- finalizeStreamBubble(currentBubble, finalContent);
- updatePreview(currentId, finalContent);
- } else {
- currentBubble.classList.remove('cursor');
+ removeTypingIndicator(messagesEl); // safe to call even if already removed
+ if (finalContent !== undefined) {
+ // Bubbles are created lazily on the first delta, so none may exist here yet
+ if (!currentBubble) {
+ currentBubble = appendStreamBubble(messagesEl);
}
- currentBubble = null;
+ finalizeStreamBubble(currentBubble, finalContent);
+ updatePreview(currentId, finalContent);
+ } else if (currentBubble) {
+ currentBubble.classList.remove('cursor');
}
+ currentBubble = null;
scrollToBottom(messagesEl);
}
@@ -188,17 +225,22 @@
+/**
+ * Send the textarea content and any queued images over the WebSocket.
+ *
+ * Does nothing when there is neither text nor a pending image, when the socket
+ * is not ready, or while a response is streaming. Otherwise the pending images
+ * are snapshotted and cleared, the input is reset and disabled, the user
+ * message and a typing indicator are appended, and the payload is sent.
+ */
async function sendMessage() {
const text = textarea.value.trim();
- if (!text || !ws.ready || streaming) return;
+ if ((!text && !pendingImages.length) || !ws.ready || streaming) return;
+ // Copy before clearImages() resets the queue
+ const imagesToSend = [...pendingImages]; // full data URLs
+ clearImages();
textarea.value = '';
autoResize();
setInputEnabled(false);
- appendMessage(messagesEl, 'user', text);
+ // Display with full data URLs
+ appendMessage(messagesEl, 'user', text || null, imagesToSend.length ? imagesToSend : null);
appendTypingIndicator(messagesEl);
scrollToBottom(messagesEl);
- ws.send(text);
+ // Strip data URI prefix before sending to server (server expects raw base64)
+ const b64List = imagesToSend.map(d => d.split(',', 2)[1]);
+ // Image-only messages are sent with a single space as content
+ // NOTE(review): presumably the server requires non-empty content — confirm
+ ws.send(text || ' ', b64List.length ? b64List : null);
}
function onKey(e) {
@@ -230,8 +272,9 @@
}
+/**
+ * Enable or disable the message textarea, the send button and the attach
+ * button together; when enabling, focus moves to the textarea.
+ *
+ * @param {boolean} on - true to enable the controls, false to disable them.
+ */
function setInputEnabled(on) {
- textarea.disabled = !on;
- btnSend.disabled = !on;
+ textarea.disabled = !on;
+ btnSend.disabled = !on;
+ btnAttach.disabled = !on;
if (on) textarea.focus();
}
@@ -240,6 +283,61 @@
textarea.style.height = Math.min(textarea.scrollHeight, 180) + 'px';
}
+// ββ Image handling ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Queue an image file for the next outgoing message.
+ *
+ * The file is read asynchronously as a data URL; once loaded it is appended to
+ * `pendingImages` and the preview strip is re-rendered. A missing file or a
+ * file whose type is not `image/*` is ignored.
+ *
+ * @param {File|null} file - File from the picker or the clipboard. May be null
+ *   because `DataTransferItem.getAsFile()` returns null for non-file items.
+ */
+function addImageFile(file) {
+  if (!file || !file.type.startsWith('image/')) return;
+  const reader = new FileReader();
+  reader.onload = (e) => {
+    // Store the full data URL so we retain mime type for display
+    pendingImages.push(e.target.result);
+    renderPreviewStrip();
+  };
+  // Report read failures instead of dropping the image silently
+  reader.onerror = () => console.error('addImageFile', reader.error);
+  reader.readAsDataURL(file);
+}
+
+/**
+ * `change` handler for the file input: queues every selected file via
+ * `addImageFile`, then resets the input's value.
+ */
+function onFileChange(e) {
+  Array.from(e.target.files).forEach((picked) => addImageFile(picked));
+  fileInput.value = '';
+}
+
+/**
+ * `paste` handler for the textarea: every clipboard item that is an image
+ * file is queued via `addImageFile`, and the default paste action is
+ * cancelled for it. Other clipboard content is left untouched.
+ */
+function onPaste(e) {
+  const clipboardItems = e.clipboardData?.items ?? [];
+  for (const entry of clipboardItems) {
+    const isImageFile = entry.kind === 'file' && entry.type.startsWith('image/');
+    if (!isImageFile) continue;
+    e.preventDefault();
+    addImageFile(entry.getAsFile());
+  }
+}
+
+/** Empty the preview strip and drop every queued image. */
+function clearImages() {
+  previewStrip.innerHTML = '';
+  pendingImages = [];
+}
+
+/**
+ * Rebuild the thumbnail strip from `pendingImages`.
+ *
+ * Each thumbnail gets a remove button. Removing an image re-renders the whole
+ * strip, so the index captured by each remaining button stays in sync with
+ * the array.
+ */
+function renderPreviewStrip() {
+  previewStrip.innerHTML = '';
+  pendingImages.forEach((dataUrl, i) => {
+    const wrap = document.createElement('div');
+    wrap.className = 'img-thumb-wrap';
+
+    const img = document.createElement('img');
+    img.src = dataUrl; // full data URL
+    img.className = 'img-thumb';
+    img.alt = 'attached image'; // same alt text as images rendered inside chat bubbles
+
+    const btn = document.createElement('button');
+    btn.type = 'button'; // never behave as a submit button if the input bar sits in a form
+    btn.className = 'img-thumb-remove';
+    btn.textContent = '\u00D7'; // multiplication sign, escaped so the glyph survives re-encoding
+    btn.setAttribute('aria-label', 'Remove image');
+    btn.addEventListener('click', () => {
+      pendingImages.splice(i, 1);
+      renderPreviewStrip();
+    });
+
+    wrap.append(img, btn);
+    previewStrip.appendChild(wrap);
+  });
+}
+
// ββ Start βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
init();
diff --git a/client/js/chat.js b/client/js/chat.js
index 9005d7e..a410076 100644
--- a/client/js/chat.js
+++ b/client/js/chat.js
@@ -27,6 +27,7 @@
code_exec: 'βοΈ',
terminal: 'π»',
ssh_exec: 'π§',
+ image_view: 'πΌοΈ',
};
// ββ Helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
@@ -53,24 +54,40 @@
/**
* Append a complete message bubble (used for history and user messages).
* Assistant messages are rendered as markdown; user messages as plain text.
+ * Pass images (array of base64 strings) to render them in the bubble.
* Returns the bubble element.
*/
-export function appendMessage(el, role, content) {
+export function appendMessage(el, role, content, images = null, timestamp = null) {
const wrap = document.createElement('div');
wrap.className = `msg ${role}`;
const bubble = document.createElement('div');
bubble.className = 'bubble';
+ if (images?.length) {
+ const imgStrip = document.createElement('div');
+ imgStrip.className = 'bubble-images';
+ for (const b64 of images) {
+ const img = document.createElement('img');
+ img.src = b64.startsWith('data:') ? b64 : `data:image/jpeg;base64,${b64}`;
+ img.className = 'bubble-img';
+ img.alt = 'attached image';
+ imgStrip.appendChild(img);
+ }
+ bubble.appendChild(imgStrip);
+ }
+
if (role === 'assistant') {
bubble.appendChild(renderMarkdown(content));
- } else {
- bubble.textContent = content;
+ } else if (content) {
+ const text = document.createElement('span');
+ text.textContent = content;
+ bubble.appendChild(text);
}
const time = document.createElement('div');
time.className = 'msg-time';
- time.textContent = timeLabel(new Date().toISOString());
+ time.textContent = timeLabel(timestamp ?? new Date().toISOString());
wrap.append(bubble, time);
el.appendChild(wrap);
@@ -107,29 +124,42 @@
}
/**
- * Tool call card with accordion for arguments + result.
+ * Tool call card — collapsed by default, click header to toggle.
*/
export function appendToolCall(el, event) {
const icon = TOOL_ICONS[event.tool] ?? 'π§';
const success = event.success;
- // Format args as readable lines
const argsLines = Object.entries(event.args ?? {})
.map(([k, v]) => `${esc(k)}${esc(JSON.stringify(v))}`)
.join('');
- const card = document.createElement('details');
+ const card = document.createElement('div');
card.className = `tool-card${success ? '' : ' error'}`;
- card.innerHTML = `
-
-
- ${argsLines ? `
${argsLines}
` : ''}
-
${esc(event.result)}
-
`;
+
+ const header = document.createElement('div');
+ header.className = 'tool-header';
+ header.innerHTML = `
+ ${icon}
+ ${esc(event.tool)}
+ ${success ? 'β' : 'β'}`;
+
+ const body = document.createElement('div');
+ body.className = 'tool-body';
+ if (argsLines) {
+ const argsDiv = document.createElement('div');
+ argsDiv.className = 'tool-args';
+ // argsLines is assembled from esc()-processed keys and values above
+ argsDiv.innerHTML = argsLines;
+ body.appendChild(argsDiv);
+ }
+ const pre = document.createElement('pre');
+ pre.className = 'tool-result-pre';
+ // textContent keeps the raw tool output from being parsed as HTML
+ pre.textContent = event.result;
+ body.appendChild(pre);
+
+ // Toggling the `open` class on the card is what shows or hides the body
+ // NOTE(review): the header is a plain div with only a click listener, so unlike
+ // the <details> element it replaces it is not keyboard-focusable — confirm
+ // whether tabindex/role/keydown handling is needed.
+ header.addEventListener('click', () => card.classList.toggle('open'));
+
+ card.append(header, body);
el.appendChild(card);
}
diff --git a/client/js/ws.js b/client/js/ws.js
index bac65c0..6761259 100644
--- a/client/js/ws.js
+++ b/client/js/ws.js
@@ -19,9 +19,11 @@
this.#ws.onmessage = (e) => handlers.onMessage?.(JSON.parse(e.data));
}
- send(content) {
+ send(content, images = null) {
if (this.#ws?.readyState === WebSocket.OPEN) {
- this.#ws.send(JSON.stringify({ type: 'message', content }));
+ const payload = { type: 'message', content };
+ if (images?.length) payload.images = images;
+ this.#ws.send(JSON.stringify(payload));
return true;
}
return false;
diff --git a/client/style.css b/client/style.css
index 4fd0434..257299d 100644
--- a/client/style.css
+++ b/client/style.css
@@ -204,6 +204,22 @@
.msg.user .bubble { background: var(--user-bubble); color: var(--user-text); border-bottom-right-radius: 3px; white-space: pre-wrap; }
.msg.assistant .bubble { background: var(--bot-bubble); color: var(--bot-text); border-bottom-left-radius: 3px; }
+/* Images inside chat bubbles */
+.bubble-images {
+ display: flex;
+ flex-wrap: wrap;
+ gap: 6px;
+ margin-bottom: 8px;
+}
+.bubble-img {
+ max-width: 320px;
+ max-height: 240px;
+ border-radius: 6px;
+ object-fit: contain;
+ cursor: pointer;
+}
+.bubble-img:only-child { max-width: 100%; }
+
.msg-time { font-size: 11px; color: var(--text-muted); margin-top: 4px; padding: 0 2px; }
/* ββ Markdown prose βββββββββββββββββββββββββββββββββ */
@@ -218,8 +234,8 @@
.prose code { font-family: "Fira Code", "Cascadia Code", ui-monospace, monospace; font-size: 0.85em;
background: #2a2a2a; color: #e2b97e; padding: 1px 5px; border-radius: 4px; }
.prose pre { margin: 0.6em 0; border-radius: 8px; overflow: hidden; }
-.prose pre code { background: none; color: inherit; padding: 0; border-radius: 0; font-size: 0.9em; }
-.prose pre .hljs { padding: 12px 16px; border-radius: 8px; font-size: 0.9em; }
+.prose pre code { background: none; color: inherit; padding: 0; border-radius: 0; font-size: 1em; }
+.prose pre .hljs { padding: 12px 16px; border-radius: 8px; font-size: 1em; }
.prose blockquote { border-left: 3px solid #444; margin: 0.5em 0; padding: 0.2em 0 0.2em 0.8em; color: var(--text-muted); }
.prose table { border-collapse: collapse; width: 100%; margin: 0.5em 0; font-size: 0.9em; }
.prose th,.prose td { border: 1px solid #333; padding: 5px 10px; text-align: left; }
@@ -240,13 +256,9 @@
border-radius: var(--radius);
font-size: 12px;
color: var(--tool-text);
- overflow: hidden;
}
.tool-card.error { background: var(--error-bg); border-color: var(--error-border); color: var(--error-text); }
-.tool-card summary { list-style: none; }
-.tool-card summary::-webkit-details-marker { display: none; }
-
.tool-header {
display: flex;
align-items: center;
@@ -255,20 +267,24 @@
cursor: pointer;
user-select: none;
font-weight: 600;
+ border-radius: var(--radius);
}
.tool-header:hover { background: rgba(255,255,255,0.04); }
.tool-icon { font-size: 14px; }
.tool-name { flex: 1; }
.tool-status { font-size: 13px; opacity: 0.8; }
-.tool-card:not([open]) .tool-header::after { content: 'βΊ'; font-size: 16px; opacity: 0.5; }
-.tool-card[open] .tool-header::after { content: 'βΉ'; font-size: 16px; opacity: 0.5; }
+.tool-card:not(.open) .tool-header::after { content: 'βΊ'; font-size: 16px; opacity: 0.5; }
+.tool-card.open .tool-header::after { content: 'βΉ'; font-size: 16px; opacity: 0.5; }
.tool-body {
border-top: 1px solid var(--tool-border);
padding: 8px 12px;
- display: flex;
+ display: none;
flex-direction: column;
gap: 6px;
+}
+.tool-card.open .tool-body {
+ display: flex;
animation: fadeSlide 0.18s ease;
}
@keyframes fadeSlide { from { opacity: 0; transform: translateY(-4px); } to { opacity: 1; transform: translateY(0); } }
@@ -346,14 +362,78 @@
/* ββ Input bar βββββββββββββββββββββββββββββββββββββββββ */
.input-bar {
- padding: 16px 20px;
+ padding: 12px 20px 16px;
background: var(--sidebar-bg);
border-top: 1px solid var(--border);
display: flex;
+ flex-direction: column;
+ gap: 8px;
+}
+
+/* Image preview strip */
+.image-preview-strip {
+ display: flex;
+ flex-wrap: wrap;
+ gap: 8px;
+}
+.image-preview-strip:empty { display: none; }
+
+.img-thumb-wrap {
+ position: relative;
+ width: 72px;
+ height: 72px;
+}
+.img-thumb {
+ width: 72px;
+ height: 72px;
+ object-fit: cover;
+ border-radius: 6px;
+ border: 1px solid var(--border);
+}
+.img-thumb-remove {
+ position: absolute;
+ top: -6px;
+ right: -6px;
+ width: 18px;
+ height: 18px;
+ border-radius: 50%;
+ border: none;
+ background: var(--error-text);
+ color: #fff;
+ font-size: 12px;
+ line-height: 1;
+ cursor: pointer;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ padding: 0;
+}
+
+/* Input row: attach + textarea + send */
+.input-row {
+ display: flex;
gap: 10px;
align-items: flex-end;
}
+.btn-attach {
+ width: 44px;
+ height: 44px;
+ flex-shrink: 0;
+ background: var(--input-bg);
+ color: var(--text-muted);
+ border: 1px solid var(--border);
+ border-radius: var(--radius);
+ font-size: 18px;
+ cursor: pointer;
+ transition: border-color 0.15s, color 0.15s;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+}
+.btn-attach:hover { border-color: var(--accent); color: var(--accent); }
+.btn-attach:disabled { opacity: 0.5; cursor: not-allowed; }
+
.input-bar textarea {
flex: 1;
padding: 10px 14px;
diff --git a/navi/api/routes/sessions.py b/navi/api/routes/sessions.py
index 473e1a6..bfd9b45 100644
--- a/navi/api/routes/sessions.py
+++ b/navi/api/routes/sessions.py
@@ -77,7 +77,7 @@
return {
"session_id": session.id,
"profile_id": session.profile_id,
- "messages": [m.model_dump(exclude_none=True) for m in session.messages],
+ "messages": [m.model_dump(mode='json', exclude_none=True) for m in session.messages],
"created_at": session.created_at.isoformat(),
"last_active": session.last_active.isoformat(),
}
diff --git a/navi/api/websocket.py b/navi/api/websocket.py
index 117ce58..fa7541b 100644
--- a/navi/api/websocket.py
+++ b/navi/api/websocket.py
@@ -57,10 +57,21 @@
continue
user_content = data["content"]
+ # images: list of base64 strings (data URI prefix already stripped by client)
+ raw_images: list[str] | None = data.get("images") or None
+ if raw_images:
+ # Strip data URI prefix if client sent it with prefix
+ cleaned = []
+ for img in raw_images:
+ if "," in img and img.startswith("data:"):
+ img = img.split(",", 1)[1]
+ cleaned.append(img)
+ raw_images = cleaned
+
await websocket.send_json({"type": "stream_start"})
try:
- async for event in agent.run_stream(session_id, user_content):
+ async for event in agent.run_stream(session_id, user_content, images=raw_images):
if isinstance(event, TextDelta):
await websocket.send_json({"type": "stream_delta", "delta": event.delta})
elif isinstance(event, ToolEvent):
diff --git a/navi/core/agent.py b/navi/core/agent.py
index 6ba7582..336b793 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -17,6 +17,7 @@
import asyncio
import json
from dataclasses import dataclass
+from datetime import datetime, timezone
from typing import AsyncGenerator
import structlog
@@ -75,7 +76,7 @@
# Public interface
# ------------------------------------------------------------------
- async def run(self, session_id: str, user_message: str) -> str:
+ async def run(self, session_id: str, user_message: str, images: list[str] | None = None) -> str:
"""Non-streaming: run the full tool-calling loop and return the final text."""
session = await self._sessions.get(session_id)
if session is None:
@@ -90,7 +91,7 @@
if not session.messages:
session.messages.append(Message(role="system", content=profile.system_prompt))
- session.messages.append(Message(role="user", content=user_message))
+ session.messages.append(Message(role="user", content=user_message, images=images or None, created_at=datetime.now(timezone.utc)))
for iteration in range(profile.max_iterations):
log.debug("agent.iteration", session_id=session_id, iteration=iteration)
@@ -114,14 +115,15 @@
)
session.messages.append(assistant_msg)
- tool_results = await self._execute_tool_calls(response.tool_calls, tools)
+ tool_results, image_injections = await self._execute_tool_calls(response.tool_calls, tools)
session.messages.extend(tool_results)
+ session.messages.extend(image_injections)
await self._sessions.save(session)
raise MaxIterationsReached(profile.max_iterations)
async def run_stream(
- self, session_id: str, user_message: str
+ self, session_id: str, user_message: str, images: list[str] | None = None
) -> AsyncGenerator[AgentEvent, None]:
"""
Streaming variant. Yields AgentEvent objects:
@@ -141,7 +143,7 @@
if not session.messages:
session.messages.append(Message(role="system", content=profile.system_prompt))
- session.messages.append(Message(role="user", content=user_message))
+ session.messages.append(Message(role="user", content=user_message, images=images or None, created_at=datetime.now(timezone.utc)))
# Tool-calling loop (non-streaming)
for iteration in range(profile.max_iterations):
@@ -162,7 +164,7 @@
accumulated += chunk.delta
yield TextDelta(delta=chunk.delta)
- session.messages.append(Message(role="assistant", content=accumulated))
+ session.messages.append(Message(role="assistant", content=accumulated, created_at=datetime.now(timezone.utc)))
await self._sessions.save(session)
yield StreamEnd(full_content=accumulated)
return
@@ -175,12 +177,13 @@
)
session.messages.append(assistant_msg)
- tool_results_msgs = await self._execute_tool_calls_streaming(
+ tool_results_msgs, image_injections = await self._execute_tool_calls_streaming(
response.tool_calls, tools
)
for event, msg in tool_results_msgs:
yield event
session.messages.append(msg)
+ session.messages.extend(image_injections)
await self._sessions.save(session)
raise MaxIterationsReached(profile.max_iterations)
@@ -197,33 +200,42 @@
async def _execute_tool_calls(
self, tool_calls: list[ToolCallRequest], tools: list[Tool]
- ) -> list[Message]:
+ ) -> tuple[list[Message], list[Message]]:
+ """Run every requested tool call concurrently.
+
+ Returns ``(tool_msgs, image_msgs)``. ``tool_msgs`` holds one ``role="tool"``
+ message per call, in the order of ``tool_calls``. ``image_msgs`` holds one
+ ``role="user"`` message carrying the base64 image for each successful
+ result whose metadata has ``is_image`` set and a non-empty ``base64``.
+ A call naming an unknown tool yields an "Error: ..." tool message.
+ """
tool_map = {t.name: t for t in tools}
- async def _run_one(tc: ToolCallRequest) -> Message:
+ async def _run_one(tc: ToolCallRequest) -> tuple[Message, Message | None]:
tool = tool_map.get(tc.name)
+ image_msg = None
if tool is None:
content = f"Error: tool '{tc.name}' not found."
else:
log.info("tool.execute", tool=tc.name, args=tc.arguments)
result = await tool.execute(tc.arguments)
content = result.to_message_content()
- return Message(
- role="tool",
- content=content,
- tool_call_id=tc.id,
- name=tc.name,
- )
+ # The image travels in a separate user message; the tool message keeps only the text
+ # NOTE(review): this message has no created_at, unlike the user messages built in run() — confirm intended
+ if result.success and result.metadata and result.metadata.get("is_image"):
+ b64 = result.metadata.get("base64")
+ if b64:
+ image_msg = Message(
+ role="user",
+ content=f"[Image loaded via {tc.name} β analyse it]",
+ images=[b64],
+ )
+ tool_msg = Message(role="tool", content=content, tool_call_id=tc.id, name=tc.name)
+ return tool_msg, image_msg
- return await asyncio.gather(*[_run_one(tc) for tc in tool_calls])
+ # gather() returns results in the same order as the awaitables passed in
+ pairs = await asyncio.gather(*[_run_one(tc) for tc in tool_calls])
+ tool_msgs = [p[0] for p in pairs]
+ image_msgs = [p[1] for p in pairs if p[1] is not None]
+ return tool_msgs, image_msgs
async def _execute_tool_calls_streaming(
self, tool_calls: list[ToolCallRequest], tools: list[Tool]
- ) -> list[tuple[ToolEvent, Message]]:
+ ) -> tuple[list[tuple[ToolEvent, Message]], list[Message]]:
tool_map = {t.name: t for t in tools}
- async def _run_one(tc: ToolCallRequest) -> tuple[ToolEvent, Message]:
+ async def _run_one(tc: ToolCallRequest) -> tuple[ToolEvent, Message, Message | None]:
tool = tool_map.get(tc.name)
+ image_msg = None
if tool is None:
content = f"Error: tool '{tc.name}' not found."
event = ToolEvent(
@@ -239,7 +251,18 @@
result=content,
success=result.success,
)
+ if result.success and result.metadata and result.metadata.get("is_image"):
+ b64 = result.metadata.get("base64")
+ if b64:
+ image_msg = Message(
+ role="user",
+ content=f"[Image loaded via {tc.name} β analyse it]",
+ images=[b64],
+ )
msg = Message(role="tool", content=content, tool_call_id=tc.id, name=tc.name)
- return event, msg
+ return event, msg, image_msg
- return await asyncio.gather(*[_run_one(tc) for tc in tool_calls])
+ triples = await asyncio.gather(*[_run_one(tc) for tc in tool_calls])
+ pairs = [(t[0], t[1]) for t in triples]
+ image_msgs = [t[2] for t in triples if t[2] is not None]
+ return pairs, image_msgs
diff --git a/navi/core/registry.py b/navi/core/registry.py
index 0765a98..6ecbb23 100644
--- a/navi/core/registry.py
+++ b/navi/core/registry.py
@@ -10,6 +10,7 @@
CodeExecTool,
FilesystemTool,
HttpRequestTool,
+ ImageViewTool,
SshExecTool,
TerminalTool,
Tool,
@@ -79,6 +80,7 @@
tools.register(CodeExecTool())
tools.register(TerminalTool())
tools.register(SshExecTool())
+ tools.register(ImageViewTool())
profiles = ProfileRegistry()
for p in ALL_PROFILES:
diff --git a/navi/core/sqlite_session_store.py b/navi/core/sqlite_session_store.py
index d68a2d8..736af6b 100644
--- a/navi/core/sqlite_session_store.py
+++ b/navi/core/sqlite_session_store.py
@@ -59,7 +59,7 @@
async def save(self, session: Session) -> None:
session.last_active = datetime.utcnow()
messages_json = json.dumps(
- [m.model_dump(exclude_none=True) for m in session.messages],
+ [m.model_dump(mode='json', exclude_none=True) for m in session.messages],
ensure_ascii=False,
)
async with aiosqlite.connect(self._db_path) as db:
diff --git a/navi/llm/base.py b/navi/llm/base.py
index d9695f8..fea77d0 100644
--- a/navi/llm/base.py
+++ b/navi/llm/base.py
@@ -6,6 +6,7 @@
"""
from abc import ABC, abstractmethod
+from datetime import datetime, timezone
from typing import AsyncGenerator, Literal
from pydantic import BaseModel
@@ -31,11 +32,14 @@
role: Literal["system", "user", "assistant", "tool"]
content: str | None = None
+ # base64-encoded images (multimodal); user and assistant roles only
+ images: list[str] | None = None
# set by assistant when requesting tool calls
tool_calls: list[ToolCallRequest] | None = None
# set on tool result messages
tool_call_id: str | None = None
name: str | None = None # tool name on tool result messages
+ created_at: datetime | None = None
class LLMResponse(BaseModel):
diff --git a/navi/llm/ollama.py b/navi/llm/ollama.py
index 1aeef02..281ec87 100644
--- a/navi/llm/ollama.py
+++ b/navi/llm/ollama.py
@@ -14,14 +14,13 @@
result = []
for m in messages:
msg: dict = {"role": m.role, "content": m.content or ""}
+ if m.images:
+ msg["images"] = m.images # list of base64 strings, Ollama format
if m.tool_calls:
msg["tool_calls"] = [
{"function": {"name": tc.name, "arguments": tc.arguments}}
for tc in m.tool_calls
]
- if m.tool_call_id:
- # Ollama uses role="tool" with content
- pass
result.append(msg)
return result
diff --git a/navi/main.py b/navi/main.py
index 789dde1..2749e46 100644
--- a/navi/main.py
+++ b/navi/main.py
@@ -1,8 +1,8 @@
"""FastAPI application entry point."""
import structlog
-from fastapi import FastAPI
-from fastapi.responses import FileResponse
+from fastapi import FastAPI, Request
+from fastapi.responses import FileResponse, Response
from fastapi.staticfiles import StaticFiles
from navi.api.routes import agents, health, messages, sessions
@@ -30,6 +30,14 @@
app.mount("/static", StaticFiles(directory="client"), name="static")
+@app.middleware("http")
+async def no_cache_static(request: Request, call_next) -> Response:
+    """Mark every response served from under ``/static/`` as non-cacheable.
+
+    Responses for all other paths are returned unchanged.
+    """
+    served = await call_next(request)
+    if not request.url.path.startswith("/static/"):
+        return served
+    served.headers["Cache-Control"] = "no-store"
+    return served
+
+
@app.get("/", include_in_schema=False)
async def index() -> FileResponse:
- return FileResponse("client/index.html")
+ """Serve the client entry page with a ``Cache-Control: no-store`` header."""
+ return FileResponse("client/index.html", headers={"Cache-Control": "no-store"})
diff --git a/navi/profiles/secretary.py b/navi/profiles/secretary.py
index 9d8456a..ba47e1f 100644
--- a/navi/profiles/secretary.py
+++ b/navi/profiles/secretary.py
@@ -12,12 +12,13 @@
- HTTP requests to query external APIs or services
- Filesystem to read and write documents, notes, and files
- Code execution to perform calculations, data processing, or automate tasks
+- image_view: load and analyse images from a local file path or URL
Be concise and actionable in your responses. When asked to research a topic,
provide a structured summary with sources. When writing documents, match the
requested tone and format.
""",
- enabled_tools=["web_search", "http_request", "filesystem", "code_exec"],
+ enabled_tools=["web_search", "http_request", "filesystem", "code_exec", "terminal", "image_view"],
model="gemma4:e2b-it-q4_K_M",
temperature=0.7,
)
diff --git a/navi/profiles/server_admin.py b/navi/profiles/server_admin.py
index 9ba485f..ad79b44 100644
--- a/navi/profiles/server_admin.py
+++ b/navi/profiles/server_admin.py
@@ -16,6 +16,7 @@
- filesystem: read and write files on the local machine
- http_request: call REST APIs, monitoring endpoints, or health checks
- web_search: look up documentation, error messages, or solutions
+- image_view: load and analyse images from a local file path or URL
Guidelines:
- When the user asks about a remote server, use ssh_exec immediately β do not say
@@ -23,7 +24,7 @@
- Prefer non-destructive operations; ask for confirmation before anything irreversible.
- When troubleshooting, gather information first (logs, status) before making changes.
""",
- enabled_tools=["terminal", "filesystem", "http_request", "web_search", "ssh_exec"],
+ enabled_tools=["terminal", "filesystem", "http_request", "web_search", "ssh_exec", "image_view"],
model="gemma4:e2b-it-q4_K_M",
temperature=0.2,
)
diff --git a/navi/profiles/smart_home.py b/navi/profiles/smart_home.py
index 1b4bd1b..a48a621 100644
--- a/navi/profiles/smart_home.py
+++ b/navi/profiles/smart_home.py
@@ -15,11 +15,12 @@
- ssh_exec: execute commands on remote hosts via SSH. Pass host, username, password
(and optionally port, key_path) directly as tool parameters β no config file needed.
ALWAYS use it for any task involving a remote host.
+- image_view: load and analyse images from a local file path or URL
Always confirm before making irreversible changes to device state or automation configuration.
When writing automations, prefer clear, well-commented YAML.
""",
- enabled_tools=["http_request", "filesystem", "code_exec", "terminal", "ssh_exec"],
+ enabled_tools=["http_request", "filesystem", "code_exec", "terminal", "ssh_exec", "image_view"],
model="gemma4:e2b-it-q4_K_M",
temperature=0.3,
)
diff --git a/navi/tools/__init__.py b/navi/tools/__init__.py
index 5270483..eef15ec 100644
--- a/navi/tools/__init__.py
+++ b/navi/tools/__init__.py
@@ -2,6 +2,7 @@
from .code_exec import CodeExecTool
from .filesystem import FilesystemTool
from .http_request import HttpRequestTool
+from .image_view import ImageViewTool
from .ssh_exec import SshExecTool
from .terminal import TerminalTool
from .web_search import WebSearchTool
@@ -15,4 +16,5 @@
"CodeExecTool",
"TerminalTool",
"SshExecTool",
+ "ImageViewTool",
]
diff --git a/navi/tools/image_view.py b/navi/tools/image_view.py
new file mode 100644
index 0000000..5f72352
--- /dev/null
+++ b/navi/tools/image_view.py
@@ -0,0 +1,69 @@
+"""Image view tool — load an image from a file path or URL for the LLM to analyse.
+
+The image is returned as base64 and injected into the conversation so the LLM
+can actually see it (not just read a text description of it).
+"""
+
+import base64
+import mimetypes
+from pathlib import Path
+
+import httpx
+
+from .base import Tool, ToolResult
+
+_TIMEOUT = 30
+_SUPPORTED = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}
+
+
+class ImageViewTool(Tool):
+    """Load an image from a local path or an HTTP(S) URL for the model to inspect.
+
+    A successful result carries the base64-encoded bytes in ``metadata["base64"]``
+    together with ``metadata["mime"]`` and ``metadata["is_image"] = True``.
+    """
+
+    name = "image_view"
+    description = (
+        "Load an image from a local file path or a URL so you can see and analyse it. "
+        "Supports JPEG, PNG, GIF, WebP. After calling this tool the image will be "
+        "visible to you in the next turn."
+    )
+    parameters = {
+        "type": "object",
+        "properties": {
+            "source": {
+                "type": "string",
+                "description": "Absolute file path (e.g. /home/user/photo.jpg) or HTTP/HTTPS URL",
+            },
+        },
+        "required": ["source"],
+    }
+
+    async def execute(self, params: dict) -> ToolResult:
+        """Load the image named by ``params["source"]``.
+
+        Failures, including a missing or non-string ``source``, are reported as
+        ``ToolResult(success=False, ...)`` rather than raised.
+        """
+        try:
+            # Validate inside the try block so malformed arguments from the model
+            # produce a failed ToolResult instead of an uncaught exception.
+            source = params.get("source")
+            if not isinstance(source, str) or not source.strip():
+                raise ValueError("'source' must be a non-empty file path or URL")
+            source = source.strip()
+
+            if source.startswith(("http://", "https://")):
+                raw, mime = await self._fetch_url(source)
+            else:
+                raw, mime = self._read_file(source)
+
+            b64 = base64.b64encode(raw).decode()
+            size_kb = len(raw) // 1024
+            return ToolResult(
+                success=True,
+                output=f"Image loaded ({size_kb} KB, {mime}). It will appear in the next turn.",
+                metadata={"base64": b64, "mime": mime, "is_image": True},
+            )
+        except Exception as e:
+            return ToolResult(success=False, output=f"Failed to load image: {e}", error=str(e))
+
+    async def _fetch_url(self, url: str) -> tuple[bytes, str]:
+        """Download ``url`` and return ``(body, mime)``.
+
+        Raises ``httpx.HTTPStatusError`` for an error status and ``ValueError``
+        when the response declares a non-image content type.
+        """
+        async with httpx.AsyncClient(timeout=_TIMEOUT, follow_redirects=True) as client:
+            r = await client.get(url)
+            r.raise_for_status()
+            mime = r.headers.get("content-type", "").split(";")[0].strip().lower()
+            if not mime or mime == "application/octet-stream":
+                # Missing or generic header: guess from the URL, else keep the old JPEG default
+                mime = mimetypes.guess_type(url)[0] or "image/jpeg"
+            if not mime.startswith("image/"):
+                # e.g. an HTML login or error page served with status 200
+                raise ValueError(f"URL did not return an image (content-type: {mime})")
+            return r.content, mime
+
+    def _read_file(self, path_str: str) -> tuple[bytes, str]:
+        """Read a local image and return ``(bytes, mime)``.
+
+        Raises ``FileNotFoundError`` when the path does not exist and
+        ``ValueError`` when its suffix is not in ``_SUPPORTED``.
+        """
+        path = Path(path_str).expanduser().resolve()
+        if not path.exists():
+            raise FileNotFoundError(f"File not found: {path}")
+        if path.suffix.lower() not in _SUPPORTED:
+            raise ValueError(f"Unsupported image format: {path.suffix}")
+        mime = mimetypes.guess_type(str(path))[0] or "image/jpeg"
+        return path.read_bytes(), mime