# navi-1 / navi / tools / image_view.py
"""Image view tool — load an image from a file path or URL for the LLM to analyse.

Images are resized to max 1024 px on the longest side and converted to JPEG
(~85 quality) before base64 encoding to keep context size reasonable.

The processed image is returned as base64 and injected into the conversation
so the LLM can actually see it (not just read a text description of it).
"""

import asyncio
import base64
import io
import mimetypes
from pathlib import Path

import httpx
from PIL import Image

from ._internal.base import Tool, ToolResult

# Timeout handed to httpx.AsyncClient when fetching an image URL.
_TIMEOUT = 30
# Lower-cased file suffixes accepted when loading from a local path.
_SUPPORTED = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}
# Images whose width or height exceeds this many pixels are scaled down.
_MAX_SIZE = 1024
# Quality setting used when re-encoding the processed image as JPEG.
_JPEG_QUALITY = 85


class ImageViewTool(Tool):
    """Tool that loads an image from a local path or HTTP(S) URL for the LLM.

    The image is normalised (orientation applied, transparency flattened,
    downscaled, re-encoded as JPEG) and returned base64-encoded in the
    result metadata.
    """

    name = "image_view"
    description = (
        "Load an image from a local file path or HTTP/HTTPS URL so you can see and analyse it. "
        "Use this whenever the conversation references an image you cannot already see — "
        "a file path, a URL, a screenshot you produced, or any visual you need to inspect. "
        "Images the user attached directly to a message (visible inline in your context) "
        "don't need this tool; just analyse them from what you see. "
        "The loaded image becomes visible to you in the next message, but it is NOT shown to the user. "
        "Do not assume the user has seen it unless you publish or share it through another tool."
    )
    parameters = {
        "type": "object",
        "properties": {
            "source": {
                "type": "string",
                "description": "Absolute file path (e.g. /home/user/photo.jpg) or HTTP/HTTPS URL",
            },
        },
        "required": ["source"],
    }

    async def execute(self, params: dict) -> ToolResult:
        """Load, normalise and base64-encode the image named by ``params["source"]``.

        Args:
            params: Tool arguments; ``source`` is a file path or an
                ``http://`` / ``https://`` URL.

        Returns:
            A successful ``ToolResult`` whose metadata carries ``base64``,
            ``mime`` and ``is_image``; or a failed ``ToolResult`` describing
            the error. Failures (including a missing or malformed ``source``)
            are reported through the result rather than raised.
        """
        try:
            # Validate inside the error boundary so a bad call yields a
            # failed ToolResult instead of an unhandled KeyError/AttributeError.
            source = params.get("source")
            if not isinstance(source, str) or not source.strip():
                raise ValueError("'source' must be a non-empty file path or URL")
            source = source.strip()

            # The source MIME type is only used for validation in the loaders;
            # the reported type is always that of the re-encoded image.
            if source.startswith(("http://", "https://")):
                raw, _source_mime = await self._fetch_url(source)
            else:
                raw, _source_mime = await self._read_file(source)

            # Decoding/resizing is CPU-bound; keep it off the event loop.
            processed, mime = await asyncio.to_thread(self._preprocess, raw)
            b64 = base64.b64encode(processed).decode()
            size_kb = len(processed) // 1024
            return ToolResult(
                success=True,
                output=(
                    f"Image loaded ({size_kb} KB, {mime}). It will appear in your next turn. "
                    "The user cannot see this image from image_view alone."
                ),
                metadata={"base64": b64, "mime": mime, "is_image": True},
            )
        except Exception as e:
            # Some exceptions (e.g. timeouts) stringify to "", which would
            # leave the message blank; fall back to the exception type name.
            detail = str(e) or type(e).__name__
            return ToolResult(success=False, output=f"Failed to load image: {detail}", error=detail)

    async def _fetch_url(self, url: str) -> tuple[bytes, str]:
        """Download ``url`` and return ``(body_bytes, mime_type)``.

        Raises:
            httpx.HTTPStatusError: On a 4xx/5xx response.
            ValueError: If the response content-type is not a raster image.
        """
        # NOTE(review): any URL is fetched with redirects followed and no cap
        # on response size; confirm whether callers need an allow-list/limit.
        async with httpx.AsyncClient(timeout=_TIMEOUT, follow_redirects=True) as client:
            r = await client.get(url)
            r.raise_for_status()
        # Media types are case-insensitive; drop parameters such as charset.
        mime = r.headers.get("content-type", "image/jpeg").split(";")[0].strip().lower()
        if not mime.startswith("image/") or mime == "image/svg+xml":
            raise ValueError(f"URL returned non-raster image content-type: {mime}")
        return r.content, mime

    async def _read_file(self, path_str: str) -> tuple[bytes, str]:
        """Read a local image file and return ``(file_bytes, mime_type)``.

        Raises:
            FileNotFoundError: If the resolved path does not exist.
            ValueError: If the file suffix is not in ``_SUPPORTED``.
        """
        path = Path(path_str).expanduser().resolve()
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")
        if path.suffix.lower() not in _SUPPORTED:
            raise ValueError(f"Unsupported image format: {path.suffix}")
        mime = mimetypes.guess_type(str(path))[0] or "image/jpeg"
        # Blocking disk read is pushed to a worker thread.
        raw = await asyncio.to_thread(path.read_bytes)
        return raw, mime

    @staticmethod
    def _preprocess(raw: bytes) -> tuple[bytes, str]:
        """Normalise an encoded image and return ``(jpeg_bytes, "image/jpeg")``.

        Steps: apply EXIF orientation, flatten transparency onto white,
        scale down so neither side exceeds ``_MAX_SIZE``, encode as JPEG.
        """
        from PIL import ImageOps  # local import: only this helper needs it

        img = Image.open(io.BytesIO(raw))

        # Apply the EXIF orientation so rotated camera photos are upright.
        # Best-effort only: malformed EXIF must not fail an otherwise
        # readable image.
        try:
            upright = ImageOps.exif_transpose(img)
            if upright is not None:
                img = upright
        except Exception:
            pass

        # JPEG has no alpha channel. A plain convert("RGB") just drops alpha,
        # which typically turns transparent areas black; composite onto a
        # white background instead.
        if img.mode in ("RGBA", "LA", "PA") or "transparency" in img.info:
            rgba = img.convert("RGBA")
            canvas = Image.new("RGB", rgba.size, (255, 255, 255))
            canvas.paste(rgba, mask=rgba.split()[-1])
            img = canvas
        else:
            img = img.convert("RGB")

        w, h = img.size
        if w > _MAX_SIZE or h > _MAX_SIZE:
            ratio = _MAX_SIZE / max(w, h)
            # Round rather than truncate, and clamp to 1 px: truncation gives a
            # zero-sized dimension for very elongated images (resize raises)
            # and can leave the longest side one pixel short.
            new_size = (max(1, round(w * ratio)), max(1, round(h * ratio)))
            img = img.resize(new_size, Image.LANCZOS)

        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=_JPEG_QUALITY, optimize=True)
        return buf.getvalue(), "image/jpeg"