"""Image view tool — load an image from a file path or URL for the LLM to analyse.
Images are resized to max 1024 px on the longest side and converted to JPEG
(~85 quality) before base64 encoding to keep context size reasonable.
The processed image is returned as base64 and injected into the conversation
so the LLM can actually see it (not just read a text description of it).
"""
import asyncio
import base64
import io
import mimetypes
from pathlib import Path
import httpx
from PIL import Image
from ._internal.base import Tool, ToolResult
_TIMEOUT = 30
_SUPPORTED = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}
_MAX_SIZE = 1024
_JPEG_QUALITY = 85
class ImageViewTool(Tool):
    """Load an image from a local path or HTTP(S) URL so the model can inspect it.

    The raw bytes are decoded with Pillow, flattened to RGB, downscaled so the
    longest side is at most ``_MAX_SIZE`` pixels, re-encoded as JPEG and
    returned base64-encoded in ``ToolResult.metadata``.
    """

    name = "image_view"
    description = (
        "Load an image from a local file path or HTTP/HTTPS URL so you can see and analyse it. "
        "Use this whenever the conversation references an image you cannot already see — "
        "a file path, a URL, a screenshot you produced, or any visual you need to inspect. "
        "Images the user attached directly to a message (visible inline in your context) "
        "don't need this tool; just analyse them from what you see. "
        "The loaded image becomes visible to you in the next message, but it is NOT shown to the user. "
        "Do not assume the user has seen it unless you publish or share it through another tool."
    )
    parameters = {
        "type": "object",
        "properties": {
            "source": {
                "type": "string",
                "description": "Absolute file path (e.g. /home/user/photo.jpg) or HTTP/HTTPS URL",
            },
        },
        "required": ["source"],
    }

    async def execute(self, params: dict) -> ToolResult:
        """Load, preprocess and base64-encode the image named by ``params["source"]``.

        Args:
            params: Tool arguments; ``source`` must be a non-empty string holding
                either a file path or an ``http://`` / ``https://`` URL.

        Returns:
            A ``ToolResult``. On success ``metadata`` carries ``base64``, ``mime``
            and ``is_image``; on any failure ``success`` is False and ``error``
            holds the message. Failures are reported, never raised.
        """
        source = params.get("source")
        # Validate up front so a malformed call yields a failed ToolResult
        # instead of an uncaught KeyError/AttributeError.
        if not isinstance(source, str) or not source.strip():
            reason = "Missing or empty 'source' parameter"
            return ToolResult(success=False, output=f"Failed to load image: {reason}", error=reason)
        source = source.strip()
        try:
            # URL schemes are case-insensitive, so compare on a lowered copy.
            if source.lower().startswith(("http://", "https://")):
                raw, _ = await self._fetch_url(source)
            else:
                raw, _ = await self._read_file(source)
            # Decoding/resizing is CPU-bound; keep it off the event loop.
            processed, mime = await asyncio.to_thread(self._preprocess, raw)
            b64 = base64.b64encode(processed).decode()
            size_kb = len(processed) // 1024
            return ToolResult(
                success=True,
                output=(
                    f"Image loaded ({size_kb} KB, {mime}). It will appear in your next turn. "
                    "The user cannot see this image from image_view alone."
                ),
                metadata={"base64": b64, "mime": mime, "is_image": True},
            )
        except Exception as e:
            # Tool boundary: report every failure to the caller as a result.
            return ToolResult(success=False, output=f"Failed to load image: {e}", error=str(e))

    async def _fetch_url(self, url: str) -> tuple[bytes, str]:
        """Download ``url`` and return ``(body_bytes, content_type)``.

        Raises:
            httpx.HTTPStatusError: on a 4xx/5xx response.
            ValueError: if the Content-Type is not a raster ``image/*`` type.
        """
        # NOTE(review): the whole body is buffered in memory and any host
        # reachable from this process can be requested. Consider a size cap
        # and/or host allow-list if sources are untrusted.
        async with httpx.AsyncClient(timeout=_TIMEOUT, follow_redirects=True) as client:
            r = await client.get(url)
            r.raise_for_status()
            # Media types are case-insensitive; normalise before comparing.
            # A missing header is treated as JPEG and left for Pillow to validate.
            mime = r.headers.get("content-type", "image/jpeg").split(";")[0].strip().lower()
            if not mime.startswith("image/") or mime == "image/svg+xml":
                raise ValueError(f"URL returned non-raster image content-type: {mime}")
            return r.content, mime

    async def _read_file(self, path_str: str) -> tuple[bytes, str]:
        """Read a local image file and return ``(file_bytes, guessed_mime)``.

        Raises:
            FileNotFoundError: if the resolved path does not exist.
            ValueError: if the path is not a regular file or its suffix is not
                in ``_SUPPORTED``.
        """
        path = Path(path_str).expanduser().resolve()
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")
        # A directory (or other non-file) with an image-like name would
        # otherwise fail later with a less helpful OS error.
        if not path.is_file():
            raise ValueError(f"Not a regular file: {path}")
        if path.suffix.lower() not in _SUPPORTED:
            raise ValueError(f"Unsupported image format: {path.suffix}")
        mime = mimetypes.guess_type(str(path))[0] or "image/jpeg"
        raw = await asyncio.to_thread(path.read_bytes)
        return raw, mime

    @staticmethod
    def _preprocess(raw: bytes) -> tuple[bytes, str]:
        """Resize to _MAX_SIZE on longest side, convert to JPEG, return (bytes, mime).

        EXIF orientation is applied before re-encoding, and images with an
        alpha channel are flattened onto a white background because JPEG has
        no transparency. For multi-frame images only the frame Pillow opens by
        default is used.
        """
        from PIL import ImageOps  # local import keeps this block self-contained

        with Image.open(io.BytesIO(raw)) as opened:
            # Re-encoding drops EXIF, so bake the orientation into the pixels.
            img = ImageOps.exif_transpose(opened)
            has_alpha = img.mode in ("RGBA", "LA", "PA") or (
                img.mode == "P" and "transparency" in img.info
            )
            if has_alpha:
                # A plain convert("RGB") discards alpha and exposes whatever
                # RGB sits under transparent pixels (typically black).
                rgba = img.convert("RGBA")
                canvas = Image.new("RGB", rgba.size, (255, 255, 255))
                canvas.paste(rgba, mask=rgba.getchannel("A"))
                img = canvas
            else:
                img = img.convert("RGB")

            w, h = img.size
            longest = max(w, h)
            if longest > _MAX_SIZE:
                # Integer arithmetic avoids float round-down (e.g. 1023 instead
                # of 1024); max(1, ...) keeps very thin images from collapsing
                # to a zero-pixel side, which resize() rejects.
                new_size = (
                    max(1, w * _MAX_SIZE // longest),
                    max(1, h * _MAX_SIZE // longest),
                )
                img = img.resize(new_size, Image.LANCZOS)

            buf = io.BytesIO()
            img.save(buf, format="JPEG", quality=_JPEG_QUALITY, optimize=True)
            return buf.getvalue(), "image/jpeg"