"""
Canonical types and abstract base class for LLM backends.
All backends translate their native wire format into these types.
Message format follows the OpenAI convention (compatible with Ollama and Anthropic adapters).
"""
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import AsyncGenerator, Literal
from pydantic import BaseModel
class ToolCallRequest(BaseModel):
    """A single tool invocation requested by the LLM.

    Attributes:
        id: Identifier of this particular call.
        name: Name of the tool the model wants to run.
        arguments: Arguments for the tool, held as a plain dict.
    """

    id: str
    name: str
    arguments: dict
class ToolSchema(BaseModel):
    """Definition of a tool as it is sent to the LLM.

    Attributes:
        type: Kind of tool; defaults to ``"function"``.
        function: Description of the callable, shaped as
            ``{name: str, description: str, parameters: JSON Schema}``.
    """

    type: str = "function"
    function: dict
class Message(BaseModel):
    """Canonical, OpenAI-compatible chat message.

    Attributes:
        role: One of ``"system"``, ``"user"``, ``"assistant"`` or ``"tool"``.
        content: Text of the message, if any.
        images: Base64-encoded images (multimodal); user and assistant
            roles only.
        tool_calls: Set by the assistant when it requests tool calls.
        tool_call_id: Set on tool result messages.
        name: Tool name, set on tool result messages.
        created_at: Optional timestamp for the message.
            NOTE(review): nothing here enforces timezone-awareness — confirm
            what callers store.
    """

    role: Literal["system", "user", "assistant", "tool"]
    content: str | None = None

    # Multimodal payload.
    images: list[str] | None = None

    # Tool-calling fields: the request side (assistant) ...
    tool_calls: list[ToolCallRequest] | None = None
    # ... and the result side (tool).
    tool_call_id: str | None = None
    name: str | None = None

    created_at: datetime | None = None
class LLMResponse(BaseModel):
    """Complete (non-streaming) reply from an LLM backend.

    Attributes:
        content: Text of the reply, or None. No default is declared.
            NOTE(review): whether callers must pass this explicitly depends
            on the Pydantic major version in use — confirm.
        tool_calls: Tool calls requested by the model, or None. No default
            is declared (same caveat as ``content``).
        finish_reason: Why generation ended: ``"stop"``, ``"tool_calls"``
            or ``"length"``.
        thinking: Optional reasoning text; defaults to None.
    """

    content: str | None
    tool_calls: list[ToolCallRequest] | None
    finish_reason: str
    thinking: str | None = None
class LLMChunk(BaseModel):
    """One increment of a streaming LLM response.

    Attributes:
        delta: Text carried by this chunk, if any.
        thinking: Reasoning text carried by this chunk, if any.
        finish_reason: ``"stop"`` or ``"length"`` once generation has ended;
            None while the stream is still in progress.
        prompt_tokens: Prompt token count; only present on the final chunk
            (finish_reason == "stop").
        completion_tokens: Completion token count; only present on the final
            chunk (finish_reason == "stop").
    """

    delta: str | None = None
    thinking: str | None = None
    finish_reason: str | None = None

    # Usage accounting, populated on the final chunk only.
    prompt_tokens: int | None = None
    completion_tokens: int | None = None
class LLMBackend(ABC):
    """Abstract base class for LLM backends.

    Concrete backends must implement both ``complete`` (one-shot, with
    optional tool calling) and ``stream`` (incremental text, no tool calling).
    """

    @abstractmethod
    async def complete(
        self,
        messages: list[Message],
        tools: list[ToolSchema] | None = None,
        temperature: float = 0.7,
        model: str | None = None,
    ) -> LLMResponse:
        """Single-shot completion. Used in the agent tool-calling loop.

        Args:
            messages: Conversation so far, in canonical ``Message`` form.
            tools: Tool definitions offered to the model, or None for none.
            temperature: Sampling temperature; defaults to 0.7.
            model: Model identifier. Presumably None selects the backend's
                own default — confirm against the concrete implementations.

        Returns:
            The full response as an ``LLMResponse``.
        """

    @abstractmethod
    async def stream(
        self,
        messages: list[Message],
        temperature: float = 0.7,
        model: str | None = None,
    ) -> AsyncGenerator[LLMChunk, None]:
        """Streaming text completion (no tool calling). Used for final response streaming.

        Implementations are async generators: callers consume them with
        ``async for chunk in backend.stream(...)``.

        Args:
            messages: Conversation so far, in canonical ``Message`` form.
            temperature: Sampling temperature; defaults to 0.7.
            model: Model identifier. Presumably None selects the backend's
                own default — confirm against the concrete implementations.

        Yields:
            ``LLMChunk`` objects, one per increment of the response.

        Raises:
            NotImplementedError: If this abstract body is iterated directly
                (e.g. through ``super().stream(...)``).
        """
        raise NotImplementedError
        # Deliberately unreachable. An ``async def`` without a ``yield`` is a
        # coroutine function, which type checkers read as "coroutine returning
        # an async generator" and therefore as incompatible with overrides
        # that use ``yield``. The dead ``yield`` makes this declaration an
        # async generator function, matching the annotated return type.
        if False:
            yield