# navi-1 / navi / llm / base.py
"""
Canonical types and abstract base class for LLM backends.

All backends translate their native wire format into these types.
Message format follows the OpenAI convention (compatible with Ollama and Anthropic adapters).
"""

from abc import ABC, abstractmethod
from typing import AsyncGenerator, Literal

from pydantic import BaseModel


class ToolCallRequest(BaseModel):
    """One tool invocation that the LLM asked the caller to perform.

    Instances are carried in ``Message.tool_calls`` and
    ``LLMResponse.tool_calls``.
    """

    # Identifier of this call. Presumably echoed back as ``tool_call_id`` on
    # the matching tool-result message -- confirm against the backends.
    id: str
    # Name of the tool being requested.
    name: str
    # Arguments for the tool; a plain dict whose key/value types are not
    # constrained by this model.
    arguments: dict


class ToolSchema(BaseModel):
    """Description of a tool, in the form it is sent to the LLM.

    Passed to ``LLMBackend.complete`` through its ``tools`` argument.
    """

    # Kind of tool; defaults to "function".
    type: str = "function"
    # Plain dict describing the function. Expected keys (not validated here):
    #   name: str, description: str, parameters: JSON Schema
    function: dict


class Message(BaseModel):
    """A single conversation message in the canonical, OpenAI-compatible shape."""

    # Author of the message; limited to exactly these four roles.
    role: Literal["system", "user", "assistant", "tool"]
    # Text body of the message; optional and None by default.
    content: str | None = None
    # Assistant messages only: the tool calls the assistant is requesting.
    tool_calls: list[ToolCallRequest] | None = None
    # Tool-result messages only. Presumably the ``id`` of the ToolCallRequest
    # being answered -- confirm against the backends.
    tool_call_id: str | None = None
    # Tool-result messages only: the name of the tool.
    name: str | None = None


class LLMResponse(BaseModel):
    """Non-streaming response from an LLM backend.

    ``content`` and ``tool_calls`` carry an explicit ``None`` default, like the
    nullable fields on ``Message`` and ``LLMChunk``. Without it, whether the
    fields may be omitted depends on the pydantic major version (implicitly
    optional in v1, required in v2); the explicit default makes both behave
    the same. ``finish_reason`` stays required.
    """

    # Text produced by the model, if any.
    content: str | None = None
    # Tool calls requested by the model, if any.
    tool_calls: list[ToolCallRequest] | None = None
    # Why generation ended. Not validated beyond being a string.
    finish_reason: str  # "stop" | "tool_calls" | "length"


class LLMChunk(BaseModel):
    """One incremental piece of a streamed LLM response."""

    # Text carried by this chunk, if any.
    delta: str | None = None
    # None while streaming is still in progress; otherwise "stop" or "length".
    finish_reason: str | None = None


class LLMBackend(ABC):
    """Abstract base class for LLM backends.

    Concrete backends must implement both ``complete`` (a coroutine) and
    ``stream`` (an async generator).
    """

    @abstractmethod
    async def complete(
        self,
        messages: list[Message],
        tools: list[ToolSchema] | None = None,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Single-shot completion. Used in the agent tool-calling loop.

        Args:
            messages: Conversation so far, in canonical ``Message`` form.
            tools: Tool definitions to offer to the model, or ``None``.
            temperature: Sampling temperature; defaults to 0.7.

        Returns:
            The backend's reply as an ``LLMResponse``.
        """

    @abstractmethod
    async def stream(
        self,
        messages: list[Message],
        temperature: float = 0.7,
    ) -> AsyncGenerator[LLMChunk, None]:
        """Streaming text completion (no tool calling). Used for final response streaming.

        Implementations are async generators: callers consume them with
        ``async for chunk in backend.stream(...)`` and do not ``await`` the call.

        Args:
            messages: Conversation so far, in canonical ``Message`` form.
            temperature: Sampling temperature; defaults to 0.7.

        Yields:
            ``LLMChunk`` objects.

        Raises:
            NotImplementedError: If this base implementation is iterated
                directly (e.g. through ``super()``).
        """
        raise NotImplementedError
        # Never executed. The mere presence of ``yield`` makes this an async
        # generator function, so the declared return type matches what
        # ``async def`` actually produces; without it, this would be a
        # coroutine that *returns* an AsyncGenerator and subclasses written
        # with ``yield`` would not match the base signature.
        if False:  # pragma: no cover
            yield LLMChunk()