navi-1/navi/llm/base.py at 0aa7f6f3a8fa3618aa56419bb95dd0ea686f1faf

Fork: 0
root / navi-1
Find file
Newer
Older
navi-1 / navi / llm / base.py
Eugene Sukhodolskiy on 8 Apr 2 KB Add thinking/reasoning streaming support
Raw Blame History
"""
Canonical types and abstract base class for LLM backends.

All backends translate their native wire format into these types.
Message format follows the OpenAI convention (compatible with Ollama and Anthropic adapters).
"""

from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import AsyncGenerator, Literal

from pydantic import BaseModel


class ToolCallRequest(BaseModel):
    """A tool call requested by the LLM.

    Used as the element type of ``Message.tool_calls`` and
    ``LLMResponse.tool_calls``.
    """

    # identifier of this call
    # NOTE(review): presumably echoed back in Message.tool_call_id on the
    # matching tool result message -- confirm against the backends
    id: str
    # name of the requested tool
    name: str
    # call arguments as a dict (not a JSON string); keys and values are not
    # constrained by this model
    arguments: dict


class ToolSchema(BaseModel):
    """Tool definition sent to the LLM.

    Instances are passed to ``LLMBackend.complete`` via its ``tools``
    parameter.
    """

    # kind of tool being defined; defaults to "function"
    type: str = "function"
    # the definition itself; typed as a plain dict, so the layout sketched in
    # the trailing comment is a convention and is not validated by this model
    function: dict  # {name: str, description: str, parameters: JSON Schema}


class Message(BaseModel):
    """Canonical message format (OpenAI-compatible).

    Only ``role`` is required; every other field defaults to ``None``.
    """

    # author of the message; restricted to exactly these four values
    role: Literal["system", "user", "assistant", "tool"]
    # text content of the message; optional
    content: str | None = None
    # base64-encoded images (multimodal); user and assistant roles only
    # NOTE(review): the role restriction is a convention -- nothing in this
    # model enforces it
    images: list[str] | None = None
    # set by assistant when requesting tool calls
    tool_calls: list[ToolCallRequest] | None = None
    # set on tool result messages
    # NOTE(review): presumably the ToolCallRequest.id being answered -- confirm
    tool_call_id: str | None = None
    name: str | None = None  # tool name on tool result messages
    # optional timestamp for the message
    # NOTE(review): naive vs. timezone-aware is not enforced here -- confirm
    # what callers pass
    created_at: datetime | None = None


class LLMResponse(BaseModel):
    """Non-streaming response from an LLM backend.

    This is the return type of ``LLMBackend.complete``.

    ``content``, ``tool_calls`` and ``finish_reason`` are declared without a
    default; only ``thinking`` has one.
    NOTE(review): under pydantic v2 a ``X | None`` field without a default is
    required (``None`` must be passed explicitly) -- confirm the pydantic
    version in use.
    """

    # text of the response; None is an accepted value
    content: str | None
    # tool calls requested by the model; None is an accepted value
    tool_calls: list[ToolCallRequest] | None
    # typed as a plain str, so the values listed in the trailing comment are
    # a convention and other strings are not rejected by this model
    finish_reason: str  # "stop" | "tool_calls" | "length"
    # NOTE(review): presumably the model's reasoning ("thinking") text when
    # the backend exposes it -- confirm against the backends
    thinking: str | None = None


class LLMChunk(BaseModel):
    """A single chunk from a streaming LLM response.

    This is the item type yielded by ``LLMBackend.stream``. Every field
    defaults to ``None``, so a chunk may carry any subset of them.
    """

    # NOTE(review): presumably the incremental piece of response text carried
    # by this chunk -- confirm against the backends
    delta: str | None = None
    # NOTE(review): presumably the incremental piece of reasoning
    # ("thinking") text carried by this chunk -- confirm against the backends
    thinking: str | None = None
    # typed as a plain str, so the values listed in the trailing comment are
    # a convention and other strings are not rejected by this model
    finish_reason: str | None = None  # "stop" | "length"; None while streaming


class LLMBackend(ABC):
    """Abstract base class for LLM backends.

    A concrete backend must override both ``complete`` and ``stream``;
    the class cannot be instantiated until it does.
    """

    @abstractmethod
    async def complete(
        self,
        messages: list[Message],
        tools: list[ToolSchema] | None = None,
        temperature: float = 0.7,
    ) -> LLMResponse:
        """Single-shot completion. Used in the agent tool-calling loop.

        Args:
            messages: Conversation in canonical ``Message`` form.
            tools: Tool definitions to send to the LLM, or ``None`` to send
                none.
            temperature: Sampling temperature; defaults to 0.7.

        Returns:
            The full, non-streamed response as an ``LLMResponse``.
        """

    @abstractmethod
    async def stream(
        self,
        messages: list[Message],
        temperature: float = 0.7,
    ) -> AsyncGenerator[LLMChunk, None]:
        """Streaming text completion (no tool calling). Used for final response streaming.

        Implementations are expected to be async generators (``async def``
        containing ``yield``) and are consumed with
        ``async for chunk in backend.stream(...)`` -- no ``await`` on the call.

        Args:
            messages: Conversation in canonical ``Message`` form.
            temperature: Sampling temperature; defaults to 0.7.

        Yields:
            ``LLMChunk`` objects, one per streamed chunk.

        Raises:
            NotImplementedError: If this base implementation itself is
                iterated (e.g. through ``super().stream(...)``).
        """
        raise NotImplementedError
        # Unreachable on purpose. The mere presence of ``yield`` makes this an
        # async generator function, so the abstract signature has the same
        # shape as the concrete overrides. Without it, this is a coroutine
        # function *returning* an async generator, which type checkers treat
        # as incompatible with ``async for`` call sites and with overrides
        # that yield.
        yield LLMChunk()  # pragma: no cover