navi-1/debug/eval/api.py at vmkdemo

Fork: 0
root / navi-1
Find file
Newer
Older
navi-1 / debug / eval / api.py
Eugene Sukhodolskiy on 26 Apr 4 KB Add eval system Phase 4 — read endpoints and background runner
Raw Blame History
"""FastAPI router for the eval system. Mounted from navi/main.py."""

from __future__ import annotations

from typing import Literal

from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field

from navi.api.deps import (
    get_backend_registry,
    get_profile_registry,
    get_session_store,
)
from navi.config import settings

from .db import EvalDB
from .judge import JUDGE_VERSION, RUBRIC_VERSION
from .runner import get_registry, start_run
from .schema import (
    RunRequest,
    RunStatus,
    SessionDetail,
    SessionOverview,
    StatsResponse,
)


# Every route declared in this module hangs off this router, so each path
# below is served under the "/eval" prefix and carries the "eval" tag.
router = APIRouter(prefix="/eval", tags=["eval"])


# Module-wide EvalDB handle; remains None until _get_db() first succeeds.
_db: EvalDB | None = None


def _get_db() -> EvalDB:
    """Return the shared ``EvalDB``, constructing it on first use.

    The instance is built from ``settings.database_url`` and cached in the
    module-level ``_db`` so later calls reuse it.

    Raises:
        HTTPException: 503 when ``settings.database_url`` is falsy.
    """
    global _db
    if _db is not None:
        return _db

    url = settings.database_url
    if not url:
        raise HTTPException(
            status_code=503,
            detail="DATABASE_URL is not set; eval system requires postgres",
        )

    _db = EvalDB(url)
    return _db


# ── Feedback (Phase 1) ───────────────────────────────────────────────────


class FeedbackIn(BaseModel):
    """Request body for ``POST /eval/feedback``: a rating for one message."""

    # Identifier of the session the rated message belongs to.
    session_id: str
    # Which message is being rated; validated as >= 0.
    # NOTE(review): presumably an index into the session's message list — confirm.
    message_index: int = Field(ge=0)
    # 0 = clear (delete row); -1 / +1 = thumbs down / up.
    rating: Literal[-1, 0, 1]


@router.post("/feedback")
async def set_feedback(payload: FeedbackIn) -> dict:
    """Record or clear the rating for one message of a session.

    A non-zero rating is written with ``EvalDB.set_feedback``; a rating of 0
    is routed to ``EvalDB.clear_feedback`` instead.

    Returns:
        ``{"ok": True}`` once the database call has completed.
    """
    store = _get_db()
    session_id, message_index = payload.session_id, payload.message_index

    if payload.rating != 0:
        await store.set_feedback(session_id, message_index, payload.rating)
    else:
        await store.clear_feedback(session_id, message_index)

    return {"ok": True}


@router.get("/feedback/{session_id}")
async def list_feedback(session_id: str) -> dict:
    """Return the feedback stored for ``session_id`` under the ``"feedback"`` key."""
    entries = await _get_db().list_feedback(session_id)
    return {"feedback": entries}


# ── Sessions overview / detail ───────────────────────────────────────────


@router.get("/sessions", response_model=list[SessionOverview])
async def list_sessions(
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    profile: str | None = None,
    status: Literal["evaluated", "pending", "stale"] | None = None,
) -> list[SessionOverview]:
    """Return one page of session overviews.

    Args:
        limit: Page size, 1-500 (default 50).
        offset: Number of rows to skip, >= 0.
        profile: Optional profile filter, forwarded to the query unchanged.
        status: Optional status filter, forwarded to the query unchanged.

    The current ``JUDGE_VERSION`` and ``RUBRIC_VERSION`` are passed along with
    the filters; each returned row is validated into a ``SessionOverview``.
    """
    store = _get_db()
    query = {
        "judge_version": JUDGE_VERSION,
        "rubric_version": RUBRIC_VERSION,
        "limit": limit,
        "offset": offset,
        "profile": profile,
        "status": status,
    }
    records = await store.list_sessions_overview(**query)
    return list(map(SessionOverview.model_validate, records))


@router.get("/sessions/{session_id}", response_model=SessionDetail)
async def get_session_detail(session_id: str) -> SessionDetail:
    """Return one session's metadata together with its feedback and evaluations.

    The session itself comes from the session store; feedback and evaluations
    are read from the eval database.

    Raises:
        HTTPException: 404 when the session store has no such session.
    """
    eval_db = _get_db()
    record = await get_session_store().get(session_id)
    if record is None:
        raise HTTPException(
            status_code=404, detail=f"session not found: {session_id}"
        )

    votes = await eval_db.list_feedback(session_id)
    verdicts = await eval_db.list_evaluations(session_id)

    fields = {
        "session_id": record.id,
        "profile_id": record.profile_id,
        "name": record.name,
        "created_at": record.created_at,
        "last_active": record.last_active,
        "msg_count": len(record.messages),
        "feedback": votes,
        "evaluations": verdicts,
    }
    return SessionDetail(**fields)


# ── Stats ────────────────────────────────────────────────────────────────


@router.get("/stats", response_model=StatsResponse)
async def get_stats(
    days: int = Query(30, ge=1, le=365),
    by_complexity_bucket: bool = False,
) -> StatsResponse:
    """Return aggregate eval statistics.

    Args:
        days: Value forwarded as the ``days`` argument, 1-365 (default 30).
        by_complexity_bucket: Forwarded unchanged to the aggregation query.

    The current ``JUDGE_VERSION`` and ``RUBRIC_VERSION`` are passed to
    ``EvalDB.aggregate_stats``; its result is validated into ``StatsResponse``.
    """
    store = _get_db()
    aggregates = await store.aggregate_stats(
        days=days,
        by_complexity_bucket=by_complexity_bucket,
        judge_version=JUDGE_VERSION,
        rubric_version=RUBRIC_VERSION,
    )
    return StatsResponse.model_validate(aggregates)


# ── Run trigger / status (background tasks) ─────────────────────────────


@router.post("/run", response_model=RunStatus)
async def trigger_run(req: RunRequest) -> RunStatus:
    """Hand ``req`` to ``start_run`` and return the ``RunStatus`` it reports.

    The eval database is resolved first, so a missing database configuration
    surfaces before the session store or registries are looked up.
    """
    dependencies = {
        "req": req,
        "db": _get_db(),
        "session_store": get_session_store(),
        "backend_registry": get_backend_registry(),
        "profile_registry": get_profile_registry(),
    }
    return start_run(**dependencies)


@router.get("/run/{run_id}", response_model=RunStatus)
async def get_run_status(run_id: str) -> RunStatus:
    """Return the status the run registry holds for ``run_id``.

    Raises:
        HTTPException: 404 when the registry has no entry for ``run_id``.
    """
    run = get_registry().get(run_id)
    if run is not None:
        return run
    raise HTTPException(status_code=404, detail=f"run not found: {run_id}")


@router.get("/runs", response_model=list[RunStatus])
async def list_runs() -> list[RunStatus]:
    """Return every run status the run registry currently reports."""
    registry = get_registry()
    return registry.list_runs()