-- Eval system schema (eval_v1).
-- Applied lazily by debug/eval/db.py on first connection.
-- Postgres is the primary DB; sqlite path is intentionally not supported here.
-- Per-message user feedback. Drives the like/dislike signal that the judge sees.
-- Identity: (session_id, message_index) where message_index is the position in
-- session.messages (the displayed history is append-only, so indices are stable).
CREATE TABLE IF NOT EXISTS message_feedback (
session_id TEXT NOT NULL,
message_index INTEGER NOT NULL,
rating SMALLINT NOT NULL CHECK (rating IN (-1, 1)),
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
PRIMARY KEY (session_id, message_index)
);
CREATE INDEX IF NOT EXISTS idx_message_feedback_session
ON message_feedback(session_id);
-- Reserved for Phase 2 (eval runs). Kept here so the migration is one file.
-- One row per (session, expert) inside a single eval run; aggregated via view.
CREATE TABLE IF NOT EXISTS evaluations (
id UUID PRIMARY KEY,
session_id TEXT NOT NULL,
eval_run_id UUID NOT NULL,
eval_date TIMESTAMPTZ NOT NULL DEFAULT now(),
judge_model TEXT NOT NULL,
judge_version TEXT NOT NULL,
rubric_version TEXT NOT NULL,
expert_id TEXT NOT NULL,
scores JSONB NOT NULL,
comment TEXT NOT NULL DEFAULT ''
);
CREATE INDEX IF NOT EXISTS idx_evaluations_session
ON evaluations(session_id);
CREATE INDEX IF NOT EXISTS idx_evaluations_date
ON evaluations(eval_date);
CREATE INDEX IF NOT EXISTS idx_evaluations_versions
ON evaluations(judge_version, rubric_version);