Newer
Older
navi-1 / scripts / assign_legacy_sessions.py
@Eugene Sukhodolskiy Eugene Sukhodolskiy on 4 May 4 KB Add legacy data migration script and docs
"""Assign legacy (user_id=NULL) sessions, facts, and summaries to a specific user.

Run after the target user has logged in at least once (so navi_users row exists).

Usage:
    .venv/bin/python scripts/assign_legacy_sessions.py <user_id>

Example:
    .venv/bin/python scripts/assign_legacy_sessions.py 550e8400-e29b-41d4-a716-446655440000
"""

import asyncio
import os
import sys

import asyncpg


def _get_dsn() -> str:
    dsn = os.getenv("DATABASE_URL", "")
    if not dsn:
        raise RuntimeError("DATABASE_URL env variable is required")
    return dsn


async def assign_legacy(user_id: str) -> None:
    dsn = _get_dsn()
    conn = await asyncpg.connect(dsn)
    try:
        # Verify user exists
        row = await conn.fetchrow("SELECT id, email FROM navi_users WHERE id = $1", user_id)
        if row is None:
            print(f"ERROR: navi_users row not found for id={user_id}")
            print("       User must log in at least once before running this script.")
            sys.exit(1)
        print(f"Target user: {row['email']} ({user_id})")

        # 1. Sessions
        sessions_result = await conn.execute(
            "UPDATE sessions SET user_id = $1 WHERE user_id IS NULL", user_id
        )
        sessions_count = int(sessions_result.split()[-1]) if sessions_result.startswith("UPDATE") else 0
        print(f"  Sessions assigned: {sessions_count}")

        # 2. Memory facts — handle UNIQUE(user_id, category, key) conflicts
        # Strategy: if target user already has (category, key), merge (keep
        # user's fact, delete legacy). Otherwise update legacy fact's user_id.
        legacy_facts = await conn.fetch(
            "SELECT id, category, key, value, updated_at FROM memory_facts WHERE user_id IS NULL"
        )
        assigned = 0
        merged = 0
        skipped = 0
        for fact in legacy_facts:
            existing = await conn.fetchrow(
                "SELECT id, value, updated_at FROM memory_facts WHERE user_id = $1 AND category = $2 AND key = $3",
                user_id, fact["category"], fact["key"]
            )
            if existing:
                # Merge: keep the newer value, delete legacy
                if fact["updated_at"] > existing["updated_at"]:
                    await conn.execute(
                        "UPDATE memory_facts SET value = $1, updated_at = $2, source = $3 WHERE id = $4",
                        fact["value"], fact["updated_at"], fact.get("source", "conversation"), existing["id"]
                    )
                await conn.execute("DELETE FROM memory_facts WHERE id = $1", fact["id"])
                merged += 1
            else:
                await conn.execute(
                    "UPDATE memory_facts SET user_id = $1 WHERE id = $2", user_id, fact["id"]
                )
                assigned += 1
        print(f"  Memory facts assigned: {assigned}, merged: {merged}")

        # 3. Memory summary
        # If target user already has a summary, merge (concatenate with separator).
        legacy_summary = await conn.fetchrow(
            "SELECT id, content, generated_at FROM memory_summary WHERE user_id IS NULL"
        )
        if legacy_summary:
            user_summary = await conn.fetchrow(
                "SELECT id, content FROM memory_summary WHERE user_id = $1", user_id
            )
            if user_summary:
                combined = f"{user_summary['content']}\n\n--- Legacy ---\n\n{legacy_summary['content']}"
                await conn.execute(
                    "UPDATE memory_summary SET content = $1, generated_at = $2 WHERE id = $3",
                    combined, legacy_summary["generated_at"], user_summary["id"]
                )
                await conn.execute("DELETE FROM memory_summary WHERE id = $1", legacy_summary["id"])
                print("  Memory summary: merged with existing user summary")
            else:
                await conn.execute(
                    "UPDATE memory_summary SET user_id = $1 WHERE id = $2",
                    user_id, legacy_summary["id"]
                )
                print("  Memory summary: assigned")
        else:
            print("  Memory summary: no legacy rows")

        print("\nDone. Legacy data has been assigned.")
    finally:
        await conn.close()


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(__doc__)
        sys.exit(1)
    target_user_id = sys.argv[1]
    asyncio.run(assign_legacy(target_user_id))