diff --git a/docs/auth.md b/docs/auth.md index 92aa7d6..bf5e37a 100644 --- a/docs/auth.md +++ b/docs/auth.md @@ -500,6 +500,31 @@ --- +## Legacy data migration + +Sessions, memory facts, and summaries created **before** multi-user auth have `user_id = NULL`. They remain visible **only to admins** until explicitly assigned to a user. + +To transfer legacy data to a specific user after their first login: + +```bash +.venv/bin/python scripts/assign_legacy_sessions.py +``` + +The script: +1. Verifies the target user exists in `navi_users` (they must log in at least once) +2. Assigns all `user_id IS NULL` sessions to the target user +3. Assigns legacy memory facts — if the target user already has a fact with the same `(category, key)`, the newer value is kept and the duplicate is deleted +4. Assigns or merges the legacy memory summary + +**Example:** +```bash +.venv/bin/python scripts/assign_legacy_sessions.py 550e8400-e29b-41d4-a716-446655440000 +``` + +After running the script, the target user will see all previously anonymous sessions and memory in their account. + +--- + ## Testing with mocked auth Integration tests in `tests/integration/conftest.py` bypass real OAuth by overriding FastAPI dependencies: diff --git a/scripts/assign_legacy_sessions.py b/scripts/assign_legacy_sessions.py new file mode 100644 index 0000000..0124374 --- /dev/null +++ b/scripts/assign_legacy_sessions.py @@ -0,0 +1,111 @@ +"""Assign legacy (user_id=NULL) sessions, facts, and summaries to a specific user. + +Run after the target user has logged in at least once (so navi_users row exists). + +Usage: + .venv/bin/python scripts/assign_legacy_sessions.py + +Example: + .venv/bin/python scripts/assign_legacy_sessions.py 550e8400-e29b-41d4-a716-446655440000 +""" + +import asyncio +import os +import sys + +import asyncpg + + +def _get_dsn() -> str: + dsn = os.getenv("DATABASE_URL", "") + if not dsn: + raise RuntimeError("DATABASE_URL env variable is required") + return dsn + + +async def assign_legacy(user_id: str) -> None: + dsn = _get_dsn() + conn = await asyncpg.connect(dsn) + try: + # Verify user exists + row = await conn.fetchrow("SELECT id, email FROM navi_users WHERE id = $1", user_id) + if row is None: + print(f"ERROR: navi_users row not found for id={user_id}") + print(" User must log in at least once before running this script.") + sys.exit(1) + print(f"Target user: {row['email']} ({user_id})") + + # 1. Sessions + sessions_result = await conn.execute( + "UPDATE sessions SET user_id = $1 WHERE user_id IS NULL", user_id + ) + sessions_count = int(sessions_result.split()[-1]) if sessions_result.startswith("UPDATE") else 0 + print(f" Sessions assigned: {sessions_count}") + + # 2. Memory facts — handle UNIQUE(user_id, category, key) conflicts + # Strategy: if target user already has (category, key), merge (keep + # user's fact, delete legacy). Otherwise update legacy fact's user_id. + legacy_facts = await conn.fetch( + "SELECT id, category, key, value, updated_at FROM memory_facts WHERE user_id IS NULL" + ) + assigned = 0 + merged = 0 + skipped = 0 + for fact in legacy_facts: + existing = await conn.fetchrow( + "SELECT id, value, updated_at FROM memory_facts WHERE user_id = $1 AND category = $2 AND key = $3", + user_id, fact["category"], fact["key"] + ) + if existing: + # Merge: keep the newer value, delete legacy + if fact["updated_at"] > existing["updated_at"]: + await conn.execute( + "UPDATE memory_facts SET value = $1, updated_at = $2, source = $3 WHERE id = $4", + fact["value"], fact["updated_at"], fact.get("source", "conversation"), existing["id"] + ) + await conn.execute("DELETE FROM memory_facts WHERE id = $1", fact["id"]) + merged += 1 + else: + await conn.execute( + "UPDATE memory_facts SET user_id = $1 WHERE id = $2", user_id, fact["id"] + ) + assigned += 1 + print(f" Memory facts assigned: {assigned}, merged: {merged}") + + # 3. Memory summary + # If target user already has a summary, merge (concatenate with separator). + legacy_summary = await conn.fetchrow( + "SELECT id, content, generated_at FROM memory_summary WHERE user_id IS NULL" + ) + if legacy_summary: + user_summary = await conn.fetchrow( + "SELECT id, content FROM memory_summary WHERE user_id = $1", user_id + ) + if user_summary: + combined = f"{user_summary['content']}\n\n--- Legacy ---\n\n{legacy_summary['content']}" + await conn.execute( + "UPDATE memory_summary SET content = $1, generated_at = $2 WHERE id = $3", + combined, legacy_summary["generated_at"], user_summary["id"] + ) + await conn.execute("DELETE FROM memory_summary WHERE id = $1", legacy_summary["id"]) + print(" Memory summary: merged with existing user summary") + else: + await conn.execute( + "UPDATE memory_summary SET user_id = $1 WHERE id = $2", + user_id, legacy_summary["id"] + ) + print(" Memory summary: assigned") + else: + print(" Memory summary: no legacy rows") + + print("\nDone. Legacy data has been assigned.") + finally: + await conn.close() + + +if __name__ == "__main__": + if len(sys.argv) != 2: + print(__doc__) + sys.exit(1) + target_user_id = sys.argv[1] + asyncio.run(assign_legacy(target_user_id))