"""Backfill embeddings for existing memory_facts that lack them.
Run this once after enabling pgvector / embedding backend:
.venv/bin/python navi/memory/backfill_embeddings.py
What it does:
1. Connects to PostgreSQL via DATABASE_URL
2. Finds facts with embedding IS NULL
3. Generates embeddings in small batches via Ollama, using the model named by settings.embedding_model (e.g. nomic-embed-text)
4. Updates each row with its vector
Safe to run multiple times — only touches rows without embeddings.
"""
import asyncio
import sys
from navi.config import settings
from navi.llm.ollama import OllamaBackend
from navi.memory.store import MemoryStore
async def main() -> None:
    """Generate embeddings for stored facts that do not have one yet.

    Exits the process with status 0 when ``settings.database_url`` is
    unset. Otherwise an ``OllamaBackend`` is attached to a ``MemoryStore``
    and ``backfill_embeddings`` is awaited with a batch size of 8; the
    value it returns is printed as the number of backfilled embeddings.
    """
    db_url = settings.database_url
    if not db_url:
        print("No DATABASE_URL set — nothing to backfill.")
        sys.exit(0)

    # The embedding backend is configured separately from the chat backend:
    # embedding-specific host / API key take precedence, and the general
    # Ollama settings are used only when those are empty (e.g. embeddings on
    # a local CPU server while chat goes to Ollama Cloud).
    embedder = OllamaBackend(
        model=settings.embedding_model,
        host=settings.embedding_ollama_host or settings.ollama_host,
        api_key=settings.embedding_ollama_api_key or settings.ollama_api_key,
    )

    memory = MemoryStore(db_url)
    memory.set_embedding_backend(embedder)

    print("Checking for facts without embeddings...")
    count = await memory.backfill_embeddings(batch_size=8)
    print(f"Backfilled {count} embeddings.")
# Script entry point: run the backfill only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())