diff --git a/navi/api/deps.py b/navi/api/deps.py index 6b547a0..641e60e 100644 --- a/navi/api/deps.py +++ b/navi/api/deps.py @@ -16,6 +16,7 @@ ToolRegistry, build_default_registries, ) +from navi.llm.ollama import OllamaBackend from navi.memory import MemoryStore from navi.memory.sqlite_store import SqliteMemoryStore from navi.workers import Worker, build_default_workers @@ -48,11 +49,20 @@ memory_store=_memory_store, session_store=_session_store, ) - # Wire embedding backend into memory store for vector search + # Wire embedding backend into memory store for vector search. + # Uses a dedicated Ollama endpoint when configured, otherwise falls back + # to the main chat backend. try: - backend = _registries[2].get("ollama") + if settings.embedding_ollama_host: + emb_backend = OllamaBackend( + model=settings.embedding_model, + host=settings.embedding_ollama_host, + api_key=settings.embedding_ollama_api_key, + ) + else: + emb_backend = _registries[2].get("ollama") if hasattr(_memory_store, "set_embedding_backend"): - _memory_store.set_embedding_backend(backend) + _memory_store.set_embedding_backend(emb_backend) except Exception: pass return _registries diff --git a/navi/config.py b/navi/config.py index e6c4030..b6b0858 100644 --- a/navi/config.py +++ b/navi/config.py @@ -14,6 +14,9 @@ ollama_think: bool = True # Embedding model for memory vector search (Ollama API) + # When embedding_ollama_host is empty, falls back to ollama_host. + embedding_ollama_host: str = "" + embedding_ollama_api_key: str = "" embedding_model: str = "nomic-embed-text:latest" embedding_dimensions: int = 768 diff --git a/navi/memory/backfill_embeddings.py b/navi/memory/backfill_embeddings.py new file mode 100644 index 0000000..f90a1d1 --- /dev/null +++ b/navi/memory/backfill_embeddings.py @@ -0,0 +1,47 @@ +"""Backfill embeddings for existing memory_facts that lack them. 
async def main() -> None:
    """Backfill embeddings for stored facts that do not have one yet.

    Builds an ``OllamaBackend`` for ``settings.embedding_model``, attaches it
    to a ``MemoryStore`` opened on ``settings.database_url`` and runs
    ``MemoryStore.backfill_embeddings`` in batches of 8, printing how many
    rows were updated.

    Returns without doing anything when ``settings.database_url`` is empty.
    """
    if not settings.database_url:
        print("No DATABASE_URL set — nothing to backfill.")
        # Plain return instead of sys.exit(0): the process still exits with
        # status 0 when run as a script, but awaiting main() from other code
        # no longer raises SystemExit into the caller.
        return

    # Host and API key are selected as a pair. When a dedicated embedding
    # endpoint is configured, only its own key is sent to it; the main chat
    # backend's key is never forwarded to a different host. This mirrors how
    # the API wires the dedicated embedding backend.
    if settings.embedding_ollama_host:
        host = settings.embedding_ollama_host
        api_key = settings.embedding_ollama_api_key
    else:
        host = settings.ollama_host
        api_key = settings.ollama_api_key

    backend = OllamaBackend(
        model=settings.embedding_model,
        host=host,
        api_key=api_key,
    )

    store = MemoryStore(settings.database_url)
    store.set_embedding_backend(backend)

    print("Checking for facts without embeddings...")
    count = await store.backfill_embeddings(batch_size=8)
    print(f"Backfilled {count} embeddings.")


if __name__ == "__main__":
    asyncio.run(main())
async def _generate_embeddings(self, texts: list[str]) -> list[list[float] | None]:
    """Embed *texts* with a single backend call.

    Returns a list with exactly one entry per input text, in the same order.
    An entry is ``None`` when no vector is available for that text. Every
    entry is ``None`` when no embedding backend is set, when
    ``_has_pgvector()`` is false, when the backend call raises, or when the
    backend returns a different number of vectors than texts (the pairing
    cannot be trusted in that case).
    """
    if not texts:
        return []
    if not self._embedding_backend or not await self._has_pgvector():
        return [None] * len(texts)
    try:
        vectors = await self._embedding_backend.embed(
            texts=texts,
            model=settings.embedding_model,
        )
        # Empty vectors are normalised to None so callers test one condition.
        results = [v if v else None for v in vectors]
    except Exception:
        # Best effort: a failed batch is logged and reported as "no vectors".
        log.warning("memory.embed_batch_failed", count=len(texts), exc_info=True)
        return [None] * len(texts)
    if len(results) != len(texts):
        # Callers zip the result with their rows; a short or long reply would
        # silently drop rows or attach vectors to the wrong facts.
        log.warning(
            "memory.embed_batch_mismatch",
            expected=len(texts),
            got=len(results),
        )
        return [None] * len(texts)
    return results


async def backfill_embeddings(self, batch_size: int = 8) -> int:
    """Generate and store embeddings for facts whose ``embedding`` is NULL.

    Rows are read from ``memory_facts`` in ``id`` order, ``batch_size`` at a
    time, embedded via ``_generate_embeddings`` and written back one by one.

    Rows that could not be embedded stay NULL and are skipped for the rest of
    this run, so the loop always terminates; a later run picks them up again.

    Args:
        batch_size: Maximum number of facts embedded per backend call.

    Returns:
        The number of rows that received an embedding.
    """
    # Local import: the module's own import block is not assumed to provide it.
    import asyncio

    # Nothing can be embedded in these cases, so do not scan the table at all.
    if not self._embedding_backend or not await self._has_pgvector():
        return 0

    pool = await self._get_pool()
    updated = 0
    # Rows fetched in this run that are still NULL. Because batches are read
    # in id order, these rows sort before every row not yet fetched, so using
    # the count as OFFSET steps over exactly them.
    skipped = 0
    async with pool.acquire() as conn:
        while True:
            rows = await conn.fetch(
                "SELECT id, value FROM memory_facts WHERE embedding IS NULL "
                "ORDER BY id LIMIT $1 OFFSET $2",
                batch_size,
                skipped,
            )
            if not rows:
                break
            embeddings = await self._generate_embeddings(
                [row["value"] for row in rows]
            )
            written = 0
            for row, emb in zip(rows, embeddings):
                if not emb:
                    continue
                await conn.execute(
                    "UPDATE memory_facts SET embedding = $1::vector WHERE id = $2",
                    _vector_to_str(emb),
                    row["id"],
                )
                written += 1
            updated += written
            skipped += len(rows) - written
            if len(rows) < batch_size:
                # Short page: there is nothing left to fetch.
                break
            # Rate-limit against Ollama Cloud (or any remote embed endpoint).
            await asyncio.sleep(2)
    return updated