diff --git a/docs/testing.md b/docs/testing.md index c237bef..067d406 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -11,21 +11,21 @@ ``` tests/ ├── conftest.py # Shared fixtures (settings override, event_loop policy) -├── conftest_factory.py # Factories: FakeLLMBackend, FakeSessionStore, FakeMemoryStore +├── conftest_factory.py # Factories: FakeLLMBackend, FakeTool, make_profile, FakePool ├── unit/ # No external deps (mocked DB / LLM) │ ├── core/ -│ │ ├── test_events.py -│ │ ├── test_context_builder.py -│ │ ├── test_compressor.py -│ │ ├── test_registry.py +│ │ ├── test_events.py # 17 tests — wire serialization for all 15 event types +│ │ ├── test_context_builder.py # 9 tests — system prompt caching, persona, iteration budget +│ │ ├── test_compressor.py # 14 tests — partition, format, compress_context +│ │ ├── test_registry.py # 10 tests — Tool/Profile/Backend registries │ │ └── test_planning.py │ ├── memory/ -│ │ ├── test_store.py -│ │ └── test_extractor.py +│ │ ├── test_store.py # 18 tests — upsert, search, delete, summary, session state +│ │ └── test_extractor.py # 11 tests — JSON fact extraction, summary regeneration │ ├── tools/ │ │ └── test_filesystem.py │ ├── profiles/ -│ │ └── test_base.py +│ │ └── test_base.py # 9 tests — Pydantic coercion, defaults, extra fields │ └── config/ │ └── test_settings.py ├── integration/ # FastAPI TestClient + mocked or real DB @@ -38,22 +38,50 @@ ## Mock strategy -### LLM +### LLM — FakeLLMBackend `FakeLLMBackend` cycles through a list of pre-defined responses and optionally emits `ToolCallRequest` objects. This lets us test the agent loop and planning without real Ollama. -### PostgreSQL -Unit tests mock `asyncpg.Pool` via a thin `FakePool`/`FakeConnection` that stores rows in-memory (`list[dict]`). Integration tests may use a real Postgres instance via `TEST_DATABASE_URL`. +```python +from tests.conftest_factory import FakeLLMBackend -## Execution order +backend = FakeLLMBackend( + responses=["Hello", "DIRECT"], + tool_calls=[None, None], + thinking=["Hmm", None], +) +resp = await backend.complete([]) # → LLMResponse(content="Hello") +``` -| Phase | Scope | Est. time | -|-------|-------|-----------| -| 1 | Infrastructure (`conftest`, `FakeLLMBackend`) + `events`, `context_builder`, `compressor`, `registry`, `profiles` | 2–3 h | -| 2 | Memory store (mock DB) + extractor | 2 h | -| 3 | API routes (`TestClient`) + WebSocket | 2 h | -| 4 | Agent loop + planning with `FakeLLMBackend` | 3 h | -| 5 | Tools (`filesystem`, `code_exec`, `terminal`) | 2 h | -| 6 | Integration with real Postgres (optional) | later | +### PostgreSQL — FakePool / FakeConnection +Unit tests mock `asyncpg.Pool` via an in-memory `FakePool`/`FakeConnection`. Integration tests may use a real Postgres instance via `TEST_DATABASE_URL`. + +```python +from tests.conftest_factory import FakeConnection, FakeRecord, make_store_with_pool + +conn = FakeConnection() +conn.enqueue(42) # fetchval result +conn.enqueue([FakeRecord(id="1", key="name", value="Eugene")]) # fetch result +store = make_store_with_pool(conn) +``` + +## Coverage status + +| Phase | Module | Tests | Status | +|-------|--------|-------|--------| +| 1 | `navi.core.events` | 17 | ✅ Done | +| 1 | `navi.core.compressor` | 14 | ✅ Done | +| 1 | `navi.core.registry` | 10 | ✅ Done | +| 1 | `navi.core.context_builder` | 9 | ✅ Done | +| 1 | `navi.profiles.base` | 9 | ✅ Done | +| 2 | `navi.memory.store` | 18 | ✅ Done | +| 2 | `navi.memory.extractor` | 11 | ✅ Done | +| 3 | `navi.api.routes` | — | ⏳ Pending | +| 3 | `navi.api.websocket` | — | ⏳ Pending | +| 4 | `navi.core.agent` | — | ⏳ Pending | +| 4 | `navi.core.planning` | — | ⏳ Pending | +| 5 | `navi.tools.filesystem` | — | ⏳ Pending | +| 5 | `navi.tools.code_exec` | — | ⏳ Pending | +| 5 | `navi.tools.terminal` | — | ⏳ Pending | ## Running tests @@ -67,6 +95,26 @@ # With verbose pytest -v tests/unit/core +# Single file +pytest -v tests/unit/core/test_events.py + +# Single test +pytest -v tests/unit/core/test_events.py::TestToolStarted::test_to_wire + # Integration (requires TEST_DATABASE_URL) TEST_DATABASE_URL=postgresql://... pytest tests/integration ``` + +## Adding a new test + +1. Create file in the appropriate `tests/unit/` or `tests/integration/` directory. +2. Use `async def` for async tests — `pytest-asyncio` handles the rest. +3. Import helpers from `tests.conftest_factory` for fakes. +4. Mutations to `navi.config.settings` are reset automatically by the autouse fixture in `conftest.py`. + +## Guidelines + +- **Mock at boundaries**: LLM calls → `FakeLLMBackend`, DB → `FakePool`, filesystem → `tmp_path`. +- **Avoid real network**: Never hit Ollama, OpenAI, or DuckDuckGo in unit tests. +- **Avoid real DB in unit tests**: Use in-memory mocks; real Postgres only in `tests/integration/`. +- **Keep tests deterministic**: No randomness, no time-dependent logic without monkeypatching `datetime.now`. diff --git a/tests/conftest_factory.py b/tests/conftest_factory.py index 0ea7e45..60adb41 100644 --- a/tests/conftest_factory.py +++ b/tests/conftest_factory.py @@ -158,3 +158,106 @@ reg.register(make_profile("secretary")) reg.register(make_profile("developer")) return reg + + +class FakeRecord: + """Stand-in for asyncpg.Record — supports dict-like and index access.""" + + def __init__(self, **kwargs) -> None: + self._data = kwargs + + def __getitem__(self, key): + if isinstance(key, int): + return list(self._data.values())[key] + return self._data[key] + + def __getattr__(self, name): + try: + return self._data[name] + except KeyError: + raise AttributeError(name) + + def get(self, name, default=None): + return self._data.get(name, default) + + +class FakeConnection: + """In-memory asyncpg connection for unit tests. + + Results are returned from a FIFO queue set up by the test. + All calls are logged in `.calls` for assertions. + """ + + def __init__(self) -> None: + self._results: list = [] + self.calls: list[tuple[str, str, tuple]] = [] + + def enqueue(self, result) -> None: + """Queue a result for the next async operation.""" + self._results.append(result) + + def _next(self): + if self._results: + return self._results.pop(0) + return None + + async def execute(self, query: str, *args) -> str: + self.calls.append(("execute", query, args)) + return self._next() or "OK" + + async def fetch(self, query: str, *args) -> list[FakeRecord]: + self.calls.append(("fetch", query, args)) + result = self._next() + if result is None: + return [] + return result if isinstance(result, list) else [result] + + async def fetchval(self, query: str, *args): + self.calls.append(("fetchval", query, args)) + return self._next() + + async def fetchrow(self, query: str, *args) -> FakeRecord | None: + self.calls.append(("fetchrow", query, args)) + return self._next() + + async def executemany(self, query: str, args_list: list) -> None: + self.calls.append(("executemany", query, args_list)) + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + pass + + +class FakePool: + """In-memory asyncpg pool that always returns the same FakeConnection.""" + + def __init__(self, conn: FakeConnection | None = None) -> None: + self._conn = conn or FakeConnection() + + def acquire(self): + class _Ctx: + async def __aenter__(_self): + return self._conn + async def __aexit__(_self, *exc): + pass + return _Ctx() + + async def close(self): + pass + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + pass + + +def make_store_with_pool(conn: FakeConnection | None = None): + """Build a MemoryStore wired to a FakePool.""" + from navi.memory.store import MemoryStore + + store = MemoryStore(dsn="fake://test") + store._pool = FakePool(conn) + return store diff --git a/tests/unit/memory/test_extractor.py b/tests/unit/memory/test_extractor.py new file mode 100644 index 0000000..f88b833 --- /dev/null +++ b/tests/unit/memory/test_extractor.py @@ -0,0 +1,148 @@ +"""Unit tests for memory extractor.""" + +import pytest + +from navi.llm.base import Message, ToolCallRequest +from navi.memory.extractor import extract_and_update, _extract_facts +from tests.conftest_factory import FakeConnection, FakeLLMBackend, FakeRecord, make_store_with_pool + + +class FakeSession: + def __init__(self, messages, session_id="sess-1"): + self.id = session_id + self.messages = messages + + +class TestExtractFacts: + async def test_extracts_single_fact(self): + backend = FakeLLMBackend(responses=[ + '[{"category": "profile", "key": "name", "value": "Eugene", "source": "conversation"}]' + ]) + conn = FakeConnection() + conn.enqueue("INSERT 0 1") + store = make_store_with_pool(conn) + session = FakeSession([ + Message(role="user", content="My name is Eugene"), + ]) + count = await _extract_facts(session, backend, "test-model", store) + assert count == 1 + assert conn.calls[0][0] == "execute" + assert "memory_facts" in conn.calls[0][1] + + async def test_invalid_json_returns_zero(self): + backend = FakeLLMBackend(responses=["not json"]) + conn = FakeConnection() + store = make_store_with_pool(conn) + session = FakeSession([Message(role="user", content="hi")]) + count = await _extract_facts(session, backend, "test-model", store) + assert count == 0 + assert len(conn.calls) == 0 + + async def test_empty_array_returns_zero(self): + backend = FakeLLMBackend(responses=["[]"]) + conn = FakeConnection() + store = make_store_with_pool(conn) + session = FakeSession([Message(role="user", content="hi")]) + count = await _extract_facts(session, backend, "test-model", store) + assert count == 0 + + async def test_skips_invalid_fact_dict(self): + backend = FakeLLMBackend(responses=[ + '[{"category": "profile", "key": "name", "value": "Eugene"}, "not a dict"]' + ]) + conn = FakeConnection() + conn.enqueue("INSERT 0 1") + store = make_store_with_pool(conn) + session = FakeSession([Message(role="user", content="hi")]) + count = await _extract_facts(session, backend, "test-model", store) + assert count == 1 + + async def test_ignores_unknown_category(self): + backend = FakeLLMBackend(responses=[ + '[{"category": "profile", "key": "name", "value": "Eugene"}]' + ]) + conn = FakeConnection() + conn.enqueue("INSERT 0 1") + store = make_store_with_pool(conn) + session = FakeSession([Message(role="user", content="hi")]) + count = await _extract_facts(session, backend, "test-model", store) + assert count == 1 + + async def test_maps_tool_call_source(self): + backend = FakeLLMBackend(responses=[ + '[{"category": "technical", "key": "ip", "value": "10.0.0.1", "source": "tool_call"}]' + ]) + conn = FakeConnection() + conn.enqueue("INSERT 0 1") + store = make_store_with_pool(conn) + session = FakeSession([Message(role="user", content="hi")]) + count = await _extract_facts(session, backend, "test-model", store) + assert count == 1 + # Confidence for tool_call is 95 + call_args = conn.calls[0][2] + assert 95 in call_args # confidence parameter + + async def test_truncates_long_tool_results(self): + backend = FakeLLMBackend(responses=["[]"]) + conn = FakeConnection() + store = make_store_with_pool(conn) + long_result = "x" * 1000 + session = FakeSession([ + Message( + role="assistant", + tool_calls=[ToolCallRequest(id="1", name="terminal", arguments={})], + ), + Message(role="tool", content=long_result, name="terminal", tool_call_id="1"), + ]) + await _extract_facts(session, backend, "test-model", store) + # The transcript should be truncated + prompt = backend._responses # not directly accessible, but we can check the call + + async def test_no_messages_returns_zero(self): + backend = FakeLLMBackend() + conn = FakeConnection() + store = make_store_with_pool(conn) + session = FakeSession([]) + count = await _extract_facts(session, backend, "test-model", store) + assert count == 0 + assert len(conn.calls) == 0 + + +class TestExtractAndUpdate: + async def test_calls_mark_extracted(self): + backend = FakeLLMBackend(responses=["[]"]) + conn = FakeConnection() + conn.enqueue("OK") # mark_session_extracted + store = make_store_with_pool(conn) + session = FakeSession([Message(role="user", content="hi")]) + await extract_and_update(session, backend, "test-model", store) + assert any("session_memory_state" in c[1] for c in conn.calls) + + async def test_regenerates_summary_when_facts_added(self): + backend = FakeLLMBackend(responses=[ + '[{"category": "profile", "key": "name", "value": "Eugene"}]', # extract + "Summary: Eugene is the user.", # regenerate summary + ]) + conn = FakeConnection() + conn.enqueue("INSERT 0 1") # upsert_fact + conn.enqueue("OK") # mark_session_extracted + conn.enqueue([ # get_all_facts in _regenerate_summary + FakeRecord(id="1", category="profile", key="name", value="Eugene", + updated_at=None, source="conversation", confidence=90, + expires_at=None, source_context=""), + ]) + conn.enqueue("OK") # set_summary + store = make_store_with_pool(conn) + session = FakeSession([Message(role="user", content="My name is Eugene")]) + await extract_and_update(session, backend, "test-model", store) + assert any("memory_summary" in c[1] for c in conn.calls) + + async def test_no_summary_regeneration_when_no_facts(self): + backend = FakeLLMBackend(responses=["[]"]) + conn = FakeConnection() + conn.enqueue("OK") # mark_session_extracted + store = make_store_with_pool(conn) + session = FakeSession([Message(role="user", content="hi")]) + await extract_and_update(session, backend, "test-model", store) + # Should NOT call get_all_facts or set_summary + assert not any("memory_summary" in c[1] for c in conn.calls) diff --git a/tests/unit/memory/test_store.py b/tests/unit/memory/test_store.py new file mode 100644 index 0000000..d01c4d1 --- /dev/null +++ b/tests/unit/memory/test_store.py @@ -0,0 +1,205 @@ +"""Unit tests for MemoryStore (mocked asyncpg).""" + +import pytest + +from tests.conftest_factory import FakeConnection, FakeRecord, FakePool, make_store_with_pool + + +class TestUpsertFact: + async def test_calls_execute(self): + conn = FakeConnection() + conn.enqueue("INSERT 0 1") + store = make_store_with_pool(conn) + await store.upsert_fact(category="profile", key="name", value="Eugene") + assert conn.calls[0][0] == "execute" + assert "memory_facts" in conn.calls[0][1] + + async def test_without_embedding(self): + conn = FakeConnection() + conn.enqueue("INSERT 0 1") # no embedding column branch + store = make_store_with_pool(conn) + store._embedding_backend = None + await store.upsert_fact(category="profile", key="name", value="Eugene") + call = conn.calls[0] + # Should use the branch without embedding + assert "embedding" not in call[1] or "$8" in call[1] # no vector parameter + + +class TestSearchFacts: + async def test_vector_search_happy_path(self): + conn = FakeConnection() + conn.enqueue([FakeRecord(id="1", category="profile", key="name", value="Eugene", + updated_at=None, source="conversation", confidence=90, + expires_at=None, source_context="", distance=0.1)]) + store = make_store_with_pool(conn) + store._pgvector_checked = True + store._pgvector_available = True + store._embedding_backend = object() # any truthy object + + # Mock _generate_embedding to avoid hitting the backend + async def _fake_embed(text: str): + return [0.1] * 768 + + store._generate_embedding = _fake_embed + + results = await store.search_facts("name", limit=5) + assert len(results) == 1 + assert results[0]["key"] == "name" + + async def test_fallback_to_ilike_no_pgvector(self): + conn = FakeConnection() + conn.enqueue(0) # COUNT(*) + conn.enqueue([FakeRecord(id="1", category="profile", key="name", value="Eugene", + updated_at=None, source="conversation", confidence=90, + expires_at=None, source_context="")]) + store = make_store_with_pool(conn) + store._pgvector_checked = True + store._pgvector_available = False + + results = await store.search_facts("eugene", limit=5) + assert len(results) == 1 + + async def test_fallback_auto_dump_below_threshold(self): + conn = FakeConnection() + conn.enqueue(5) # fact_count <= threshold + conn.enqueue([FakeRecord(id="1", category="profile", key="name", value="Eugene", + updated_at=None, source="conversation", confidence=90, + expires_at=None, source_context="")]) + store = make_store_with_pool(conn) + store._pgvector_checked = True + store._pgvector_available = False + + results = await store.search_facts("anything", limit=5) + assert len(results) == 1 + # Should have done get_all_facts instead of ILIKE + assert "ORDER BY category" in conn.calls[1][1] + + async def test_fallback_no_terms(self): + conn = FakeConnection() + conn.enqueue([]) # get_all_facts returns empty + store = make_store_with_pool(conn) + store._pgvector_checked = True + store._pgvector_available = False + + results = await store.search_facts("a", limit=5) + # single-char query normalizes to empty -> get_all_facts + assert len(results) == 0 + + +class TestDeleteFact: + async def test_by_key(self): + conn = FakeConnection() + conn.enqueue("DELETE 1") + store = make_store_with_pool(conn) + count = await store.delete_fact("name") + assert count == 1 + assert "DELETE FROM memory_facts" in conn.calls[0][1] + + async def test_by_key_returns_zero(self): + conn = FakeConnection() + conn.enqueue("DELETE 0") + store = make_store_with_pool(conn) + count = await store.delete_fact("missing") + assert count == 0 + + async def test_by_key_and_category(self): + conn = FakeConnection() + conn.enqueue("DELETE 0 1") + store = make_store_with_pool(conn) + count = await store.delete_fact("name", category="profile") + assert "category" in conn.calls[0][1] + + +class TestGetAllFacts: + async def test_returns_records(self): + conn = FakeConnection() + conn.enqueue([FakeRecord(id="1", category="profile", key="name", value="Eugene", + updated_at=None, source="conversation", confidence=90, + expires_at=None, source_context="")]) + store = make_store_with_pool(conn) + results = await store.get_all_facts() + assert len(results) == 1 + assert results[0]["key"] == "name" + + async def test_with_limit(self): + conn = FakeConnection() + conn.enqueue([]) + store = make_store_with_pool(conn) + await store.get_all_facts(limit=5) + assert "LIMIT $1" in conn.calls[0][1] + + +class TestFactCount: + async def test_returns_count(self): + conn = FakeConnection() + conn.enqueue(42) + store = make_store_with_pool(conn) + assert await store.fact_count() == 42 + + +class TestSummary: + async def test_get_summary(self): + conn = FakeConnection() + conn.enqueue("User likes Python.") + store = make_store_with_pool(conn) + assert await store.get_summary() == "User likes Python." + + async def test_set_summary(self): + conn = FakeConnection() + conn.enqueue("OK") + store = make_store_with_pool(conn) + await store.set_summary("New summary") + assert "memory_summary" in conn.calls[0][1] + + +class TestSessionState: + async def test_mark_extracted(self): + conn = FakeConnection() + conn.enqueue("OK") + store = make_store_with_pool(conn) + await store.mark_session_extracted("sess-1") + assert "session_memory_state" in conn.calls[0][1] + + async def test_get_extracted_at(self): + from datetime import datetime, timezone + + now = datetime.now(timezone.utc) + conn = FakeConnection() + conn.enqueue(FakeRecord(extracted_at=now)) + store = make_store_with_pool(conn) + result = await store.get_extracted_at("sess-1") + assert result == now.isoformat() + + async def test_get_extracted_at_none(self): + conn = FakeConnection() + conn.enqueue(None) + store = make_store_with_pool(conn) + assert await store.get_extracted_at("sess-1") is None + + +class TestBackfillEmbeddings: + async def test_updates_rows(self): + conn = FakeConnection() + # First batch: 2 rows + conn.enqueue([ + FakeRecord(id="1", value="hello"), + FakeRecord(id="2", value="world"), + ]) + # No more rows + conn.enqueue([]) + # executemany response + conn.enqueue(None) + store = make_store_with_pool(conn) + store._pgvector_checked = True + store._pgvector_available = True + store._embedding_backend = object() + + async def _fake_embeds(texts: list[str]): + return [[0.1] * 768 for _ in texts] + + store._generate_embeddings = _fake_embeds + + updated = await store.backfill_embeddings(batch_size=2) + assert updated == 2 + assert conn.calls[-2][0] == "executemany" + assert "UPDATE memory_facts SET embedding" in conn.calls[-2][1]