# Source: navi-1/tests/unit/core/test_planning.py
# Revision: 8c2533d88948a4b3ec855fa1514ec8c9d83527e7 (root / navi-1, 6 KB)
# Last change: Eugene Sukhodolskiy on 25 May -
#   "Review fixes: restore _build_sessions, fix flags, search filter, tests"
"""Unit tests for navi.core.planning."""

import pytest

from navi.core.planning import PlanningEngine, _parse_plan_steps
from navi.llm.base import LLMResponse, Message
from tests.conftest_factory import make_profile


class RecordingLLM:
    """Scripted LLM stand-in that records every message list it is given.

    ``responses`` holds the reply texts still to be served, in order;
    ``calls`` holds the ``messages`` argument of each ``complete`` call.
    """

    def __init__(self, responses):
        # Take a private copy: replies are consumed from the front, and the
        # caller's own sequence must not shrink as a side effect.
        self.responses = [*responses]
        self.calls = []

    async def complete(self, messages, **kwargs):
        """Record *messages* and answer with the next scripted reply.

        Extra keyword arguments are accepted and ignored.
        """
        self.calls.append(messages)
        next_content = self.responses.pop(0)
        return LLMResponse(
            content=next_content,
            tool_calls=None,
            finish_reason="stop",
        )


class FakeContextBuilder:
    """Canned context builder handed to ``PlanningEngine`` in these tests."""

    def build_system_prompt(self, profile):
        """Return a fixed system prompt; *profile* is not inspected."""
        return "base system prompt"

    def _mcp_context_msg(self, profile=None):
        """Return a canned MCP system message, or ``None``.

        ``None`` is returned when *profile* is falsy or its ``mcp_servers``
        attribute is falsy.
        """
        if not profile or not profile.mcp_servers:
            return None
        return Message(
            role="system",
            content="gnexus-book instructions with mcp_gnexus-book_search_docs",
        )


class TestParsePlanSteps:
    """Expectations for ``_parse_plan_steps`` on plan text."""

    def test_basic_numbered_list(self):
        """``N.`` items under the Steps header are returned in order."""
        parsed = _parse_plan_steps(
            "**Steps:**\n1. First step\n2. Second step\n3. Third step"
        )
        expected = ["First step", "Second step", "Third step"]
        assert parsed == expected

    def test_parenthesised_numbers(self):
        """``N)`` items are accepted as well."""
        parsed = _parse_plan_steps("**Steps:**\n1) Step one\n2) Step two")
        expected = ["Step one", "Step two"]
        assert parsed == expected

    def test_ignores_bracket_prefixes(self):
        """An item starting with a ``[...]`` prefix is left out of the result."""
        parsed = _parse_plan_steps("**Steps:**\n1. [TOOL] Do thing\n2. Normal step")
        expected = ["Normal step"]
        assert parsed == expected

    def test_empty_steps_section(self):
        """A Steps header with no items yields an empty list."""
        parsed = _parse_plan_steps("**Steps:**\n\n**Notes:** nothing")
        assert parsed == []

    def test_no_steps_section(self):
        """Text without a Steps header yields an empty list."""
        parsed = _parse_plan_steps("Some random text without steps")
        assert parsed == []


class TestPlanningPrompt:
    """Checks on the prompts and messages produced by ``PlanningEngine.run``.

    Each test drives the engine with a ``RecordingLLM`` serving scripted
    replies and then inspects either the prompts the LLM received or the
    ``messages`` list passed to ``run``.
    """

    async def test_planning_prompt_includes_profile_mcp_and_persistence_rules(self):
        """Prompts for an MCP-enabled profile carry MCP context and persistence rules.

        The first message of the first LLM call must contain the canned MCP
        context and the memory-usage rules; the first message of the second
        call must contain the persistence-checkpoint rules.
        """
        profile = make_profile(
            "server_admin",
            planning_phase2_enabled=False,
            mcp_servers={"gnexus-book": ["read", "write"]},
        )
        llm = RecordingLLM([
            "TASK: document infra\n"
            "GOAL: docs updated\n"
            "UNKNOWNS: NONE\n"
            "RESOURCES:\n"
            "- mcp_gnexus-book_search_docs: search docs\n"
            "- context sources: gnexus-book\n"
            "KNOWLEDGE SOURCE ASSESSMENT:\n"
            "- Domain: infrastructure\n"
            "- Primary source: connected knowledge servers\n"
            "- Fallback: docs\n"
            "KNOWLEDGE CAPTURE:\n"
            "- New information to save: stable infra facts\n"
            "- Target: connected knowledge server\n"
            "- Duplication check: search target\n"
            "- Rationale: reusable\n"
            "COMPLEXITY: medium\n"
            "SUBTASKS:\n"
            "1. Search docs\n"
            "2. Persist facts\n"
            "REFLECT: no\n"
            "COMMITMENTS: checkpoint",
            "## Plan\n\n"
            "**Task:** document infra\n"
            "**Goal:** docs updated\n\n"
            "**Milestones:**\nA. Inspect\nB. Persist\nC. Report\n\n"
            "**Steps:**\n"
            "1. Search gnexus-book → TOOL: mcp_gnexus-book_search_docs\n"
            "2. Knowledge persistence checkpoint → TOOL: mcp_gnexus-book_propose_doc_change\n"
            "3. Final synthesis → SELF\n\n"
            "**Parallel:** NONE\n"
            "**Risks:** NONE",
        ])
        engine = PlanningEngine(FakeContextBuilder())
        context = [Message(role="user", content="update infra docs")]

        # Drain the event stream; only the prompts recorded by the LLM are
        # inspected, so the yielded events are not kept.
        async for _event in engine.run(context, profile, llm, mem=None, tool_schemas=[]):
            pass

        phase1_prompt = llm.calls[0][0].content
        phase3_prompt = llm.calls[1][0].content
        assert "gnexus-book instructions" in phase1_prompt
        assert "memory` is only for personal user facts and preferences" in phase1_prompt
        assert "Never use memory for infrastructure inventory" in phase1_prompt
        assert "knowledge persistence checkpoint" in phase3_prompt
        assert "Do not plan unavailable MCP tool calls" in phase3_prompt

    async def test_planning_prompt_omits_mcp_when_profile_has_no_mcp_servers(self):
        """Without MCP servers the canned MCP context is absent from the first prompt.

        The general rule about connected MCP knowledge servers must still be
        present.
        """
        profile = make_profile(
            "developer",
            planning_phase2_enabled=False,
            mcp_servers={},
        )
        llm = RecordingLLM(["DIRECT"])
        engine = PlanningEngine(FakeContextBuilder())
        context = [Message(role="user", content="hello")]

        async for _event in engine.run(context, profile, llm, mem=None, tool_schemas=[]):
            pass

        phase1_prompt = llm.calls[0][0].content
        assert "gnexus-book instructions" not in phase1_prompt
        assert "Connected MCP knowledge servers are authoritative only when the active profile exposes their tools" in phase1_prompt

    async def test_planning_flags(self):
        """Planning messages must have correct is_display / is_context flags."""
        profile = make_profile("developer", planning_phase2_enabled=False)
        llm = RecordingLLM([
            "TASK: test\nGOAL: done\nUNKNOWNS: NONE\nRESOURCES: NONE\n"
            "KNOWLEDGE SOURCE ASSESSMENT: NONE\nKNOWLEDGE CAPTURE: NONE\n"
            "COMPLEXITY: low\nSUBTASKS:\n1. Step one\nREFLECT: no\nCOMMITMENTS: none",
            "## Plan\n\n**Task:** test\n**Goal:** done\n\n**Steps:**\n1. Step one → SELF\n",
        ])
        engine = PlanningEngine(FakeContextBuilder())
        context = [Message(role="user", content="hello")]
        messages = []

        async for _event in engine.run(context, profile, llm, mem=None, tool_schemas=[], messages=messages):
            pass

        # Messages list should contain: user, plan context (is_display=False), plan marker (is_context=False), prompt
        plan_ctx = [m for m in messages if m.role == "assistant" and not m.is_plan and m.is_display is False]
        plan_marker = [m for m in messages if m.is_plan is True]
        prompt_msgs = [m for m in messages if m.role == "user" and m.content.startswith("Plan is ready")]

        # Exactly one hidden assistant message carries the plan into context.
        assert len(plan_ctx) == 1
        assert plan_ctx[0].is_context is True
        # Exactly one plan marker is shown to the user but kept out of context.
        assert len(plan_marker) == 1
        assert plan_marker[0].is_context is False
        assert plan_marker[0].is_display is True
        # The follow-up "Plan is ready" user prompt is not displayed.
        assert len(prompt_msgs) == 1
        assert prompt_msgs[0].is_display is False