diff --git a/navi/api/routes/agents.py b/navi/api/routes/agents.py index c0e3660..d017ea9 100644 --- a/navi/api/routes/agents.py +++ b/navi/api/routes/agents.py @@ -28,6 +28,8 @@ "top_p": p.top_p, "max_iterations": p.max_iterations, "iteration_budget_enabled": p.iteration_budget_enabled, + "think_enabled": p.think_enabled, + "subagent_think_enabled": p.subagent_think_enabled, } for p in profiles.all() ] diff --git a/navi/core/agent.py b/navi/core/agent.py index 71a792d..ee5911c 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -159,6 +159,12 @@ # Sentinel: placed in the event sink by the tool wrapper to signal completion. _TOOL_DONE = object() +# Sub-agents are execution workers. If a sub-agent produces only thinking for a +# long time without text or tool calls, local models can degenerate into endless +# internal-token loops and keep the GPU busy with no user-visible progress. +_SUBAGENT_THINKING_STALL_SECONDS = 60.0 +_SUBAGENT_THINKING_STALL_CHARS = 12_000 + def _todo_status_snapshot(session_id: str) -> frozenset[tuple[str, str]]: """Return a frozenset of (task_text, status) for the current session's todo list. @@ -369,6 +375,11 @@ stop_event = current_stop_event.get() tool_map = {t.name: t for t in tools} + subagent_think = ( + profile.subagent_think_enabled + if profile.subagent_think_enabled is not None + else profile.think_enabled + ) _sub_tokens: int = 0 # tokens from the final LLM call _sub_tool_count: int = 0 # total tool calls across all iterations @@ -406,6 +417,8 @@ accumulated_thinking = "" turn_tool_calls: list[ToolCallRequest] | None = None turn_tokens: int | None = None + thinking_started_at: float | None = None + thinking_stalled_reason: str | None = None # Build context inline — no persona or profiles block for subagents. built_ctx: list[Message] = [subagent_sys_msg] @@ -420,7 +433,7 @@ tools=tool_schemas if tools else None, temperature=profile.temperature, model=profile.model, - think=profile.think_enabled, + think=subagent_think, top_k=profile.top_k, top_p=profile.top_p, num_thread=profile.num_thread, @@ -432,7 +445,26 @@ if chunk.prompt_tokens is not None or chunk.completion_tokens is not None: turn_tokens = (chunk.prompt_tokens or 0) + (chunk.completion_tokens or 0) if chunk.thinking: + if thinking_started_at is None: + thinking_started_at = _time.monotonic() accumulated_thinking += chunk.thinking + thinking_elapsed = _time.monotonic() - thinking_started_at + if ( + thinking_elapsed >= _SUBAGENT_THINKING_STALL_SECONDS + or len(accumulated_thinking) >= _SUBAGENT_THINKING_STALL_CHARS + ): + thinking_stalled_reason = ( + "Sub-agent produced only thinking output for " + f"{thinking_elapsed:.0f}s / {len(accumulated_thinking)} chars " + "without text or tool calls." + ) + log.warning( + "agent.subagent.thinking_stall", + elapsed=thinking_elapsed, + chars=len(accumulated_thinking), + profile_id=profile_id, + ) + break if chunk.delta: accumulated_text += chunk.delta if chunk.tool_calls: @@ -441,6 +473,14 @@ if stop_event and stop_event.is_set(): return accumulated_text, False + if thinking_stalled_reason: + if sink is not None: + await sink.put(SubagentComplete( + token_count=turn_tokens or 0, + tool_call_count=_sub_tool_count, + )) + return f"[{thinking_stalled_reason}]", False + if not turn_tool_calls: log.info("agent.subagent.complete", iterations=iteration + 1, result_len=len(accumulated_text)) diff --git a/navi/profiles/base.py b/navi/profiles/base.py index b313d79..b85528f 100644 --- a/navi/profiles/base.py +++ b/navi/profiles/base.py @@ -86,10 +86,13 @@ # subagent_tools: tool names available to sub-agents spawned from this profile. # If empty, falls back to enabled_tools minus dangerous/irrelevant ones. # subagent_planning_enabled: if True, sub-agents run the planning phase before their tool loop. + # subagent_think_enabled: controls extended reasoning for sub-agents. If None, + # sub-agents inherit think_enabled from the parent profile. # subagent_system_prompt: injected as an additional system message for sub-agents, # after the profile's main system_prompt. Loaded from subagent_system_prompt.txt if present. subagent_tools: list[str] = Field(default_factory=list) subagent_planning_enabled: bool = False + subagent_think_enabled: bool | None = None subagent_system_prompt: str = "" # Extra context providers to inject for this profile (by name). diff --git a/navi/profiles/loader.py b/navi/profiles/loader.py index 76f2200..823161e 100644 --- a/navi/profiles/loader.py +++ b/navi/profiles/loader.py @@ -93,6 +93,7 @@ adaptive_replan_enabled=config.get("adaptive_replan_enabled", False), subagent_tools=config.get("subagent_tools", []), subagent_planning_enabled=config.get("subagent_planning_enabled", False), + subagent_think_enabled=config.get("subagent_think_enabled", None), subagent_system_prompt=subagent_system_prompt, context_providers=config.get("context_providers", []), )) diff --git a/navi/profiles/modeler_3d/config.json b/navi/profiles/modeler_3d/config.json index fcce046..d3b9dee 100644 --- a/navi/profiles/modeler_3d/config.json +++ b/navi/profiles/modeler_3d/config.json @@ -30,6 +30,7 @@ "step_validation_enabled": true, "adaptive_replan_enabled": true, "subagent_planning_enabled": false, + "subagent_think_enabled": false, "subagent_tools": [ "scratchpad", "filesystem", diff --git a/tests/unit/profiles/test_base.py b/tests/unit/profiles/test_base.py index bfe136e..a663999 100644 --- a/tests/unit/profiles/test_base.py +++ b/tests/unit/profiles/test_base.py @@ -56,6 +56,7 @@ assert p.iteration_budget_enabled is True assert p.anti_stall_enabled is True assert p.anti_stall_threshold == 8 + assert p.subagent_think_enabled is None def test_max_iterations_default(self): p = AgentProfile(