diff --git a/navi/core/agent.py b/navi/core/agent.py index 1c078c2..2503493 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -245,6 +245,8 @@ tools=tool_schemas if tools else None, temperature=profile.temperature, model=profile.model, + top_k=profile.top_k, + top_p=profile.top_p, ) if response.finish_reason == "stop" or not response.tool_calls: @@ -414,6 +416,8 @@ temperature=profile.temperature, model=profile.model, think=profile.think_enabled, + top_k=profile.top_k, + top_p=profile.top_p, ), stop_event=stop_event, first_chunk_timeout=settings.llm_stream_first_chunk_timeout, @@ -697,6 +701,8 @@ temperature=profile.temperature, model=profile.model, think=profile.think_enabled, + top_k=profile.top_k, + top_p=profile.top_p, ), stop_event=stop_event, first_chunk_timeout=settings.llm_stream_first_chunk_timeout, diff --git a/navi/llm/ollama.py b/navi/llm/ollama.py index a816d83..c12d27c 100644 --- a/navi/llm/ollama.py +++ b/navi/llm/ollama.py @@ -34,6 +34,8 @@ temperature: float, think: bool | None = None, max_tokens: int | None = None, + top_k: int | None = None, + top_p: float | None = None, ) -> dict: opts: dict = {"temperature": temperature, "num_ctx": settings.ollama_num_ctx} # think=None → use global setting; think=False → force off even if global is True @@ -42,6 +44,10 @@ opts["think"] = True if max_tokens is not None: opts["num_predict"] = max_tokens + if top_k is not None: + opts["top_k"] = top_k + if top_p is not None: + opts["top_p"] = top_p return opts @@ -84,13 +90,15 @@ model: "list[str] | str | None" = None, think: bool | None = None, max_tokens: int | None = None, + top_k: int | None = None, + top_p: float | None = None, ) -> LLMResponse: resolved = _resolve_model(model, self.model) try: kwargs: dict = { "model": resolved, "messages": _to_ollama_messages(messages), - "options": _base_options(temperature, think=think, max_tokens=max_tokens), + "options": _base_options(temperature, think=think, max_tokens=max_tokens, top_k=top_k, top_p=top_p), "stream": False, } if tools: @@ -129,13 +137,15 @@ messages: list[Message], temperature: float = 0.7, model: "list[str] | str | None" = None, + top_k: int | None = None, + top_p: float | None = None, ) -> AsyncGenerator[LLMChunk, None]: resolved = _resolve_model(model, self.model) try: async for chunk in await self._client.chat( model=resolved, messages=_to_ollama_messages(messages), - options=_base_options(temperature), + options=_base_options(temperature, top_k=top_k, top_p=top_p), stream=True, ): thinking = getattr(chunk.message, "thinking", None) or None @@ -160,13 +170,15 @@ temperature: float = 0.7, model: "list[str] | str | None" = None, think: bool | None = None, + top_k: int | None = None, + top_p: float | None = None, ) -> AsyncGenerator[LLMChunk, None]: resolved = _resolve_model(model, self.model) try: kwargs: dict = { "model": resolved, "messages": _to_ollama_messages(messages), - "options": _base_options(temperature, think=think), + "options": _base_options(temperature, think=think, top_k=top_k, top_p=top_p), "stream": True, } if tools: diff --git a/navi/profiles/base.py b/navi/profiles/base.py index b8c111c..ddd6640 100644 --- a/navi/profiles/base.py +++ b/navi/profiles/base.py @@ -24,6 +24,8 @@ self.model = [self.model] max_iterations: int = 10 temperature: float = 0.7 + top_k: int | None = None + top_p: float | None = None planning_enabled: bool = False # if True, run a planning LLM call before the main loop # Profile discoverability — used for system prompt injection and list_profiles tool. diff --git a/navi/profiles/developer/config.json b/navi/profiles/developer/config.json index 4b209c3..28f0b19 100644 --- a/navi/profiles/developer/config.json +++ b/navi/profiles/developer/config.json @@ -13,7 +13,7 @@ "gemma4:31b-cloud", "gemma4:26b-a4b-it-q4_K_M" ], - "temperature": 0.2, + "temperature": 1.0, "max_iterations": 35, "planning_enabled": true, "subagent_planning_enabled": true, @@ -61,5 +61,7 @@ "planning_mandatory": false, "planning_phase1_enabled": true, "planning_phase2_enabled": false, - "planning_phase3_enabled": true + "planning_phase3_enabled": true, + "top_k": 64, + "top_p": 0.95 } diff --git a/navi/profiles/discuss/config.json b/navi/profiles/discuss/config.json index 0648a10..9ae734e 100644 --- a/navi/profiles/discuss/config.json +++ b/navi/profiles/discuss/config.json @@ -14,9 +14,7 @@ "web_view", "scratchpad", "reflect", - "memory_search", - "memory_save", - "memory_forget", + "memory", "image_view", "todo", "get_current_datetime", @@ -38,5 +36,7 @@ "anti_stall_threshold": 5, "step_validation_enabled": false, "adaptive_replan_enabled": false, - "subagent_planning_enabled": false + "subagent_planning_enabled": false, + "top_k": 64, + "top_p": 0.95 } diff --git a/navi/profiles/loader.py b/navi/profiles/loader.py index 6ad7fdb..e303d5a 100644 --- a/navi/profiles/loader.py +++ b/navi/profiles/loader.py @@ -72,6 +72,8 @@ llm_backend=config.get("llm_backend", "ollama"), model=_normalize_model(config.get("model", ["gemma4:31b-cloud"])), temperature=config.get("temperature", 0.7), + top_k=config.get("top_k", None), + top_p=config.get("top_p", None), max_iterations=config.get("max_iterations", 20), planning_enabled=config.get("planning_enabled", False), planning_mandatory=config.get("planning_mandatory", False), diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json index 1bb757e..91621f0 100644 --- a/navi/profiles/secretary/config.json +++ b/navi/profiles/secretary/config.json @@ -13,7 +13,7 @@ "gemma4:31b-cloud", "gemma4:26b-a4b-it-q4_K_M" ], - "temperature": 0.5, + "temperature": 1.0, "max_iterations": 25, "planning_enabled": true, "subagent_planning_enabled": true, @@ -61,5 +61,7 @@ "planning_mandatory": false, "planning_phase1_enabled": true, "planning_phase2_enabled": false, - "planning_phase3_enabled": true + "planning_phase3_enabled": true, + "top_k": 64, + "top_p": 0.95 } diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json index b222fb1..1ea4544 100644 --- a/navi/profiles/server_admin/config.json +++ b/navi/profiles/server_admin/config.json @@ -13,7 +13,7 @@ "gemma4:31b-cloud", "gemma4:26b-a4b-it-q4_K_M" ], - "temperature": 0.2, + "temperature": 1.0, "max_iterations": 20, "planning_enabled": true, "subagent_planning_enabled": true, @@ -61,5 +61,7 @@ "planning_mandatory": false, "planning_phase1_enabled": true, "planning_phase2_enabled": false, - "planning_phase3_enabled": true + "planning_phase3_enabled": true, + "top_k": 64, + "top_p": 0.95 } diff --git a/navi/profiles/tool_developer/config.json b/navi/profiles/tool_developer/config.json index 354e9d3..648145d 100644 --- a/navi/profiles/tool_developer/config.json +++ b/navi/profiles/tool_developer/config.json @@ -13,7 +13,7 @@ "gemma4:31b-cloud", "gemma4:26b-a4b-it-q4_K_M" ], - "temperature": 0.2, + "temperature": 1.0, "max_iterations": 35, "planning_enabled": true, "subagent_planning_enabled": true, @@ -70,5 +70,7 @@ "planning_mandatory": false, "planning_phase1_enabled": true, "planning_phase2_enabled": false, - "planning_phase3_enabled": true + "planning_phase3_enabled": true, + "top_k": 64, + "top_p": 0.95 }