diff --git a/.env.example b/.env.example index cba2ef7..b24c4a1 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,6 @@ OLLAMA_HOST=http://localhost:11434 OLLAMA_API_KEY= -OLLAMA_DEFAULT_MODEL=gemma4:e4b-it-q_8 +OLLAMA_DEFAULT_MODEL=gemma4:31b-cloud OPENAI_API_KEY= ANTHROPIC_API_KEY= diff --git a/README.md b/README.md index ec36d0f..3116cc0 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ # LLM OLLAMA_HOST=http://localhost:11434 OLLAMA_API_KEY= -OLLAMA_DEFAULT_MODEL=gemma4:e4b-it-q8_0 +OLLAMA_DEFAULT_MODEL=gemma4:31b-cloud OLLAMA_NUM_CTX=65536 OLLAMA_THINK=true diff --git a/docs/api.md b/docs/api.md index 57c045d..f92a8b9 100644 --- a/docs/api.md +++ b/docs/api.md @@ -34,7 +34,7 @@ "description": "General-purpose assistant", "enabled_tools": ["todo", "web_search", "filesystem", "..."], "llm_backend": "ollama", - "model": "gemma4:26b-a4b-it-q4_K_M" + "model": "gemma4:31b-cloud" } ] ``` diff --git a/docs/config.md b/docs/config.md index 9dabc4c..d452d6a 100644 --- a/docs/config.md +++ b/docs/config.md @@ -8,7 +8,7 @@ |---|---|---|---| | `OLLAMA_HOST` | str | `http://localhost:11434` | Ollama server URL | | `OLLAMA_API_KEY` | str | `""` | Ollama Cloud API key for direct `https://ollama.com` access | -| `OLLAMA_DEFAULT_MODEL` | str | `gemma4:e2b-it-q8_0` | Default model (can be overridden per profile) | +| `OLLAMA_DEFAULT_MODEL` | str | `gemma4:31b-cloud` | Default model (can be overridden per profile) | | `OLLAMA_NUM_CTX` | int | `65536` | Context window size in tokens | | `OLLAMA_THINK` | bool | `true` | Enable extended reasoning (thinking) | | `OPENAI_API_KEY` | str | `""` | OpenAI API key (if using OpenAI backend) | @@ -80,7 +80,7 @@ ```dotenv OLLAMA_HOST=http://localhost:11434 OLLAMA_API_KEY= -OLLAMA_DEFAULT_MODEL=gemma4:e2b-it-q8_0 +OLLAMA_DEFAULT_MODEL=gemma4:31b-cloud OLLAMA_NUM_CTX=65536 OLLAMA_THINK=true diff --git a/docs/index.md b/docs/index.md index 768ef83..2f7ebd8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -48,7 +48,7 @@ - **Web framework**: FastAPI + uvicorn - **LLM**: Ollama (primary), OpenAI-compatible backend wired in -- **Default model**: `gemma4:e2b-it-q8_0` (configurable per profile) +- **Default model**: `gemma4:31b-cloud` (configurable per profile) - **Database**: SQLite via aiosqlite - **Logging**: structlog - **Config**: pydantic-settings (reads `.env`) diff --git a/docs/profiles.md b/docs/profiles.md index a73ca4c..1a88459 100644 --- a/docs/profiles.md +++ b/docs/profiles.md @@ -18,7 +18,7 @@ system_prompt: str # loaded from system_prompt.txt enabled_tools: list[str] # tools available in the main loop llm_backend: str = "ollama" - model: str = "gemma4:26b-a4b-it-q4_K_M" + model: str = "gemma4:31b-cloud" max_iterations: int = 10 temperature: float = 0.7 planning_enabled: bool = False @@ -48,9 +48,9 @@ | ID | Name | Model | Temp | Planning | |---|---|---|---|---| -| `secretary` | Personal Secretary | gemma4:26b-a4b-it-q4_K_M | 0.7 | Yes | -| `server_admin` | Server Administrator | gemma4:26b-a4b-it-q4_K_M | 0.2 | Yes | -| `developer` | Tool Developer | gemma4:26b-a4b-it-q4_K_M | 0.2 | Yes | +| `secretary` | Personal Secretary | gemma4:31b-cloud | 0.7 | Yes | +| `server_admin` | Server Administrator | gemma4:31b-cloud | 0.2 | Yes | +| `developer` | Tool Developer | gemma4:31b-cloud | 0.2 | Yes | All profiles share a base tool set. User tools from `tools/enabled.json` are merged in at runtime. @@ -84,7 +84,7 @@ "name": "My Profile", "description": "...", "short_description": "...", - "model": "gemma4:26b-a4b-it-q4_K_M", + "model": "gemma4:31b-cloud", "temperature": 0.5, "max_iterations": 30, "planning_enabled": true, diff --git a/docs/visual.html b/docs/visual.html index ce00e17..b3b2f24 100644 --- a/docs/visual.html +++ b/docs/visual.html @@ -476,7 +476,7 @@
Default model
-
gemma4:e2b-it-q8_0
+
gemma4:31b-cloud
Ollama, 2B active params
@@ -1196,19 +1196,19 @@ Profile IDNameModelTempPlanning secretaryPersonal Secretary - gemma4:26b-a4b-it-q4_K_M + gemma4:31b-cloud 0.7 Yes server_adminServer Administrator - gemma4:26b-a4b-it-q4_K_M + gemma4:31b-cloud 0.2 Yes smart_homeSmart Home Assistant - gemma4:26b-a4b-it-q4_K_M + gemma4:31b-cloud 0.3 Yes @@ -1291,7 +1291,7 @@ - +
VariableDefaultDescription
OLLAMA_HOSThttp://localhost:11434Ollama server URL
OLLAMA_DEFAULT_MODELgemma4:e2b-it-q8_0Default model (overridable per profile)
OLLAMA_DEFAULT_MODELgemma4:31b-cloudDefault model (overridable per profile)
OLLAMA_NUM_CTX65536Context window size in tokens
OLLAMA_THINKtrueEnable extended reasoning
diff --git a/navi/config.py b/navi/config.py index 65f9f4a..601b43b 100644 --- a/navi/config.py +++ b/navi/config.py @@ -9,7 +9,7 @@ ollama_host: str = "http://localhost:11434" ollama_api_key: str = "" - ollama_default_model: str = "gemma4:e4b-it-q8_0" + ollama_default_model: str = "gemma4:31b-cloud" ollama_num_ctx: int = 65536 ollama_think: bool = True diff --git a/navi/profiles/base.py b/navi/profiles/base.py index 9078ca4..7a99966 100644 --- a/navi/profiles/base.py +++ b/navi/profiles/base.py @@ -15,7 +15,7 @@ system_prompt: str enabled_tools: list[str] # tool names; resolved by ToolRegistry at runtime llm_backend: str = "ollama" # backend key, e.g. "ollama", "openai" - model: str = "gemma4:26b-a4b-it-q4_K_M" + model: str = "gemma4:31b-cloud" max_iterations: int = 10 temperature: float = 0.7 planning_enabled: bool = False # if True, run a planning LLM call before the main loop diff --git a/navi/profiles/developer/config.json b/navi/profiles/developer/config.json index a320cf5..39d7496 100644 --- a/navi/profiles/developer/config.json +++ b/navi/profiles/developer/config.json @@ -9,7 +9,7 @@ "key_tools": "filesystem, code_exec, terminal, web_search, web_view, spawn_agent" }, "llm_backend": "ollama", - "model": "gemma4:31b", + "model": "gemma4:31b-cloud", "temperature": 0.2, "max_iterations": 35, "planning_enabled": true, diff --git a/navi/profiles/loader.py b/navi/profiles/loader.py index 7441c0d..4402efa 100644 --- a/navi/profiles/loader.py +++ b/navi/profiles/loader.py @@ -59,7 +59,7 @@ system_prompt=system_prompt, enabled_tools=config["enabled_tools"], llm_backend=config.get("llm_backend", "ollama"), - model=config.get("model", "gemma4:26b-a4b-it-q4_K_M"), + model=config.get("model", "gemma4:31b-cloud"), temperature=config.get("temperature", 0.7), max_iterations=config.get("max_iterations", 20), planning_enabled=config.get("planning_enabled", False), diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json index fef7db3..a144b16 100644 --- a/navi/profiles/secretary/config.json +++ b/navi/profiles/secretary/config.json @@ -9,7 +9,7 @@ "key_tools": "web_search, web_view, filesystem, code_exec, gmail, todo, scratchpad, spawn_agent, memory" }, "llm_backend": "ollama", - "model": "gemma4:31b", + "model": "gemma4:31b-cloud", "temperature": 0.5, "max_iterations": 25, "planning_enabled": true, diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json index e6ff0ce..64039b4 100644 --- a/navi/profiles/server_admin/config.json +++ b/navi/profiles/server_admin/config.json @@ -9,7 +9,7 @@ "key_tools": "ssh_exec, terminal, filesystem, code_exec, web_search, spawn_agent, memory" }, "llm_backend": "ollama", - "model": "gemma4:31b", + "model": "gemma4:31b-cloud", "temperature": 0.2, "max_iterations": 20, "planning_enabled": true, diff --git a/navi/profiles/tool_developer/config.json b/navi/profiles/tool_developer/config.json index b89c274..8cb5a99 100644 --- a/navi/profiles/tool_developer/config.json +++ b/navi/profiles/tool_developer/config.json @@ -9,7 +9,7 @@ "key_tools": "write_tool, reload_tools, delete_tool, test_tool, filesystem, terminal, code_exec, memory" }, "llm_backend": "ollama", - "model": "gemma4:31b", + "model": "gemma4:31b-cloud", "temperature": 0.2, "max_iterations": 35, "planning_enabled": true,