diff --git a/.env.example b/.env.example
index cba2ef7..b24c4a1 100644
--- a/.env.example
+++ b/.env.example
@@ -1,6 +1,6 @@
 OLLAMA_HOST=http://localhost:11434
 OLLAMA_API_KEY=
-OLLAMA_DEFAULT_MODEL=gemma4:e4b-it-q_8
+OLLAMA_DEFAULT_MODEL=gemma4:31b-cloud
 
 OPENAI_API_KEY=
 ANTHROPIC_API_KEY=
diff --git a/README.md b/README.md
index ec36d0f..3116cc0 100644
--- a/README.md
+++ b/README.md
@@ -143,7 +143,7 @@
 # LLM
 OLLAMA_HOST=http://localhost:11434
 OLLAMA_API_KEY=
-OLLAMA_DEFAULT_MODEL=gemma4:e4b-it-q8_0
+OLLAMA_DEFAULT_MODEL=gemma4:31b-cloud
 OLLAMA_NUM_CTX=65536
 OLLAMA_THINK=true
 
diff --git a/docs/api.md b/docs/api.md
index 57c045d..f92a8b9 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -34,7 +34,7 @@
     "description": "General-purpose assistant",
     "enabled_tools": ["todo", "web_search", "filesystem", "..."],
     "llm_backend": "ollama",
-    "model": "gemma4:26b-a4b-it-q4_K_M"
+    "model": "gemma4:31b-cloud"
   }
 ]
 ```
diff --git a/docs/config.md b/docs/config.md
index 9dabc4c..d452d6a 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -8,7 +8,7 @@
 |---|---|---|---|
 | `OLLAMA_HOST` | str | `http://localhost:11434` | Ollama server URL |
 | `OLLAMA_API_KEY` | str | `""` | Ollama Cloud API key for direct `https://ollama.com` access |
-| `OLLAMA_DEFAULT_MODEL` | str | `gemma4:e2b-it-q8_0` | Default model (can be overridden per profile) |
+| `OLLAMA_DEFAULT_MODEL` | str | `gemma4:31b-cloud` | Default model (can be overridden per profile) |
 | `OLLAMA_NUM_CTX` | int | `65536` | Context window size in tokens |
 | `OLLAMA_THINK` | bool | `true` | Enable extended reasoning (thinking) |
 | `OPENAI_API_KEY` | str | `""` | OpenAI API key (if using OpenAI backend) |
@@ -80,7 +80,7 @@
 ```dotenv
 OLLAMA_HOST=http://localhost:11434
 OLLAMA_API_KEY=
-OLLAMA_DEFAULT_MODEL=gemma4:e2b-it-q8_0
+OLLAMA_DEFAULT_MODEL=gemma4:31b-cloud
 OLLAMA_NUM_CTX=65536
 OLLAMA_THINK=true
 
diff --git a/docs/index.md b/docs/index.md
index 768ef83..2f7ebd8 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -48,7 +48,7 @@
 
 - **Web framework**: FastAPI + uvicorn
 - **LLM**: Ollama (primary), OpenAI-compatible backend wired in
-- **Default model**: `gemma4:e2b-it-q8_0` (configurable per profile)
+- **Default model**: `gemma4:31b-cloud` (configurable per profile)
 - **Database**: SQLite via aiosqlite
 - **Logging**: structlog
 - **Config**: pydantic-settings (reads `.env`)
diff --git a/docs/profiles.md b/docs/profiles.md
index a73ca4c..1a88459 100644
--- a/docs/profiles.md
+++ b/docs/profiles.md
@@ -18,7 +18,7 @@
     system_prompt: str             # loaded from system_prompt.txt
     enabled_tools: list[str]       # tools available in the main loop
     llm_backend: str = "ollama"
-    model: str = "gemma4:26b-a4b-it-q4_K_M"
+    model: str = "gemma4:31b-cloud"
     max_iterations: int = 10
     temperature: float = 0.7
     planning_enabled: bool = False
@@ -48,9 +48,9 @@
 
 | ID | Name | Model | Temp | Planning |
 |---|---|---|---|---|
-| `secretary` | Personal Secretary | gemma4:26b-a4b-it-q4_K_M | 0.7 | Yes |
-| `server_admin` | Server Administrator | gemma4:26b-a4b-it-q4_K_M | 0.2 | Yes |
-| `developer` | Tool Developer | gemma4:26b-a4b-it-q4_K_M | 0.2 | Yes |
+| `secretary` | Personal Secretary | gemma4:31b-cloud | 0.7 | Yes |
+| `server_admin` | Server Administrator | gemma4:31b-cloud | 0.2 | Yes |
+| `developer` | Tool Developer | gemma4:31b-cloud | 0.2 | Yes |
 
 All profiles share a base tool set. User tools from `tools/enabled.json` are merged in at runtime.
 
@@ -84,7 +84,7 @@
   "name": "My Profile",
   "description": "...",
   "short_description": "...",
-  "model": "gemma4:26b-a4b-it-q4_K_M",
+  "model": "gemma4:31b-cloud",
   "temperature": 0.5,
   "max_iterations": 30,
   "planning_enabled": true,
diff --git a/docs/visual.html b/docs/visual.html
index ce00e17..b3b2f24 100644
--- a/docs/visual.html
+++ b/docs/visual.html
@@ -476,7 +476,7 @@
     </div>
     <div class="card">
       <div class="label">Default model</div>
-      <div class="value">gemma4:e2b-it-q8_0</div>
+      <div class="value">gemma4:31b-cloud</div>
-      <div class="sub">Ollama, 2B active params</div>
+      <div class="sub">Ollama, 31B params (cloud)</div>
     </div>
     <div class="card">
@@ -1196,19 +1196,19 @@
       <tr><th>Profile ID</th><th>Name</th><th>Model</th><th>Temp</th><th>Planning</th></tr>
       <tr>
         <td><code>secretary</code></td><td>Personal Secretary</td>
-        <td><code>gemma4:26b-a4b-it-q4_K_M</code></td>
+        <td><code>gemma4:31b-cloud</code></td>
         <td>0.7</td>
         <td><span class="badge badge-green">Yes</span></td>
       </tr>
       <tr>
         <td><code>server_admin</code></td><td>Server Administrator</td>
-        <td><code>gemma4:26b-a4b-it-q4_K_M</code></td>
+        <td><code>gemma4:31b-cloud</code></td>
         <td>0.2</td>
         <td><span class="badge badge-green">Yes</span></td>
       </tr>
       <tr>
         <td><code>smart_home</code></td><td>Smart Home Assistant</td>
-        <td><code>gemma4:26b-a4b-it-q4_K_M</code></td>
+        <td><code>gemma4:31b-cloud</code></td>
         <td>0.3</td>
         <td><span class="badge badge-green">Yes</span></td>
       </tr>
@@ -1291,7 +1291,7 @@
     <table>
       <tr><th>Variable</th><th>Default</th><th>Description</th></tr>
       <tr><td><code>OLLAMA_HOST</code></td><td><code>http://localhost:11434</code></td><td>Ollama server URL</td></tr>
-      <tr><td><code>OLLAMA_DEFAULT_MODEL</code></td><td><code>gemma4:e2b-it-q8_0</code></td><td>Default model (overridable per profile)</td></tr>
+      <tr><td><code>OLLAMA_DEFAULT_MODEL</code></td><td><code>gemma4:31b-cloud</code></td><td>Default model (overridable per profile)</td></tr>
       <tr><td><code>OLLAMA_NUM_CTX</code></td><td><code>65536</code></td><td>Context window size in tokens</td></tr>
       <tr><td><code>OLLAMA_THINK</code></td><td><code>true</code></td><td>Enable extended reasoning</td></tr>
     </table>
diff --git a/navi/config.py b/navi/config.py
index 65f9f4a..601b43b 100644
--- a/navi/config.py
+++ b/navi/config.py
@@ -9,7 +9,7 @@
 
     ollama_host: str = "http://localhost:11434"
     ollama_api_key: str = ""
-    ollama_default_model: str = "gemma4:e4b-it-q8_0"
+    ollama_default_model: str = "gemma4:31b-cloud"
     ollama_num_ctx: int = 65536
     ollama_think: bool = True
 
diff --git a/navi/profiles/base.py b/navi/profiles/base.py
index 9078ca4..7a99966 100644
--- a/navi/profiles/base.py
+++ b/navi/profiles/base.py
@@ -15,7 +15,7 @@
     system_prompt: str
     enabled_tools: list[str]  # tool names; resolved by ToolRegistry at runtime
     llm_backend: str = "ollama"  # backend key, e.g. "ollama", "openai"
-    model: str = "gemma4:26b-a4b-it-q4_K_M"
+    model: str = "gemma4:31b-cloud"
     max_iterations: int = 10
     temperature: float = 0.7
     planning_enabled: bool = False  # if True, run a planning LLM call before the main loop
diff --git a/navi/profiles/developer/config.json b/navi/profiles/developer/config.json
index a320cf5..39d7496 100644
--- a/navi/profiles/developer/config.json
+++ b/navi/profiles/developer/config.json
@@ -9,7 +9,7 @@
     "key_tools": "filesystem, code_exec, terminal, web_search, web_view, spawn_agent"
   },
   "llm_backend": "ollama",
-  "model": "gemma4:31b",
+  "model": "gemma4:31b-cloud",
   "temperature": 0.2,
   "max_iterations": 35,
   "planning_enabled": true,
diff --git a/navi/profiles/loader.py b/navi/profiles/loader.py
index 7441c0d..4402efa 100644
--- a/navi/profiles/loader.py
+++ b/navi/profiles/loader.py
@@ -59,7 +59,7 @@
                 system_prompt=system_prompt,
                 enabled_tools=config["enabled_tools"],
                 llm_backend=config.get("llm_backend", "ollama"),
-                model=config.get("model", "gemma4:26b-a4b-it-q4_K_M"),
+                model=config.get("model", "gemma4:31b-cloud"),
                 temperature=config.get("temperature", 0.7),
                 max_iterations=config.get("max_iterations", 20),
                 planning_enabled=config.get("planning_enabled", False),
diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json
index fef7db3..a144b16 100644
--- a/navi/profiles/secretary/config.json
+++ b/navi/profiles/secretary/config.json
@@ -9,7 +9,7 @@
     "key_tools": "web_search, web_view, filesystem, code_exec, gmail, todo, scratchpad, spawn_agent, memory"
   },
   "llm_backend": "ollama",
-  "model": "gemma4:31b",
+  "model": "gemma4:31b-cloud",
   "temperature": 0.5,
   "max_iterations": 25,
   "planning_enabled": true,
diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json
index e6ff0ce..64039b4 100644
--- a/navi/profiles/server_admin/config.json
+++ b/navi/profiles/server_admin/config.json
@@ -9,7 +9,7 @@
     "key_tools": "ssh_exec, terminal, filesystem, code_exec, web_search, spawn_agent, memory"
   },
   "llm_backend": "ollama",
-  "model": "gemma4:31b",
+  "model": "gemma4:31b-cloud",
   "temperature": 0.2,
   "max_iterations": 20,
   "planning_enabled": true,
diff --git a/navi/profiles/tool_developer/config.json b/navi/profiles/tool_developer/config.json
index b89c274..8cb5a99 100644
--- a/navi/profiles/tool_developer/config.json
+++ b/navi/profiles/tool_developer/config.json
@@ -9,7 +9,7 @@
     "key_tools": "write_tool, reload_tools, delete_tool, test_tool, filesystem, terminal, code_exec, memory"
   },
   "llm_backend": "ollama",
-  "model": "gemma4:31b",
+  "model": "gemma4:31b-cloud",
   "temperature": 0.2,
   "max_iterations": 35,
   "planning_enabled": true,

Variable	Default	Description
`OLLAMA_HOST`	`http://localhost:11434`	Ollama server URL
`OLLAMA_DEFAULT_MODEL`	`gemma4:e2b-it-q8_0`	Default model (overridable per profile)
`OLLAMA_DEFAULT_MODEL`	`gemma4:31b-cloud`	Default model (overridable per profile)
`OLLAMA_NUM_CTX`	`65536`	Context window size in tokens
`OLLAMA_THINK`	`true`	Enable extended reasoning