diff --git a/navi/core/agent.py b/navi/core/agent.py
index 4363c7e..af452a0 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -282,6 +282,7 @@
                 model=profile.model,
                 top_k=profile.top_k,
                 top_p=profile.top_p,
+                num_thread=profile.num_thread,
             )
 
             if response.finish_reason == "stop" or not response.tool_calls:
@@ -453,6 +454,7 @@
                     think=profile.think_enabled,
                     top_k=profile.top_k,
                     top_p=profile.top_p,
+                    num_thread=profile.num_thread,
                 ),
                 stop_event=stop_event,
                 first_chunk_timeout=settings.llm_stream_first_chunk_timeout,
@@ -731,6 +733,7 @@
                     think=profile.think_enabled,
                     top_k=profile.top_k,
                     top_p=profile.top_p,
+                    num_thread=profile.num_thread,
                 ),
                 stop_event=stop_event,
                 first_chunk_timeout=settings.llm_stream_first_chunk_timeout,
diff --git a/navi/llm/fallback.py b/navi/llm/fallback.py
index 3c2862a..d1eaff7 100644
--- a/navi/llm/fallback.py
+++ b/navi/llm/fallback.py
@@ -76,6 +76,7 @@
         max_tokens: int | None = None,
         top_k: int | None = None,
         top_p: float | None = None,
+        num_thread: int | None = None,
     ) -> LLMResponse:
         models = self._model_list(model)
         last_err: Exception = LLMBackendError("No backends configured")
@@ -90,7 +91,7 @@
                     return await self._get_client(server).complete(
                         messages, tools=tools, temperature=temperature,
                         model=m, think=think, max_tokens=max_tokens,
-                        top_k=top_k, top_p=top_p,
+                        top_k=top_k, top_p=top_p, num_thread=num_thread,
                     )
                 except LLMConnectionError as e:
                     log.warning("fallback.server_dead", host=server.host, error=str(e))
@@ -112,6 +113,7 @@
         model: "list[str] | str | None" = None,
         top_k: int | None = None,
         top_p: float | None = None,
+        num_thread: int | None = None,
     ) -> AsyncGenerator[LLMChunk, None]:
         models = self._model_list(model)
         last_err: Exception = LLMBackendError("No backends configured")
@@ -124,7 +126,7 @@
                     continue
                 try:
                     gen = self._get_client(server).stream(
-                        messages, temperature=temperature, model=m, top_k=top_k, top_p=top_p
+                        messages, temperature=temperature, model=m, top_k=top_k, top_p=top_p, num_thread=num_thread,
                     )
                     first = await gen.__anext__()
                 except StopAsyncIteration:
@@ -156,6 +158,7 @@
         think: bool | None = None,
         top_k: int | None = None,
         top_p: float | None = None,
+        num_thread: int | None = None,
     ) -> AsyncGenerator[LLMChunk, None]:
         models = self._model_list(model)
         last_err: Exception = LLMBackendError("No backends configured")
@@ -169,7 +172,7 @@
                 try:
                     gen = self._get_client(server).stream_complete(
                         messages, tools=tools, temperature=temperature, model=m, think=think,
-                        top_k=top_k, top_p=top_p,
+                        top_k=top_k, top_p=top_p, num_thread=num_thread,
                     )
                     first = await gen.__anext__()
                 except StopAsyncIteration:
diff --git a/navi/llm/ollama.py b/navi/llm/ollama.py
index c12d27c..e09fe71 100644
--- a/navi/llm/ollama.py
+++ b/navi/llm/ollama.py
@@ -36,6 +36,7 @@
     max_tokens: int | None = None,
     top_k: int | None = None,
     top_p: float | None = None,
+    num_thread: int | None = None,
 ) -> dict:
     opts: dict = {"temperature": temperature, "num_ctx": settings.ollama_num_ctx}
     # think=None → use global setting; think=False → force off even if global is True
@@ -48,6 +49,8 @@
         opts["top_k"] = top_k
     if top_p is not None:
         opts["top_p"] = top_p
+    if num_thread is not None:
+        opts["num_thread"] = num_thread
     return opts
 
 
@@ -92,13 +95,14 @@
         max_tokens: int | None = None,
         top_k: int | None = None,
         top_p: float | None = None,
+        num_thread: int | None = None,
     ) -> LLMResponse:
         resolved = _resolve_model(model, self.model)
         try:
             kwargs: dict = {
                 "model": resolved,
                 "messages": _to_ollama_messages(messages),
-                "options": _base_options(temperature, think=think, max_tokens=max_tokens, top_k=top_k, top_p=top_p),
+                "options": _base_options(temperature, think=think, max_tokens=max_tokens, top_k=top_k, top_p=top_p, num_thread=num_thread),
                 "stream": False,
             }
             if tools:
@@ -139,13 +143,14 @@
         model: "list[str] | str | None" = None,
         top_k: int | None = None,
         top_p: float | None = None,
+        num_thread: int | None = None,
     ) -> AsyncGenerator[LLMChunk, None]:
         resolved = _resolve_model(model, self.model)
         try:
             async for chunk in await self._client.chat(
                 model=resolved,
                 messages=_to_ollama_messages(messages),
-                options=_base_options(temperature, top_k=top_k, top_p=top_p),
+                options=_base_options(temperature, top_k=top_k, top_p=top_p, num_thread=num_thread),
                 stream=True,
             ):
                 thinking = getattr(chunk.message, "thinking", None) or None
@@ -172,13 +177,14 @@
         think: bool | None = None,
         top_k: int | None = None,
         top_p: float | None = None,
+        num_thread: int | None = None,
     ) -> AsyncGenerator[LLMChunk, None]:
         resolved = _resolve_model(model, self.model)
         try:
             kwargs: dict = {
                 "model": resolved,
                 "messages": _to_ollama_messages(messages),
-                "options": _base_options(temperature, think=think, top_k=top_k, top_p=top_p),
+                "options": _base_options(temperature, think=think, top_k=top_k, top_p=top_p, num_thread=num_thread),
                 "stream": True,
             }
             if tools:
diff --git a/navi/profiles/base.py b/navi/profiles/base.py
index 8e1553d..ee96761 100644
--- a/navi/profiles/base.py
+++ b/navi/profiles/base.py
@@ -26,6 +26,9 @@
     temperature: float = 0.7
     top_k: int | None = None
     top_p: float | None = None
+    # CPU thread count for local inference, sent as Ollama's `num_thread` option.
+    # None = option is not sent, so Ollama picks its own default. Cloud models ignore this option.
+    num_thread: int | None = None
     planning_enabled: bool = False  # if True, run a planning LLM call before the main loop
 
     # Profile discoverability — used for system prompt injection and list_profiles tool.
diff --git a/navi/profiles/developer/config.json b/navi/profiles/developer/config.json
index 89c528c..b0ca59f 100644
--- a/navi/profiles/developer/config.json
+++ b/navi/profiles/developer/config.json
@@ -10,8 +10,8 @@
   },
   "llm_backend": "ollama",
   "model": [
-    "qwen3.6:35b",
     "gemma4:31b-cloud",
+    "qwen3.6:35b",
     "gemma4:26b-a4b-it-q4_K_M"
   ],
   "temperature": 0.45,
@@ -64,5 +64,6 @@
   "planning_phase2_enabled": true,
   "planning_phase3_enabled": true,
   "top_k": 40,
-  "top_p": 0.88
+  "top_p": 0.88,
+  "num_thread": 11
 }
diff --git a/navi/profiles/discuss/config.json b/navi/profiles/discuss/config.json
index 136e29c..6844811 100644
--- a/navi/profiles/discuss/config.json
+++ b/navi/profiles/discuss/config.json
@@ -4,8 +4,8 @@
   "description": "Creative partner for Q&A, brainstorming, and idea exploration. High creativity, free-form thinking.",
   "short_description": "Creative Q&A and idea discussion — best for open questions, brainstorming, and exploring concepts.",
   "model": [
-    "qwen3.6:35b",
     "gemma4:31b-cloud",
+    "qwen3.6:35b",
     "gemma4:26b-a4b-it-q4_K_M"
   ],
   "temperature": 0.85,
@@ -39,5 +39,6 @@
   "adaptive_replan_enabled": false,
   "subagent_planning_enabled": false,
   "top_k": 80,
-  "top_p": 0.95
+  "top_p": 0.95,
+  "num_thread": 11
 }
diff --git a/navi/profiles/loader.py b/navi/profiles/loader.py
index 456805a..76f2200 100644
--- a/navi/profiles/loader.py
+++ b/navi/profiles/loader.py
@@ -74,6 +74,7 @@
                 temperature=config.get("temperature", 0.7),
                 top_k=config.get("top_k", None),
                 top_p=config.get("top_p", None),
+                num_thread=config.get("num_thread", None),
                 max_iterations=config.get("max_iterations", 20),
                 planning_enabled=config.get("planning_enabled", False),
                 planning_mandatory=config.get("planning_mandatory", False),
diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json
index 8876f69..a9f0d3b 100644
--- a/navi/profiles/secretary/config.json
+++ b/navi/profiles/secretary/config.json
@@ -10,8 +10,8 @@
   },
   "llm_backend": "ollama",
   "model": [
-    "qwen3.6:35b",
     "gemma4:31b-cloud",
+    "qwen3.6:35b",
     "gemma4:26b-a4b-it-q4_K_M"
   ],
   "temperature": 0.65,
@@ -62,5 +62,6 @@
   "planning_phase2_enabled": true,
   "planning_phase3_enabled": true,
   "top_k": 50,
-  "top_p": 0.90
+  "top_p": 0.90,
+  "num_thread": 11
 }
diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json
index 551fa91..80d6c70 100644
--- a/navi/profiles/server_admin/config.json
+++ b/navi/profiles/server_admin/config.json
@@ -10,8 +10,8 @@
   },
   "llm_backend": "ollama",
   "model": [
-    "qwen3.6:35b",
     "gemma4:31b-cloud",
+    "qwen3.6:35b",
     "gemma4:26b-a4b-it-q4_K_M"
   ],
   "temperature": 0.30,
@@ -64,5 +64,6 @@
   "planning_phase2_enabled": true,
   "planning_phase3_enabled": true,
   "top_k": 30,
-  "top_p": 0.80
+  "top_p": 0.80,
+  "num_thread": 11
 }
diff --git a/navi/profiles/tool_developer/config.json b/navi/profiles/tool_developer/config.json
index ac9f986..9765021 100644
--- a/navi/profiles/tool_developer/config.json
+++ b/navi/profiles/tool_developer/config.json
@@ -10,8 +10,8 @@
   },
   "llm_backend": "ollama",
   "model": [
-    "qwen3.6:35b",
     "gemma4:31b-cloud",
+    "qwen3.6:35b",
     "gemma4:26b-a4b-it-q4_K_M"
   ],
   "temperature": 0.35,
@@ -73,5 +73,6 @@
   "planning_phase2_enabled": true,
   "planning_phase3_enabled": true,
   "top_k": 40,
-  "top_p": 0.85
+  "top_p": 0.85,
+  "num_thread": 11
 }