diff --git a/navi/api/websocket.py b/navi/api/websocket.py
index 65dddb2..aafc556 100644
--- a/navi/api/websocket.py
+++ b/navi/api/websocket.py
@@ -25,7 +25,7 @@
 
 from navi.api.deps import get_session_store
 from navi.core import Agent, ContextCompressed, StreamEnd, TextDelta, ThinkingDelta, ThinkingEnd, ToolEvent
-from navi.core.events import PlanReady, ProfileSwitched, StreamStopped, ToolStarted, TurnThinking
+from navi.core.events import PlanningStatus, PlanReady, ProfileSwitched, StreamStopped, ToolStarted, TurnThinking
 from navi.exceptions import MaxIterationsReached, NaviError, SessionNotFound
 
 router = APIRouter(tags=["websocket"])
@@ -109,6 +109,8 @@
         return {"type": "profile_switched", "profile_id": event.profile_id, "profile_name": event.profile_name}
     if isinstance(event, StreamStopped):
         return {"type": "stream_stopped"}
+    if isinstance(event, PlanningStatus):
+        return {"type": "planning_status", "phase": event.phase, "label": event.label}
     if isinstance(event, PlanReady):
         return {"type": "plan_ready", "plan": event.plan}
     return None
diff --git a/navi/config.py b/navi/config.py
index 5931772..c0f1aba 100644
--- a/navi/config.py
+++ b/navi/config.py
@@ -65,7 +65,7 @@
 
     # Context compression
     context_compression_enabled: bool = True
-    context_compression_threshold: float = 0.80   # trigger at 80% of ollama_num_ctx
+    context_compression_threshold: float = 0.70   # trigger at 70% of ollama_num_ctx
     context_keep_recent: int = 8                   # conversational turns to keep verbatim
     context_summary_temperature: float = 0.3
     context_summary_max_tokens: int = 1024         # max output tokens for the summary LLM call
diff --git a/navi/core/agent.py b/navi/core/agent.py
index 23f7b80..3273fd0 100644
--- a/navi/core/agent.py
+++ b/navi/core/agent.py
@@ -34,6 +34,7 @@
     AgentEvent,
     AIHelperTokensUsed,
     ContextCompressed,
+    PlanningStatus,
     PlanReady,
     StreamEnd,
     StreamStopped,
@@ -462,8 +463,12 @@
         # injected into session.context as an assistant message so the model
         # naturally continues from it, and emitted as PlanReady for the UI.
         if profile.planning_enabled:
-            for _ev in await self._run_planning(session, profile, llm, mem):
-                yield _ev
+            async for _ev in self._run_planning(session, profile, llm, mem, tool_schemas):
+                if isinstance(_ev, AIHelperTokensUsed):
+                    # Accumulate planning token usage into the turn total (not forwarded to WS)
+                    _subagent_tokens += _ev.total
+                else:
+                    yield _ev
 
         # Tool-calling loop — uses stream_complete() for every turn so thinking
         # is captured in real-time via ThinkingDelta/ThinkingEnd events.
@@ -671,22 +676,50 @@
         profile,
         llm: LLMBackend,
         mem: "Message | None",
-    ) -> list[AgentEvent]:
+        tool_schemas: list | None = None,
+    ):
         """
-        Two-phase planning:
+        Three-phase planning (async generator):
 
-        Phase 1 — Analysis: reformulate the task, identify subtasks and unknowns.
-                             Fast signal-check: if DIRECT, skip planning entirely.
+        Phase 1 — Analysis (think=True): reformulate the task, identify subtasks and
+                   unknowns. Returns DIRECT for simple requests to skip planning entirely.
         Phase 2 — Execution plan: assign each subtask to a specific executor
-                                   (TOOL / AGENT / SELF) using a structured format.
+                   (TOOL / AGENT / SELF) with awareness of actually available tools.
+        Phase 3 — AIHelper critic: independent pass that validates executor assignments
+                   against the real tool list and fixes any mismatches.
 
-        The phase-2 plan is injected into session.context as an assistant message
-        so the model naturally continues from it in the main loop.
-        Returns [PlanReady] on success, [] on skip or failure.
+        Yields PlanningStatus before each phase so the UI can show progress,
+        then yields PlanReady when the final plan is ready.
+        Yields nothing if planning is skipped.
         """
         import re as _re
 
-        # ── Phase 1: Task analysis ─────────────────────────────────────────────
+        # ── Build compact tool list for Phase 2 / Phase 3 ─────────────────────
+        if tool_schemas:
+            tool_lines = []
+            for schema in tool_schemas:
+                fn = schema.function if hasattr(schema, "function") else schema.get("function", {})
+                name = fn.get("name", "")
+                desc = (fn.get("description") or "").split("\n")[0][:80]
+                tool_lines.append(f"  - {name}: {desc}")
+            available_tools_block = (
+                "Available tools (use these exact names for TOOL: assignments):\n"
+                + "\n".join(tool_lines)
+                + "\n\n"
+            )
+            tool_names_list = ", ".join(
+                (schema.function if hasattr(schema, "function") else schema.get("function", {})).get("name", "")
+                for schema in tool_schemas
+            )
+        else:
+            available_tools_block = ""
+            tool_names_list = ""
+
+        # Read stop event once — checked between all phases
+        _stop = current_stop_event.get()
+
+        # ── Phase 1: Task analysis (with reasoning) ────────────────────────────
+        yield PlanningStatus(phase=1, label="Analysing task...")
         phase1_system = Message(
             role="system",
             content=(
@@ -714,22 +747,33 @@
 
         try:
             r1 = await asyncio.wait_for(
-                llm.complete(phase1_ctx, tools=None, temperature=0.3, model=profile.model, think=False),
+                llm.complete(phase1_ctx, tools=None, temperature=0.3, model=profile.model, think=True),
                 timeout=settings.llm_complete_timeout,
             )
             analysis = (r1.content or "").strip()
         except asyncio.TimeoutError:
             log.warning("agent.planning_phase1_timeout", timeout=settings.llm_complete_timeout)
-            return []
+            return
         except Exception:
             log.warning("agent.planning_phase1_failed", exc_info=True)
-            return []
+            return
+
+        if r1.prompt_tokens or r1.completion_tokens:
+            yield AIHelperTokensUsed(
+                prompt_tokens=r1.prompt_tokens or 0,
+                completion_tokens=r1.completion_tokens or 0,
+            )
 
         if not analysis or analysis.upper().startswith("DIRECT"):
             log.debug("agent.planning_skipped", reason="direct")
-            return []
+            return
+
+        if _stop and _stop.is_set():
+            log.debug("agent.planning_stopped", phase=1)
+            return
 
         # ── Phase 2: Execution plan ────────────────────────────────────────────
+        yield PlanningStatus(phase=2, label="Building execution plan...")
         phase2_system = Message(
             role="system",
             content=(
@@ -739,8 +783,9 @@
                 "Task analysis:\n\n"
                 f"{analysis}\n\n"
                 "---\n\n"
-                "Now write the execution plan. For each subtask assign a specific executor:\n"
-                "- TOOL: <tool_name>  — a single tool call is enough\n"
+                + available_tools_block
+                + "Now write the execution plan. For each subtask assign a specific executor:\n"
+                "- TOOL: <tool_name>  — a single tool call is enough; use exact tool names from the list above\n"
                 "- AGENT: <profile_id>  — needs multiple steps; a subagent must handle it\n"
                 "- SELF  — final synthesis or a context-dependent single action only\n\n"
                 "Required output format (use exactly this structure):\n\n"
@@ -773,20 +818,88 @@
             plan_text = (r2.content or "").strip()
         except asyncio.TimeoutError:
             log.warning("agent.planning_phase2_timeout", timeout=settings.llm_complete_timeout)
-            return []
+            return
         except Exception:
             log.warning("agent.planning_phase2_failed", exc_info=True)
-            return []
+            return
+
+        if r2.prompt_tokens or r2.completion_tokens:
+            yield AIHelperTokensUsed(
+                prompt_tokens=r2.prompt_tokens or 0,
+                completion_tokens=r2.completion_tokens or 0,
+            )
 
         if not plan_text:
-            return []
+            return
 
         # Must have at least one numbered step
         if not _re.search(r"^\s*\d+[\.\)]", plan_text, _re.MULTILINE):
             log.debug("agent.planning_skipped", reason="no_numbered_steps")
-            return []
+            return
 
-        # Warn if executor assignments are missing (plan may be malformed)
+        if _stop and _stop.is_set():
+            log.debug("agent.planning_stopped", phase=2)
+            return
+
+        # ── Phase 3: AIHelper plan critic ──────────────────────────────────────
+        yield PlanningStatus(phase=3, label="Reviewing plan...")
+        # Independent pass: validates and corrects executor assignments against
+        # the actual tool list. Falls back to Phase 2 plan on any failure.
+        if tool_names_list:
+            try:
+                critic_system = (
+                    "You are a plan validator for an AI assistant. "
+                    "You receive a task analysis, an execution plan draft, and a list of available tools.\n\n"
+                    "Your job:\n"
+                    "1. Every \"TOOL: <name>\" step must use a name from the available tools list exactly. "
+                    "If a tool name is wrong or missing, replace it with the closest available tool, "
+                    "or change the executor to SELF.\n"
+                    "2. A TOOL step must require only a single tool call. "
+                    "If a step clearly needs multiple calls, change its executor to SELF.\n"
+                    "3. Steps must be in logical dependency order — no step may depend on a result "
+                    "from a later step.\n\n"
+                    "Rules:\n"
+                    "- Do not add or remove steps.\n"
+                    "- Do not rewrite descriptions unless fixing an executor.\n"
+                    "- Return the corrected plan in the same ## Plan format.\n"
+                    "- If the plan is already correct, return it unchanged.\n"
+                    "- No commentary. No preamble. Return the plan only."
+                )
+                critic_prompt = (
+                    f"Available tools: {tool_names_list}\n\n"
+                    f"Task analysis:\n{analysis}\n\n"
+                    f"Execution plan draft:\n{plan_text}"
+                )
+                phase3_ctx = [
+                    Message(role="system", content=critic_system),
+                    Message(role="user", content=critic_prompt),
+                ]
+
+                r3 = await asyncio.wait_for(
+                    llm.complete(phase3_ctx, tools=None, temperature=0.1, model=profile.model, think=False),
+                    timeout=settings.llm_complete_timeout,
+                )
+                reviewed = (r3.content or "").strip()
+
+                if r3.prompt_tokens or r3.completion_tokens:
+                    yield AIHelperTokensUsed(
+                        prompt_tokens=r3.prompt_tokens or 0,
+                        completion_tokens=r3.completion_tokens or 0,
+                    )
+
+                # Accept the reviewed plan only if it still has numbered steps
+                if reviewed and _re.search(r"^\s*\d+[\.\)]", reviewed, _re.MULTILINE):
+                    plan_text = reviewed
+                    log.debug("agent.planning_phase3_applied", length=len(plan_text))
+                else:
+                    log.warning("agent.planning_phase3_invalid", preview=reviewed[:120])
+
+            except asyncio.TimeoutError:
+                log.warning("agent.planning_phase3_timeout")
+            except Exception:
+                log.warning("agent.planning_phase3_failed", exc_info=True)
+
+        # Warn if executor assignments are still missing (plan may be malformed)
         if not _re.search(r"(TOOL:|AGENT:|→\s*SELF)", plan_text):
             log.warning("agent.planning_no_executors", hint="plan lacks TOOL/AGENT/SELF assignments")
 
@@ -794,8 +907,8 @@
         # and into messages (with is_plan flag) so the UI can render a plan card after reload.
         session.context.append(Message(role="assistant", content=plan_text))
         session.messages.append(Message(role="assistant", content=plan_text, is_plan=True))
-        log.debug("agent.plan_ready", phases=2, length=len(plan_text))
-        return [PlanReady(plan=plan_text)]
+        log.debug("agent.plan_ready", phases=3, length=len(plan_text))
+        yield PlanReady(plan=plan_text)
 
     async def _run_workers(
         self,
diff --git a/navi/core/compressor.py b/navi/core/compressor.py
index c79420f..73840be 100644
--- a/navi/core/compressor.py
+++ b/navi/core/compressor.py
@@ -18,11 +18,19 @@
 
 _SUMMARIZE_SYSTEM = (
     "You are summarizing a conversation history to free up context space. "
-    "Produce a structured factual summary covering: key facts the user shared, "
-    "decisions made, tasks completed or in progress, important outputs or findings, "
-    "any code or config snippets that were produced. "
-    "Use bullet points grouped by topic. Be thorough — capture enough detail that "
-    "the assistant can continue the conversation without the original messages. "
+    "Produce a structured factual summary. Lead with the most important section:\n\n"
+    "## Current goal\n"
+    "One sentence: what the assistant is working toward right now.\n\n"
+    "## Work state\n"
+    "What has been completed. What is still pending. Any blockers encountered.\n\n"
+    "## Key facts\n"
+    "Facts the user shared, decisions made, constraints discovered.\n\n"
+    "## Outputs\n"
+    "File paths created or modified, config values set, commands run and their results, "
+    "code or config snippets that were produced (include verbatim if short).\n\n"
+    "## Errors\n"
+    "Failures encountered and how they were resolved (or that they remain unresolved).\n\n"
+    "Be thorough — the assistant will continue the conversation using only this summary. "
     "Do not include greetings, filler, or meta-commentary about the summary itself."
 )
 
diff --git a/navi/core/events.py b/navi/core/events.py
index 47b7384..48c38f4 100644
--- a/navi/core/events.py
+++ b/navi/core/events.py
@@ -77,6 +77,18 @@
 
 
 @dataclass
+class PlanningStatus:
+    """Emitted at the start of each planning phase to show progress in the UI.
+
+    phase: 1 = Analysis, 2 = Execution plan, 3 = Plan review (AIHelper critic).
+    label: short human-readable description shown next to the spinner.
+    """
+
+    phase: int
+    label: str
+
+
+@dataclass
 class PlanReady:
     """Emitted before the main agent loop when profile.planning_enabled is True.
 
@@ -125,5 +137,5 @@
 AgentEvent = (
     ToolStarted | ToolEvent | TextDelta | ThinkingDelta | ThinkingEnd
     | StreamEnd | StreamStopped | ContextCompressed | TurnThinking | ProfileSwitched
-    | PlanReady | SubagentComplete | AIHelperTokensUsed
+    | PlanningStatus | PlanReady | SubagentComplete | AIHelperTokensUsed
 )
diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json
index ed3b606..0b88f1d 100644
--- a/navi/profiles/secretary/config.json
+++ b/navi/profiles/secretary/config.json
@@ -10,8 +10,8 @@
   },
   "llm_backend": "ollama",
   "model": "gemma4:26b-a4b-it-q4_K_M",
-  "temperature": 0.7,
-  "max_iterations": 40,
+  "temperature": 0.5,
+  "max_iterations": 25,
   "planning_enabled": true,
   "enabled_tools": [
     "todo", "scratchpad", "reflect", "switch_profile", "list_profiles",
diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json
index 2e38fe4..36c567c 100644
--- a/navi/profiles/server_admin/config.json
+++ b/navi/profiles/server_admin/config.json
@@ -11,7 +11,7 @@
   "llm_backend": "ollama",
   "model": "gemma4:26b-a4b-it-q4_K_M",
   "temperature": 0.2,
-  "max_iterations": 40,
+  "max_iterations": 20,
   "planning_enabled": true,
   "enabled_tools": [
     "todo", "scratchpad", "reflect", "switch_profile", "list_profiles",
diff --git a/navi/tools/scratchpad.py b/navi/tools/scratchpad.py
index 15d13c7..81df02a 100644
--- a/navi/tools/scratchpad.py
+++ b/navi/tools/scratchpad.py
@@ -12,6 +12,8 @@
     description = (
         "Persistent working memory for the current session. Use it to record findings, "
         "track artifacts, and pass context to sub-agents. Organised into named sections. "
+        "MANDATORY: at the start of any multi-step task, write a 'goal' section stating "
+        "in one sentence what you are trying to achieve. Update it if the goal changes. "
         "Always read the relevant section before composing a final answer — "
         "it may contain findings from earlier tool calls."
     )
@@ -32,6 +34,7 @@
                 "type": "string",
                 "description": (
                     "Which section to target. Conventional names: "
+                    "goal (current objective — always write this first), "
                     "findings (discovered facts), artifacts (file paths, outputs), "
                     "errors (tracebacks, failures), verification (DoD checklist), "
                     "context_transfer (briefings for sub-agents), main (general). "
diff --git a/navi/tools/terminal.py b/navi/tools/terminal.py
index 101ab2d..7be3ea6 100644
--- a/navi/tools/terminal.py
+++ b/navi/tools/terminal.py
@@ -15,6 +15,7 @@
 from .base import Tool, ToolResult
 
 _TIMEOUT = 300
+_MAX_OUTPUT_CHARS = 5_000
 
 
 class TerminalTool(Tool):
@@ -80,10 +81,14 @@
             if stderr:
                 output_parts.append(f"[stderr]\n{stderr.decode(errors='replace')}")
 
+            combined = "\n".join(output_parts) or "(no output)"
+            if len(combined) > _MAX_OUTPUT_CHARS:
+                combined = combined[:_MAX_OUTPUT_CHARS] + f"\n…[truncated — {len(combined)} chars total]"
+
             success = proc.returncode == 0
             return ToolResult(
                 success=success,
-                output="\n".join(output_parts) or "(no output)",
+                output=combined,
                 metadata={"returncode": proc.returncode},
                 error=None if success else f"Exit code {proc.returncode}",
             )
@@ -128,10 +133,14 @@
             if stderr:
                 output_parts.append(f"[stderr]\n{stderr.decode(errors='replace')}")
 
+            combined = "\n".join(output_parts) or "(no output)"
+            if len(combined) > _MAX_OUTPUT_CHARS:
+                combined = combined[:_MAX_OUTPUT_CHARS] + f"\n…[truncated — {len(combined)} chars total]"
+
             success = proc.returncode == 0
             return ToolResult(
                 success=success,
-                output="\n".join(output_parts) or "(no output)",
+                output=combined,
                 metadata={"returncode": proc.returncode},
                 error=None if success else f"Exit code {proc.returncode}",
             )
diff --git a/navi/tools/web_search.py b/navi/tools/web_search.py
index c1d0b45..743bf99 100644
--- a/navi/tools/web_search.py
+++ b/navi/tools/web_search.py
@@ -46,7 +46,7 @@
             try:
                 results = await asyncio.to_thread(
                     lambda b=backend: list(
-                        DDGS().text(query, backend=b, max_results=max_results)
+                        DDGS().text(query, backend=b, max_results=max_results, region="wt-wt")
                     )
                 )
                 if results:
@@ -106,7 +106,7 @@
         async with httpx.AsyncClient(timeout=15.0) as client:
             resp = await client.get(
                 "https://api.search.brave.com/res/v1/web/search",
-                params={"q": query, "count": min(max_results, 20)},
+                params={"q": query, "count": min(max_results, 20), "country": "ALL"},
                 headers={
                     "Accept": "application/json",
                     "Accept-Encoding": "gzip",
@@ -128,6 +128,7 @@
                     "q": query,
                     "format": "json",
                     "engines": "google,bing,duckduckgo",
+                    "language": "all",
                 },
             )
             resp.raise_for_status()
diff --git a/webclient/src/components/messages/ThinkingCard.vue b/webclient/src/components/messages/ThinkingCard.vue
index 7c19f1d..41d4c84 100644
--- a/webclient/src/components/messages/ThinkingCard.vue
+++ b/webclient/src/components/messages/ThinkingCard.vue
@@ -6,14 +6,25 @@
       <span v-if="!msg.thinking.done" class="spinner"></span>
       <i class="ph ph-caret-down thinking-chevron"></i>
     </summary>
-    <div class="thinking-body">{{ msg.thinking.text }}</div>
+    <div class="thinking-body">{{ cleanedText }}</div>
   </details>
 </template>
 
 <script setup>
-import { ref } from 'vue'
+import { ref, computed } from 'vue'
 
-defineProps({ msg: { type: Object, required: true } })
+const props = defineProps({ msg: { type: Object, required: true } })
 
 const detailsEl = ref(null)
+
+const cleanedText = computed(() => {
+  const raw = props.msg.thinking?.text ?? ''
+  return raw
+    // Strip <thought> / </thought> XML tags (Ollama sometimes leaks these)
+    .replace(/^<thought>\s*/i, '')
+    .replace(/\s*<\/thought>\s*$/i, '')
+    // Strip a bare "thought" or "thought:" that appears alone on the first line
+    .replace(/^thought\s*:?\s*\n+/i, '')
+    .trim()
+})
 </script>
diff --git a/webclient/src/composables/useWebSocket.js b/webclient/src/composables/useWebSocket.js
index e7550d4..b96217d 100644
--- a/webclient/src/composables/useWebSocket.js
+++ b/webclient/src/composables/useWebSocket.js
@@ -109,6 +109,7 @@
       case 'thinking_delta':    chat.onThinkingDelta(event.delta ?? ''); break
       case 'thinking_end':      chat.onThinkingEnd(); break
       case 'turn_thinking':     chat.onTurnThinking(event); break
+      case 'planning_status':   chat.onPlanningStatus(event); break
       case 'plan_ready':        chat.onPlanReady(event); break
       case 'tool_started':      chat.onToolStarted(event); break
       case 'tool_call':         chat.onToolCall(event); break