diff --git a/client/index.html b/client/index.html index bbc008c..0b16d5b 100644 --- a/client/index.html +++ b/client/index.html @@ -1,64 +1,16 @@ - - - - Navi - - - - - - -
- - - - - -
-
- - Select a profile and start a new chat - -
- -
-
-
💬
-

Start a new conversation

-
-
- -
-
- -
- - - - -
-
-
- -
- - + + + + + + + Navi + + + + +
+ diff --git a/navi/core/agent.py b/navi/core/agent.py index ff1395c..cccc8d1 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -74,12 +74,13 @@ to Ollama → Ollama halts generation → GPU load drops to idle. """ first = True + chunk_task: asyncio.Task | None = None try: while True: timeout = first_chunk_timeout if first else chunk_timeout # Create one task per chunk; reuse across poll iterations so we # don't accidentally start multiple concurrent __anext__ calls. - chunk_task: asyncio.Task = asyncio.ensure_future(stream_gen.__anext__()) + chunk_task = asyncio.ensure_future(stream_gen.__anext__()) elapsed = 0.0 while True: @@ -93,6 +94,7 @@ await chunk_task except (asyncio.CancelledError, Exception): pass + chunk_task = None return if elapsed >= timeout: chunk_task.cancel() @@ -100,6 +102,7 @@ await chunk_task except (asyncio.CancelledError, Exception): pass + chunk_task = None label = "first token (context may be too large for this model)" if first else "next token" raise LLMBackendError( f"LLM timed out after {elapsed:.0f}s waiting for {label}." @@ -108,8 +111,10 @@ try: chunk = chunk_task.result() except StopAsyncIteration: + chunk_task = None return + chunk_task = None first = False yield chunk @@ -117,6 +122,14 @@ return finally: + # Cancel any in-flight __anext__ task so we don't leave a zombie + # coroutine holding an open HTTP connection to Ollama. + if chunk_task is not None and not chunk_task.done(): + chunk_task.cancel() + try: + await chunk_task + except (asyncio.CancelledError, Exception): + pass # Closing the generator terminates the HTTP connection to Ollama, # which signals it to stop generating (GPU returns to idle). try: @@ -505,7 +518,7 @@ if not turn_tool_calls: # Final response — text already streamed above - assistant_msg = Message(role="assistant", content=accumulated_text, + assistant_msg = Message(role="assistant", content=accumulated_text or None, created_at=datetime.now(timezone.utc)) session.messages.append(assistant_msg) session.context.append(assistant_msg) diff --git a/navi/profiles/developer/config.json b/navi/profiles/developer/config.json index 39d21dc..b6b825f 100644 --- a/navi/profiles/developer/config.json +++ b/navi/profiles/developer/config.json @@ -15,6 +15,7 @@ "reload_tools", "delete_tool", "list_tools", "tool_manual", "test_tool", "spawn_agent", - "share_file" + "share_file", + "email_manager" ] -} +} \ No newline at end of file diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json index 38e7bf3..5acc51e 100644 --- a/navi/profiles/secretary/config.json +++ b/navi/profiles/secretary/config.json @@ -15,6 +15,7 @@ "list_tools", "tool_manual", "spawn_agent", "share_file", - "weather" + "weather", + "email_manager" ] -} +} \ No newline at end of file diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json index 2719c0f..fa4d21e 100644 --- a/navi/profiles/server_admin/config.json +++ b/navi/profiles/server_admin/config.json @@ -14,6 +14,7 @@ "memory", "list_tools", "tool_manual", "spawn_agent", - "share_file" + "share_file", + "email_manager" ] -} +} \ No newline at end of file