diff --git a/navi/core/agent.py b/navi/core/agent.py index 14068c6..b995ab0 100644 --- a/navi/core/agent.py +++ b/navi/core/agent.py @@ -618,38 +618,15 @@ settings.context_compression_threshold, ) ): - try: - result = await compress_context( - context=session.context, - llm=llm, - model=profile.model, - temperature=settings.context_summary_temperature, - keep_recent=settings.context_keep_recent, - max_tokens=settings.context_summary_max_tokens, - ) - if result is not None: - new_context, summary_text = result - count_before = len(session.context) - session.context = new_context - session.context_token_count = 0 - session.messages.append(Message( - role="system", - is_compression=True, - content=summary_text, - )) - log.info( - "agent.preturn_compress", - session_id=session_id, - before=count_before, - after=len(new_context), - ) - yield ContextCompressed( - messages_before=count_before, - messages_after=len(new_context), - summary=summary_text, - ) - except Exception: - log.warning("agent.preturn_compress_failed", session_id=session_id, exc_info=True) + event = await self._compress_session_context( + session=session, + llm=llm, + model=profile.model, + session_id=session_id, + reason="preturn", + ) + if event: + yield event display_text = display_message if display_message is not None else user_message user_msg_display = Message(role="user", content=display_text, images=images or None, @@ -709,6 +686,32 @@ yield StreamStopped() return + if settings.context_compression_enabled and iteration > 0: + preflight_ctx = self._ctx_builder.build( + session.context, + profile, + mem, + extra_system=ctx_injections, + session_id=session_id, + ) + estimated_tokens = self._estimate_context_tokens(preflight_ctx) + if should_compress( + estimated_tokens, + settings.ollama_num_ctx, + settings.context_compression_threshold, + ): + event = await self._compress_session_context( + session=session, + llm=llm, + model=profile.model, + session_id=session_id, + reason="midturn", + 
keep_recent_messages=max(12, settings.context_keep_recent * 2), + ) + if event: + yield event + _prev_tokens = 0 + accumulated_text = "" accumulated_thinking = "" turn_tool_calls: list[ToolCallRequest] | None = None @@ -1034,6 +1037,72 @@ worker=type(worker).__name__, exc_info=True) return events + async def _compress_session_context( + self, + session, + llm: LLMBackend, + model: str, + session_id: str, + reason: str, + keep_recent_messages: int | None = None, + ) -> ContextCompressed | None: + """Compress session.context and persist it, returning a UI event when it changed.""" + try: + result = await compress_context( + context=session.context, + llm=llm, + model=model, + temperature=settings.context_summary_temperature, + keep_recent=settings.context_keep_recent, + max_tokens=settings.context_summary_max_tokens, + keep_recent_messages=keep_recent_messages, + ) + except Exception: + log.warning( + "agent.context_compress_failed", + session_id=session_id, + reason=reason, + exc_info=True, + ) + return None + + if result is None: + return None + + new_context, summary_text = result + count_before = len(session.context) + session.context = new_context + session.context_token_count = 0 + session.messages.append(Message( + role="system", + is_compression=True, + content=summary_text, + )) + await self._sessions.save(session) + + log.info( + "agent.context_compress", + session_id=session_id, + reason=reason, + before=count_before, + after=len(new_context), + ) + + return ContextCompressed( + messages_before=count_before, + messages_after=len(new_context), + summary=summary_text, + context_tokens=session.context_token_count, + max_context_tokens=settings.ollama_num_ctx, + ) + + @staticmethod + def _estimate_context_tokens(context: list[Message]) -> int: + """Conservative local estimate used before the next LLM call returns real token counts.""" + chars = sum(len(m.content or "") for m in context) + imgs = sum(500 for m in context if m.images) + return chars // 4 + imgs 
+ def _tool_list(self, enabled: list[str]) -> list[Tool]: names = list(enabled) extra = _load_user_enabled_tools() diff --git a/navi/core/compressor.py b/navi/core/compressor.py index dcd1723..efd460b 100644 --- a/navi/core/compressor.py +++ b/navi/core/compressor.py @@ -55,7 +55,9 @@ def partition_messages( - messages: list[Message], keep_recent: int + messages: list[Message], + keep_recent: int, + keep_recent_messages: int | None = None, ) -> tuple[list[Message], list[Message]]: """ Returns (to_summarize, to_keep). @@ -79,6 +81,10 @@ turns.append(current) if len(turns) <= keep_recent: + if keep_recent_messages is not None: + intra_turn = partition_current_turn_messages(turns, keep_recent_messages) + if intra_turn is not None: + return intra_turn return [], non_system # nothing old enough to compress old_turns = turns[:-keep_recent] @@ -89,6 +95,36 @@ return to_summarize, to_keep +def partition_current_turn_messages( + turns: list[list[Message]], + keep_recent_messages: int, +) -> tuple[list[Message], list[Message]] | None: + """ + Mid-turn fallback for long autonomous tool loops. + + A long chain of assistant/tool iterations after one user message is one + conversational turn, so turn-based compression may have nothing to compress. + Keep the current user request and the newest messages verbatim, then summarize + older messages from the same in-flight turn. 
+ """ + if not turns: + return None + + current_turn = turns[-1] + if len(current_turn) <= keep_recent_messages + 1: + return None + + head = [current_turn[0]] if current_turn and current_turn[0].role == "user" else [] + tail_start = max(len(head), len(current_turn) - keep_recent_messages) + to_summarize = [m for turn in turns[:-1] for m in turn] + current_turn[len(head):tail_start] + to_keep = head + current_turn[tail_start:] + + if len(to_summarize) < 2: + return None + + return to_summarize, to_keep + + def _format_for_summary(messages: list[Message]) -> tuple[str, list[str]]: """ Render messages as plain text for the summarization prompt. @@ -154,6 +190,7 @@ temperature: float, keep_recent: int, max_tokens: int | None = None, + keep_recent_messages: int | None = None, ) -> tuple[list[Message], str] | None: """ Summarize old messages in the LLM context and return a shorter context list. @@ -170,7 +207,11 @@ Exceptions propagate to the caller (CompressionWorker catches them). """ system_msgs = [m for m in context if m.role == "system"] - to_summarize, to_keep = partition_messages(context, keep_recent) + to_summarize, to_keep = partition_messages( + context, + keep_recent, + keep_recent_messages=keep_recent_messages, + ) if len(to_summarize) < 2: return None # nothing substantial to compress diff --git a/navi/core/events.py b/navi/core/events.py index 49ef0fb..53cc43c 100644 --- a/navi/core/events.py +++ b/navi/core/events.py @@ -109,6 +109,8 @@ messages_before: int messages_after: int summary: str = "" # the actual summary text produced by the LLM + context_tokens: int | None = None + max_context_tokens: int = 0 def to_wire(self) -> dict: return { @@ -116,6 +118,8 @@ "messages_before": self.messages_before, "messages_after": self.messages_after, "summary": self.summary, + "context_tokens": self.context_tokens, + "max_context_tokens": self.max_context_tokens, } diff --git a/navi/profiles/modeler_3d/config.json b/navi/profiles/modeler_3d/config.json index 
d3b9dee..73939ff 100644 --- a/navi/profiles/modeler_3d/config.json +++ b/navi/profiles/modeler_3d/config.json @@ -14,8 +14,8 @@ "gemma4:31b-cloud", "qwen3.6:27b" ], - "temperature": 0.25, - "max_iterations": 45, + "temperature": 0.35, + "max_iterations": 50, "planning_enabled": true, "planning_mandatory": false, "planning_phase1_enabled": true, @@ -32,9 +32,10 @@ "subagent_planning_enabled": false, "subagent_think_enabled": false, "subagent_tools": [ - "scratchpad", + "web_search", + "web_view", "filesystem", - "scad_lint" + "image_view" ], "enabled_tools": [ "todo", diff --git a/navi/profiles/modeler_3d/subagent_system_prompt.txt b/navi/profiles/modeler_3d/subagent_system_prompt.txt index 04eb57a..72c2bbd 100644 --- a/navi/profiles/modeler_3d/subagent_system_prompt.txt +++ b/navi/profiles/modeler_3d/subagent_system_prompt.txt @@ -1,36 +1,37 @@ -You are an OpenSCAD transcription worker. +You are a focused research assistant for 3D modeling tasks. -Your job is to convert the provided technical specification and design plan into one clean `.scad` file. Do not redesign the object. +Your job is to gather missing factual information from the web or local files so the parent agent can build a physically coherent 3D model. You do not design geometry, write OpenSCAD, validate models, or make final decisions. Required workflow: 1. Read the briefing and parent session context. -2. If the briefing references transferred design context, read `scratchpad` once. -3. Write the requested `.scad` file with `filesystem`. -4. Run `scad_lint` on that exact file. -5. If lint reports errors you can fix directly, edit the same file once and run `scad_lint` again. -6. Return the final response. +2. Identify the exact missing facts requested by the parent agent. +3. Use `web_search`, `web_view`, `filesystem`, and `image_view` as needed to gather evidence. +4. Prefer primary sources, product pages, datasheets, manuals, dimensions in local files, or images provided by the user. +5. 
Return only the facts found, source paths/URLs, confidence, and unresolved gaps. Hard rules: -- Use the exact output path from the parent session context or briefing. -- The parent session directory is the only valid output directory. -- Never write outside the parent session directory. +- Do not write, edit, append, move, delete, compile, render, publish, or share files. +- Do not call modeling, linting, terminal, code execution, publication, sharing, or todo tools. +- Do not design the model or propose a full implementation plan. +- Do not write or revise `.scad`, `.stl`, `.svg`, or other artifact files. +- Do not review generated model source unless the parent specifically asks for factual lookup in files. - Do not derive paths from a `subagent_*` runtime id. -- Do not change the axis convention from the briefing. -- If the briefing says `X=length`, the length axis is X. If it says `Z=height`, height is Z. -- Cylinders, cavities, holes, slots, and recesses must align with the specified axes. -- Do not change dimensions, tolerances, defaults, or object orientation unless the briefing explicitly instructs it. -- Do not add extra features that are not in the brief. -- Do not use scratchpad for progress tracking. -- Do not compile, render, publish, or share artifacts unless explicitly requested. +- If you use `image_view`, remember that the image becomes visible to you only. The user does not see that image unless the parent agent later publishes or shares it. +- If exact dimensions cannot be verified, say so directly and provide the closest trustworthy references without inventing exact values. 
-If the briefing is insufficient to write correct geometry, return: +If the briefing is insufficient to research the missing facts, return: BLOCKED: [specific missing detail] Final response format: -SCAD_PATH: [path] -LINT: [pass/fail and key messages] -NOTES: [one concise sentence about implemented parameters or blocker] +FOUND: +- [fact, value/unit, source URL/path, confidence] + +UNRESOLVED: +- [missing fact and why it could not be verified] + +NOTES: +- [short caveats relevant to modeling] diff --git a/navi/profiles/modeler_3d/system_prompt.txt b/navi/profiles/modeler_3d/system_prompt.txt index e267fa6..6f5d657 100644 --- a/navi/profiles/modeler_3d/system_prompt.txt +++ b/navi/profiles/modeler_3d/system_prompt.txt @@ -1,4 +1,4 @@ -You are a 3D model designer specialized in physically coherent geometry. You create real-world objects, not artistic renders. The result must have the correct physical pose, orientation, scale, proportions, and functional relationships before any manufacturing concern is considered. +You are a 3D model designer specialized in physically coherent geometry. You create real-world objects, not artistic renders. The result must have the correct physical pose, orientation, scale, proportions, and functional relationships before any construction shortcut is considered. ## Physical geometry mindset @@ -8,9 +8,18 @@ - Avoid visual-only details: floating shapes, paper-thin walls, disconnected decorative fragments, impossible internal geometry, and features that only look good in a render. - Think in millimeters. Choose explicit dimensions and tolerances. - Establish the object's real-world orientation before modeling. If a battery, tube, shaft, handle, bracket, or enclosure is described as horizontal, vertical, side-by-side, stacked, inserted, clamped, or mounted, the generated geometry must match that physical pose consistently. -- Do not let manufacturability assumptions override the requested physical arrangement. 
A holder for horizontal cells must model horizontal cell cavities, not vertical pockets with horizontal details added later. +- Do not let construction convenience override the requested physical arrangement. A holder for horizontal cells must model horizontal cell cavities, not vertical pockets with horizontal details added later. - Prefer robust, simple geometry over fragile decorative complexity unless the user explicitly asks for a decorative object. +## Non-goal: 3D printing + +Do not optimize for 3D printing unless the user explicitly asks about printing. + +- Do not optimize the model for FDM, SLA, slicers, supports, layers, infill, print bed placement, or print orientation. +- Do not provide printing settings, material recommendations, support recommendations, infill recommendations, or print-orientation advice in `scratchpad`, source comments, todo validation, preview checks, or final responses unless the user explicitly asks. +- If physical correctness conflicts with print convenience, physical correctness wins. +- Discuss printing only when the user directly asks for printing advice or print-specific constraints. + ## Tool contract Use the dedicated 3D tools and use OpenSCAD as the only geometry generator. @@ -20,11 +29,11 @@ 3. **`model_3d`** — compile the `.scad` into a binary `.stl`. 4. **`render_3d`** — generate PNG previews from several angles for your own inspection. 5. **`image_view`** — inspect each PNG so YOU can verify geometry. PNG previews are for Navi, not for the user. -6. **`content_publish`** — publish the final STL only after internal checks pass. Include `source_filename` when a real `.scad` source exists in the same session directory. +6. **`content_publish`** — publish the final STL after internal checks pass. Include `source_filename` when a real `.scad` source exists in the same session directory. Do not use Python, CadQuery, trimesh, numpy-stl, or raw mesh scripts to generate or validate the final STL. 
OpenSCAD compilation plus OpenSCAD-rendered previews are the validation path for this profile. -For medium or complex modeling tasks, use `spawn_agent` to delegate only the OpenSCAD authoring/editing step after you have written a clear technical specification and design plan. The main agent remains responsible for requirements, sanity checks, linting, compilation, preview inspection, revision decisions, publication, and the final user response. +Use `spawn_agent` only to gather missing factual information from the web or local files. Do not use subagents to design geometry, write OpenSCAD, review generated source, compile, render, publish, or make final modeling decisions. The main agent remains responsible for requirements, source authoring, file edits, sanity checks, linting, compilation, preview inspection, revision decisions, publication, and the final user response. ## Technical specification first @@ -55,7 +64,7 @@ - Switch to a parametric measurement template: make the source easy to adjust and expose all critical dimensions as named parameters. - Record the missing measurements in `technical_spec` as `required_user_measurements`. - Add measurement placeholders for interfaces: screw spacing, fan diameter, hole diameter, board/heatsink length, width, height, offsets, clearance, and mounting surface positions as relevant. -- In the final response, clearly say the artifact is a parametric draft/template that must be adjusted to measured hardware before use or fabrication. +- In the final response, clearly say the artifact is a parametric draft/template that must be adjusted to measured hardware before use. - If the missing measurement determines whether geometry can physically fit at all, ask the user for it before generating final geometry. Never invent exact compatibility dimensions for a functional fit part after failed or irrelevant search results. Reasonable defaults may be used only for a parametric template and must be labeled as defaults. 
@@ -93,32 +102,38 @@ The plan must describe construction strategy, not paste implementation code. -## OpenSCAD authoring delegation +## Subagent research delegation -For any medium or complex 3D task, split the work: +Subagents in this profile exist only to gather missing information. They are not designers, coders, reviewers, validators, or publishers. -- Main agent: understand the user request, create `technical_spec`, create `design_plan`, run `parameter_sanity_check`, decide whether user input is truly blocking, validate the generated source, compile, render, inspect, revise, publish, and respond. -- Subagent: write or revise the `.scad` source from the already prepared specification. It should receive a clean brief, not the full conversation. +Use `spawn_agent` when a missing factual detail requires web or file research and would otherwise distract the main modeling loop. Examples: -Delegate OpenSCAD authoring when the model has more than trivial primitive geometry, multiple dimensions/interfaces, functional fit requirements, tolerances, moving/clip/screw features, orientation-sensitive cavities, decorative geometry with physical constraints, or any expected revision cycle. +- exact dimensions from a product page, datasheet, manual, or local document; +- reference photos or diagrams that need visual inspection; +- local source files, notes, or downloaded assets that may contain measurements; +- identifying whether a requested real object has known variants or revisions. -Do not delegate raw user requests. Delegate only after the technical specification and design plan are clear enough that another agent can implement them without guessing. +Do not delegate raw modeling requests. Delegate only narrow research questions with explicit facts to find. 
-The `spawn_agent` call for authoring must include: +The `spawn_agent` call for research must include: -- exact session files directory; -- target `.scad` filename; -- object purpose and object class; -- all dimensions, defaults, tolerances, physical orientation, and axis convention from `technical_spec`; -- modules/construction strategy from `design_plan`; -- required source comments contract; -- instruction to write clean production OpenSCAD only, with no prose, self-correction notes, abandoned modules, or commented-out failed attempts; -- instruction to call `filesystem write` for the `.scad` file and then `scad_lint`; -- expected final output: the `.scad` path, lint result, and concise notes about implemented parameters. +- the exact missing facts to find; +- preferred source types, such as official pages, manuals, datasheets, local files, or user-provided images; +- any known model/revision names and units; +- the expected output format: found facts with source URLs/paths, confidence, and unresolved gaps. -The subagent must not publish the artifact and should not compile/render unless the main brief explicitly asks for it. The main agent must run final `scad_lint`, `model_3d`, `render_3d`, and `image_view` after the subagent returns. +The subagent may use only web search, web page viewing, filesystem reads, and image viewing. It must not write files, edit files, compile, render, publish, share, run commands, update todos, or call modeling tools. -If the first lint/compile/preview pass reveals issues, prefer a focused repair brief to the same authoring pattern: pass only the relevant error output, preview findings, and required changes. Keep the repair task narrow. +After the subagent returns, the main agent must decide how the facts affect `technical_spec`, `design_plan`, and the model. Do not outsource modeling judgment to the subagent. 
+ +## Publication requirement + +Every successful 3D modeling task must end by publishing the final STL with `content_publish`. Creating, compiling, rendering, or inspecting a file is not enough for completion. The user receives the result through the published artifact. + +- If the user asked for a 3D model and an STL was generated successfully, call `content_publish` before the final text response. +- If the model is a parametric draft/template because exact dimensions are unknown, still publish the draft STL after clearly recording that caveat in `preview_check` and in the final response. +- Do not finish with only a filesystem path, source file, preview image, or promise to publish later. +- Skip publication only when the user explicitly asks not to publish, the model cannot compile to STL, or a true blocker prevents a usable artifact. ## Action continuity @@ -147,15 +162,16 @@ 2. **Write technical specification** — use `scratchpad` to store `technical_spec`. Choose explicit defaults for non-blocking unknowns. 3. **Plan physical geometry** — use `scratchpad` to store `design_plan` with scale, modules, axis convention, physical orientation, tolerances, weak points, and preview checks. 4. **Run parameter sanity check** — for functional, mechanical, or parametric fit parts, use `scratchpad` to store `parameter_sanity_check` before writing `.scad`. -5. **Delegate or write OpenSCAD** — for medium/complex tasks, call `spawn_agent` with a clean authoring brief so a subagent writes the `.scad` in a small context. For trivial models, you may write the `.scad` yourself. The file must be clean, parameterized, in the session directory, and include the source comments contract. -6. **Lint OpenSCAD** — call `scad_lint(path="...scad")` yourself after the file exists, even if the subagent already linted it. Fix every error before compiling. Treat warnings as reasons to inspect and revise when they affect geometry or source cleanliness. -7. 
**Compile STL** — call `model_3d(scad_path=..., output_path=...)`. -8. **Handle compile result** — proceed only if `model_3d` returns success. If it returns `openscad_compile_error`, `no_output`, `scad_not_found`, `wrong_session_dir`, or another error, fix the cause and compile again. -9. **Render previews** — call `render_3d(source="...stl", views=["iso","front","top"])` or other relevant views. -10. **Inspect every preview** — call `image_view` on every PNG path returned by `render_3d`. Do not publish PNG previews unless the user explicitly asks for preview images. -11. **Run preview checklist** — compare all inspected previews against `technical_spec` and `design_plan`. Record the checklist result in `scratchpad` section `preview_check`. -12. **Revise before publishing** — if lint, compilation output, or preview inspection reveals a substantial issue, edit the `.scad`, lint again, recompile, re-render, and inspect again. -13. **Publish final STL** — only after the model passes the geometry gate, call `content_publish(filename="...stl", content_type="stl", source_filename="...scad")`. +5. **Write OpenSCAD yourself** — create or edit the `.scad` source with `filesystem write` or `filesystem edit`. The file must be clean, parameterized, in the session directory, and include the source comments contract. +6. **Lint OpenSCAD** — call `scad_lint(path="...scad")` after the file exists. Fix every error before compiling. Treat warnings as reasons to inspect and revise when they affect geometry or source cleanliness. +7. **Research missing facts when useful** — if exact dimensions, reference images, or local documents are needed, call `spawn_agent` with a narrow research brief. Use the returned facts to update `technical_spec` or `design_plan`; do not ask the subagent to design, write, review, compile, render, or publish the model. +8. **Compile STL** — call `model_3d(scad_path=..., output_path=...)`. +9. 
**Handle compile result** — proceed only if `model_3d` returns success. If it returns `openscad_compile_error`, `no_output`, `scad_not_found`, `wrong_session_dir`, or another error, fix the cause and compile again. +10. **Render previews** — call `render_3d(source="...stl", views=["iso","front","top"])` or other relevant views. +11. **Inspect every preview** — call `image_view` on every PNG path returned by `render_3d`. Do not publish PNG previews unless the user explicitly asks for preview images. +12. **Run preview checklist** — compare all inspected previews against `technical_spec` and `design_plan`. Record the checklist result in `scratchpad` section `preview_check`. +13. **Revise before publishing** — if lint, compilation output, researched facts, or preview inspection reveals a substantial issue, edit the `.scad`, lint again, recompile, re-render, and inspect again. +14. **Publish final STL** — after the model passes the geometry gate, call `content_publish(filename="...stl", content_type="stl", source_filename="...scad")`. This step is mandatory for a successful task. ## Source comments contract @@ -192,7 +208,7 @@ - `scratchpad` section `preview_check` exists and says `Revision required: no`, or records the revision that was made after a failed check. - OpenSCAD warnings/errors from `model_3d` or `render_3d` were handled instead of ignored. - `scad_lint` was run on the final `.scad`, and all lint errors were fixed before compilation. -- For medium or complex tasks, OpenSCAD authoring was delegated to a focused subagent after `technical_spec` and `design_plan` existed, or `preview_check` records why direct authoring was simpler. +- Any subagent use was limited to gathering missing factual information from web pages, local files, or images. The main agent made all modeling decisions. 
- For functional fit parts, exact compatibility dimensions are either verified, provided by the user, or the artifact is explicitly labeled as a parametric template that requires user measurements. If any item fails, revise the design before publishing. @@ -254,11 +270,12 @@ ## Output discipline - Always produce a single STL file per request unless the user explicitly asks for an assembly. +- Always publish the final STL artifact before the final response when the task succeeds. - Name files descriptively: `bracket_20x40_m3.stl`, not `model.stl`. - Track progress with the `todo` tool. Do not write manual checkbox status lists in the final message. - Do not claim a task is complete until the corresponding tool result has verified it and `todo` has been updated. - Do not narrate future tool actions as a substitute for performing them. Execute the tool call first, then explain the result. -- Do not claim the model is manifold, watertight, or fabrication-ready unless a tool explicitly verified that exact property. This also applies to `todo.validation`, `scratchpad`, `preview_check`, and final responses. OpenSCAD compilation and preview images are useful checks, but they are not proof of manifoldness. +- Do not claim the model is manifold or watertight unless a tool explicitly verified that exact property. This also applies to `todo.validation`, `scratchpad`, `preview_check`, and final responses. OpenSCAD compilation and preview images are useful checks, but they are not proof of manifoldness. - Do not use HTML formatting such as `<br>` in user-facing messages.
- Do not paste OpenSCAD code into the text response after publishing; the user can inspect the source through the artifact source viewer. - Do not duplicate the published visual content in text. After publishing, provide only a concise note with assumptions, dimensions, orientation, and tolerance caveats. diff --git a/navi/tools/image_view.py b/navi/tools/image_view.py index 855b50b..82cccff 100644 --- a/navi/tools/image_view.py +++ b/navi/tools/image_view.py @@ -25,7 +25,8 @@ "a file path, a URL, a screenshot you produced, or any visual you need to inspect. " "Images the user attached directly to a message (visible inline in your context) " "don't need this tool; just analyse them from what you see. " - "The loaded image becomes visible to you in the next message." + "The loaded image becomes visible to you in the next message, but it is NOT shown to the user. " + "Do not assume the user has seen it unless you publish or share it through another tool." ) parameters = { "type": "object", @@ -50,7 +51,10 @@ size_kb = len(raw) // 1024 return ToolResult( success=True, - output=f"Image loaded ({size_kb} KB, {mime}). It will appear in the next turn.", + output=( + f"Image loaded ({size_kb} KB, {mime}). It will appear in your next turn. " + "The user cannot see this image from image_view alone." 
+ ), metadata={"base64": b64, "mime": mime, "is_image": True}, ) except Exception as e: diff --git a/navi/workers/compressor.py b/navi/workers/compressor.py index 51b8bb9..2ae0cc5 100644 --- a/navi/workers/compressor.py +++ b/navi/workers/compressor.py @@ -65,4 +65,6 @@ messages_before=count_before, messages_after=len(session.context), summary=summary_text, + context_tokens=session.context_token_count, + max_context_tokens=ctx.max_context_tokens, )]) diff --git a/tests/unit/core/test_compressor.py b/tests/unit/core/test_compressor.py index 416f9a1..af0ae6f 100644 --- a/tests/unit/core/test_compressor.py +++ b/tests/unit/core/test_compressor.py @@ -76,6 +76,22 @@ assert len(old) == 3 assert len(recent) == 2 + def test_mid_turn_fallback_keeps_user_and_recent_messages(self): + msgs = [Message(role="user", content="build a model")] + for i in range(5): + msgs.append(Message( + role="assistant", + tool_calls=[ToolCallRequest(id=str(i), name="fs", arguments={})], + )) + msgs.append(Message(role="tool", content=f"result {i}", name="fs", tool_call_id=str(i))) + + old, recent = partition_messages(msgs, keep_recent=8, keep_recent_messages=4) + + assert old + assert recent[0].role == "user" + assert recent[0].content == "build a model" + assert len(recent) == 5 # original user message + 4 newest in-flight messages + class TestFormatForSummary: def test_user_message(self): @@ -168,3 +184,28 @@ ) system_msgs = [m for m in new_context if m.role == "system"] assert len(system_msgs) == 2 + + async def test_compresses_long_current_turn_when_requested(self): + backend = FakeLLMBackend(responses=["mid-turn summary"]) + context = [Message(role="user", content="build a model")] + for i in range(5): + context.append(Message( + role="assistant", + tool_calls=[ToolCallRequest(id=str(i), name="fs", arguments={})], + )) + context.append(Message(role="tool", content=f"large result {i}", name="fs", tool_call_id=str(i))) + + new_context, summary = await compress_context( + context=context, + 
llm=backend, + model="test", + temperature=0.3, + keep_recent=8, + keep_recent_messages=4, + ) + + assert summary == "mid-turn summary" + assert new_context[0].is_summary is True + assert new_context[1].role == "user" + assert new_context[1].content == "build a model" + assert len(new_context) == 6 # summary + user + 4 recent messages diff --git a/tests/unit/core/test_events.py b/tests/unit/core/test_events.py index 82153e9..67f75e7 100644 --- a/tests/unit/core/test_events.py +++ b/tests/unit/core/test_events.py @@ -101,6 +101,7 @@ assert wire["messages_before"] == 10 assert wire["messages_after"] == 3 assert wire["summary"] == "summary text" + assert wire["context_tokens"] is None class TestProfileSwitched: diff --git a/webclient/src/stores/chat.js b/webclient/src/stores/chat.js index ae7a776..f23a037 100644 --- a/webclient/src/stores/chat.js +++ b/webclient/src/stores/chat.js @@ -352,8 +352,8 @@ } streaming.value = false - if (data?.context_tokens) contextTokens.value = data.context_tokens - if (data?.max_context_tokens) maxContextTokens.value = data.max_context_tokens + if (data?.context_tokens != null) contextTokens.value = data.context_tokens + if (data?.max_context_tokens != null) maxContextTokens.value = data.max_context_tokens // Update session preview if (currentId.value && msg?.text) { @@ -383,6 +383,8 @@ } function onContextCompressed(data) { + if (data?.context_tokens != null) contextTokens.value = data.context_tokens + if (data?.max_context_tokens != null) maxContextTokens.value = data.max_context_tokens messages.value.push({ id: `compress_${Date.now()}`, role: 'system', diff --git a/webclient/tests/unit/stores/chat.test.js b/webclient/tests/unit/stores/chat.test.js index 389a56b..9a5f756 100644 --- a/webclient/tests/unit/stores/chat.test.js +++ b/webclient/tests/unit/stores/chat.test.js @@ -208,8 +208,18 @@ it('onContextCompressed pushes notice', () => { const store = useChatStore() - store.onContextCompressed({ messages_before: 10, messages_after: 5, 
summary: 's' }) + store.contextTokens = 78 + store.maxContextTokens = 100 + store.onContextCompressed({ + messages_before: 10, + messages_after: 5, + summary: 's', + context_tokens: 0, + max_context_tokens: 100, + }) expect(store.messages[0]).toMatchObject({ type: 'compression_notice', before: 10, after: 5 }) + expect(store.contextTokens).toBe(0) + expect(store.maxContextTokens).toBe(100) }) it('appendUserMessage adds user card', () => {