diff --git a/navi/core/registry.py b/navi/core/registry.py index 9cd133d..f17ec96 100644 --- a/navi/core/registry.py +++ b/navi/core/registry.py @@ -15,6 +15,7 @@ TerminalTool, Tool, WebSearchTool, + WebViewTool, ) from navi.tools.list_tools import ListToolsTool from navi.tools.reload_tools import ReloadToolsTool @@ -98,7 +99,7 @@ write_tool = WriteToolTool(registry=tools) list_tool = ListToolsTool(registry=tools) manual_tool = ToolManualTool(registry=tools) - for builtin in [WebSearchTool(), FilesystemTool(), HttpRequestTool(), + for builtin in [WebSearchTool(), FilesystemTool(), HttpRequestTool(), WebViewTool(), CodeExecTool(), TerminalTool(), SshExecTool(), ImageViewTool(), reload_tool, write_tool, list_tool, manual_tool]: tools.register(builtin, builtin=True) diff --git a/navi/profiles/secretary.py b/navi/profiles/secretary.py index d9f65b6..545c846 100644 --- a/navi/profiles/secretary.py +++ b/navi/profiles/secretary.py @@ -15,7 +15,7 @@ 6. image_view — whenever an image path or URL is mentioned. Output style: concise, structured. When researching, include sources. Match tone and format to what was asked.""", - enabled_tools=["web_search", "http_request", "filesystem", "code_exec", "terminal", "image_view", "reload_tools", "write_tool", "list_tools", "tool_manual"], + enabled_tools=["web_search", "web_view", "http_request", "filesystem", "code_exec", "terminal", "image_view", "reload_tools", "write_tool", "list_tools", "tool_manual"], model="gemma4:e4b-it-q8_0", temperature=0.7, ) diff --git a/navi/profiles/server_admin.py b/navi/profiles/server_admin.py index 05bd752..1cb667a 100644 --- a/navi/profiles/server_admin.py +++ b/navi/profiles/server_admin.py @@ -17,7 +17,7 @@ Workflow: gather data first (logs, status, metrics), diagnose, then act. 
Before destructive or irreversible operations, state what you're about to do and why.""", - enabled_tools=["terminal", "filesystem", "http_request", "web_search", "ssh_exec", "image_view", "reload_tools", "write_tool", "list_tools", "tool_manual"], + enabled_tools=["terminal", "filesystem", "http_request", "web_view", "web_search", "ssh_exec", "image_view", "reload_tools", "write_tool", "list_tools", "tool_manual"], model="gemma4:e4b-it-q8_0", temperature=0.2, ) diff --git a/navi/profiles/smart_home.py b/navi/profiles/smart_home.py index 4bc0770..f3f8348 100644 --- a/navi/profiles/smart_home.py +++ b/navi/profiles/smart_home.py @@ -17,7 +17,7 @@ Before writing any HA config to disk, validate structure in code_exec. Before toggling devices or triggering automations, confirm if the action is irreversible.""", - enabled_tools=["http_request", "filesystem", "code_exec", "terminal", "ssh_exec", "image_view", "reload_tools", "write_tool", "list_tools", "tool_manual"], + enabled_tools=["http_request", "web_view", "filesystem", "code_exec", "terminal", "ssh_exec", "image_view", "reload_tools", "write_tool", "list_tools", "tool_manual"], model="gemma4:e4b-it-q8_0", temperature=0.3, ) diff --git a/navi/tools/__init__.py b/navi/tools/__init__.py index eef15ec..b0d7962 100644 --- a/navi/tools/__init__.py +++ b/navi/tools/__init__.py @@ -6,6 +6,7 @@ from .ssh_exec import SshExecTool from .terminal import TerminalTool from .web_search import WebSearchTool +from .web_view import WebViewTool __all__ = [ "Tool", @@ -17,4 +18,5 @@ "TerminalTool", "SshExecTool", "ImageViewTool", + "WebViewTool", ] diff --git a/navi/tools/http_request.py b/navi/tools/http_request.py index e3a440f..1f01674 100644 --- a/navi/tools/http_request.py +++ b/navi/tools/http_request.py @@ -12,9 +12,11 @@ class HttpRequestTool(Tool): name = "http_request" description = ( - "Make an HTTP request (GET/POST/PUT/PATCH/DELETE) to any URL. " - "Use for REST APIs, local services (e.g. 
"""Web view tool — open a URL in a real browser and extract readable content."""

import base64
import re

from playwright.async_api import TimeoutError as PlaywrightTimeoutError
from playwright.async_api import async_playwright

from .base import Tool, ToolResult

_TIMEOUT = 30_000  # ms for page load
_MAX_TEXT = 20_000  # chars — cap huge pages
_MAX_BLANK_RUN = 2  # consecutive blank lines kept by _clean()
_WAIT_STATES = ("load", "domcontentloaded", "networkidle")
_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)

# Runs inside the page.  Noise elements are removed from the LIVE document and
# innerText is read from the rendered root: innerText on a detached clone is
# not layout-aware and degrades to textContent (hidden text included, block
# line breaks lost).  The page is discarded right after, so mutating it is safe.
_EXTRACT_TEXT_JS = """() => {
    const root = document.body || document.documentElement;
    if (!root) return '';
    const kill = ['script','style','noscript','iframe',
                  'nav','header','footer','aside',
                  '[role="navigation"]','[role="banner"]',
                  '[role="contentinfo"]'];
    kill.forEach(sel => {
        root.querySelectorAll(sel).forEach(el => el.remove());
    });
    return root.innerText || root.textContent || '';
}"""


def _clean(text: str) -> str:
    """Collapse excessive blank lines and strip trailing whitespace.

    Every line is right-stripped, runs of more than ``_MAX_BLANK_RUN`` blank
    lines are shortened to that length, and leading/trailing whitespace of the
    whole text is removed.
    """
    result: list[str] = []
    blank_run = 0
    for line in (raw.rstrip() for raw in text.splitlines()):
        if line:
            blank_run = 0
            result.append(line)
            continue
        blank_run += 1
        if blank_run <= _MAX_BLANK_RUN:
            result.append("")
    return "\n".join(result).strip()


def _format_output(title: str, requested_url: str, final_url: str,
                   text: str, timed_out: bool) -> str:
    """Build the textual tool output: short header, blank line, page text."""
    text = _clean(text)
    if len(text) > _MAX_TEXT:
        text = text[:_MAX_TEXT] + f"\n\n[… truncated at {_MAX_TEXT} chars]"

    parts: list[str] = []
    if title:
        parts.append(f"Title: {title}")
    if final_url != requested_url:
        parts.append(f"Final URL: {final_url}")
    if timed_out:
        parts.append(
            f"Note: page load timed out after {_TIMEOUT // 1000}s — content may be incomplete"
        )
    parts.append("")
    parts.append(text)
    return "\n".join(parts)


class WebViewTool(Tool):
    """Tool that renders a page in headless Chromium and returns its text.

    The result's ``output`` holds the page title, the final URL (when it
    differs from the requested one) and the cleaned page text.  When a
    screenshot is requested, ``metadata`` carries it as a base64 PNG.
    """

    name = "web_view"
    description = (
        "Open a URL in a real headless browser and return clean readable text. "
        "Use this to browse web pages — it executes JavaScript, waits for the page "
        "to finish loading, and strips HTML/scripts so you get the actual content. "
        "Optionally takes a screenshot so you can see the page visually. "
        "Use http_request instead when working with REST APIs, JSON endpoints, "
        "or services that need custom headers/auth."
    )
    parameters = {
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": "Full URL to open (must start with http:// or https://)",
            },
            "screenshot": {
                "type": "boolean",
                "description": "If true, also capture a screenshot of the page (default: false)",
            },
            "wait_until": {
                "type": "string",
                "enum": ["load", "domcontentloaded", "networkidle"],
                "description": (
                    "When to consider the page loaded. "
                    "'networkidle' (default) waits for no network activity — best for SPAs. "
                    "'load' is faster but may miss dynamic content."
                ),
            },
        },
        "required": ["url"],
    }

    async def execute(self, params: dict) -> ToolResult:
        """Load ``params["url"]`` in a browser and return its readable text.

        Args:
            params: Tool arguments — ``url`` (required), ``screenshot``
                (optional bool) and ``wait_until`` (optional, one of
                ``load`` / ``domcontentloaded`` / ``networkidle``).

        Returns:
            A successful ``ToolResult`` with the page text (and screenshot
            metadata when requested), or a failed one describing the problem.
            Browser and navigation errors are reported through the result,
            not raised.
        """
        # Validate up front so malformed arguments yield a clear tool error
        # instead of a KeyError/AttributeError escaping from the tool.
        url = str(params.get("url") or "").strip()
        take_screenshot = bool(params.get("screenshot", False))
        wait_until = params.get("wait_until") or "networkidle"

        if not url.startswith(("http://", "https://")):
            return ToolResult(success=False, output="URL must start with http:// or https://",
                              error="invalid url")
        if wait_until not in _WAIT_STATES:
            return ToolResult(
                success=False,
                output=f"wait_until must be one of: {', '.join(_WAIT_STATES)}",
                error="invalid wait_until",
            )

        timed_out = False
        screenshot_b64: str | None = None
        try:
            async with async_playwright() as pw:
                browser = await pw.chromium.launch(headless=True)
                try:
                    context = await browser.new_context(
                        viewport={"width": 1280, "height": 800},
                        user_agent=_USER_AGENT,
                    )
                    page = await context.new_page()

                    try:
                        await page.goto(url, wait_until=wait_until, timeout=_TIMEOUT)
                    except PlaywrightTimeoutError:
                        # A timeout (typically waiting for networkidle) often
                        # leaves a usable page.  Other navigation errors (DNS,
                        # refused connection, …) propagate and are reported.
                        timed_out = True

                    title = await page.title()
                    final_url = page.url

                    # Screenshot first: text extraction mutates the live DOM.
                    if take_screenshot:
                        png = await page.screenshot(full_page=False)
                        screenshot_b64 = base64.b64encode(png).decode()

                    text = await page.evaluate(_EXTRACT_TEXT_JS)
                finally:
                    # Closing the browser also closes its contexts and pages;
                    # doing it here covers every error path.
                    await browser.close()
        except Exception as e:
            return ToolResult(success=False, output=f"Browser error: {e}", error=str(e))

        if timed_out and final_url == "about:blank":
            # Nothing was ever loaded — do not report an empty page as success.
            message = f"Timed out after {_TIMEOUT // 1000}s loading {url}"
            return ToolResult(success=False, output=f"Browser error: {message}", error=message)

        output = _format_output(title, url, final_url, text, timed_out)

        metadata: dict = {}
        if screenshot_b64:
            metadata = {"base64": screenshot_b64, "mime": "image/png", "is_image": True}

        return ToolResult(success=True, output=output, metadata=metadata or None)