# ---------------------------------------------------------------------------
# mcp-servers/navi-web/app/__init__.py -- new file, intentionally empty
# mcp-servers/navi-web/app/browse.py   -- new file, contents below
# ---------------------------------------------------------------------------
"""web_view — headless browser page extraction."""

from __future__ import annotations

import base64
import re
from typing import Any

# NOTE: Playwright is imported inside web_view() rather than here, so that a
# missing or broken browser stack is reported through the tool's normal error
# envelope instead of preventing the whole MCP server (and its other tools)
# from being imported.

_TIMEOUT = 30_000  # passed to page.goto(timeout=...); Playwright counts in ms
_MAX_TEXT = 20_000  # maximum number of characters of page text returned
_WAIT_UNTIL = ("load", "domcontentloaded", "networkidle", "commit")

# Runs inside the page: clone the document root, drop non-content elements and
# return the remaining text. Falls back to <html> (or '') when the document has
# no <body>, where the unguarded `document.body.cloneNode` would throw.
_EXTRACT_TEXT_JS = """() => {
    const root = document.body || document.documentElement;
    if (!root) {
        return '';
    }
    const kill = ['script','style','noscript','iframe',
                  'nav','header','footer','aside',
                  '[role="navigation"]','[role="banner"]',
                  '[role="contentinfo"]'];
    const clone = root.cloneNode(true);
    kill.forEach(sel => {
        clone.querySelectorAll(sel).forEach(el => el.remove());
    });
    return clone.innerText || clone.textContent || '';
}"""


def _clean(text: str) -> str:
    """Normalise whitespace in extracted page text.

    Trailing whitespace is removed from every line, runs of blank lines are
    capped at two, and leading/trailing whitespace of the whole text is
    stripped.
    """
    lines = [line.rstrip() for line in text.splitlines()]
    result: list[str] = []
    blank_run = 0
    for line in lines:
        if line == "":
            blank_run += 1
            # Keep at most two consecutive blank lines.
            if blank_run <= 2:
                result.append("")
        else:
            blank_run = 0
            result.append(line)
    return "\n".join(result).strip()


def _failure(output: str, error: str) -> dict[str, Any]:
    """Build the failure envelope shared by every error path of web_view."""
    return {"success": False, "output": output, "error": error, "metadata": {}}


async def web_view(
    url: str,
    screenshot: bool = False,
    wait_until: str = "networkidle",
) -> dict[str, Any]:
    """Open a URL in a headless browser and return readable text.

    Args:
        url: Page to open; must start with ``http://`` or ``https://``.
        screenshot: When true, a PNG of the viewport is returned base64-encoded
            in ``metadata``.
        wait_until: Load state passed to ``page.goto``; one of ``load``,
            ``domcontentloaded``, ``networkidle`` or ``commit``.

    Returns:
        Dict with ``success``, ``output``, ``error`` and ``metadata`` keys.
        No exception escapes: every failure is reported with
        ``success=False`` and a message in ``output``/``error``.
    """
    if not url.startswith(("http://", "https://")):
        return _failure("URL must start with http:// or https://", "invalid_url")
    if wait_until not in _WAIT_UNTIL:
        # Reject early: an unknown value would otherwise only fail inside
        # page.goto, after a browser has already been launched.
        return _failure(
            f"wait_until must be one of: {', '.join(_WAIT_UNTIL)}",
            "invalid_wait_until",
        )

    try:
        from playwright.async_api import Error as PlaywrightError
        from playwright.async_api import async_playwright

        async with async_playwright() as pw:
            browser = await pw.chromium.launch(headless=True)
            try:
                context = await browser.new_context(
                    viewport={"width": 1280, "height": 800},
                    user_agent=(
                        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
                    ),
                )
                page = await context.new_page()

                # Navigation is best-effort: a page that never reaches the
                # requested load state (e.g. long-polling sites with
                # "networkidle") may still have useful content, so a failure
                # is remembered rather than raised.
                nav_error: str | None = None
                try:
                    await page.goto(url, wait_until=wait_until, timeout=_TIMEOUT)
                except PlaywrightError as exc:
                    first_line = str(exc).strip().splitlines()
                    nav_error = first_line[0] if first_line else "navigation failed"

                final_url = page.url
                if nav_error is not None and not final_url.startswith(
                    ("http://", "https://")
                ):
                    # Nothing was loaded at all (blank or browser error page):
                    # report the failure instead of an empty "success".
                    return _failure(f"Navigation failed: {nav_error}", nav_error)

                title = await page.title()
                text = _clean(await page.evaluate(_EXTRACT_TEXT_JS))
                if len(text) > _MAX_TEXT:
                    text = text[:_MAX_TEXT] + f"\n\n[… truncated at {_MAX_TEXT} chars]"

                output_parts = []
                if title:
                    output_parts.append(f"Title: {title}")
                if final_url != url:
                    output_parts.append(f"Final URL: {final_url}")
                if nav_error is not None:
                    output_parts.append(
                        f"Warning: navigation did not complete ({nav_error})"
                    )
                output_parts.append("")
                output_parts.append(text)
                output = "\n".join(output_parts)

                metadata: dict[str, Any] = {}
                if screenshot:
                    png = await page.screenshot(full_page=False)
                    metadata = {
                        "base64": base64.b64encode(png).decode(),
                        "mime": "image/png",
                        "is_image": True,
                    }

                return {
                    "success": True,
                    "output": output,
                    "error": None,
                    "metadata": metadata,
                }
            finally:
                # Closing the browser also closes its contexts and pages; doing
                # it in `finally` releases them on error paths too.
                await browser.close()

    except Exception as e:
        # Tool boundary: launch/driver/import problems are reported to the
        # caller in the result envelope rather than raised.
        return _failure(f"Browser error: {e}", str(e))
# ---------------------------------------------------------------------------
# mcp-servers/navi-web/app/config.py -- new file
# ---------------------------------------------------------------------------
"""Configuration for navi-web MCP server."""

from __future__ import annotations

import os


class Settings:
    """Env-based settings.

    Values are read from the process environment each time an instance is
    created:

    * ``NAVI_WEB_SEARXNG_URL`` -> ``searxng_url`` (trailing ``/`` removed,
      empty string when unset)
    * ``NAVI_WEB_BRAVE_API_KEY`` -> ``brave_api_key`` (empty string when unset)
    * ``NAVI_WEB_DDG_REGION`` -> ``ddg_region`` (defaults to ``"ua-uk"``)
    """

    def __init__(self) -> None:
        self.searxng_url = os.environ.get("NAVI_WEB_SEARXNG_URL", "").rstrip("/")
        self.brave_api_key = os.environ.get("NAVI_WEB_BRAVE_API_KEY", "")
        self.ddg_region = os.environ.get("NAVI_WEB_DDG_REGION", "ua-uk")


# ---------------------------------------------------------------------------
# mcp-servers/navi-web/app/mcp_server.py -- new file
# ---------------------------------------------------------------------------
"""MCP server for Navi web tools (search, browse, HTTP)."""

# from __future__ import annotations
#   ^ first statement of mcp_server.py; shown as a comment only because this
#     listing concatenates three modules and a __future__ import must be the
#     first statement of a module.

import json
import os
from typing import Annotated, Any

from mcp.server.fastmcp import FastMCP
from pydantic import Field

from .browse import web_view
from .config import Settings
from .request import http_request
from .search import web_search

INSTRUCTIONS = """
Navi Web MCP server provides web search, browsing, and raw HTTP tools.

Use it when the task involves:
- searching the web for current info, docs, or real-time data;
- opening a URL in a browser to read human-readable content;
- making REST API calls, webhooks, or raw HTTP requests.

Workflow:
1. web_search — find relevant pages or facts.
2. web_view — open promising URLs to read full content.
3. http_request — call APIs or services requiring headers/auth.

All three tools are stateless and work with public URLs.
No session_id or filesystem paths are required.
""".strip()

mcp = FastMCP("navi-web", instructions=INSTRUCTIONS)


def _json(data: Any) -> str:
    """Serialise a tool result as indented JSON, keeping non-ASCII text readable."""
    return json.dumps(data, ensure_ascii=False, indent=2)


def _settings() -> Settings:
    """Return fresh settings, re-reading the environment on every call."""
    return Settings()


@mcp.tool(name="web_search")
async def web_search_tool(
    query: Annotated[str, Field(description="Search query.")],
    max_results: Annotated[int, Field(description="Number of results (default 5).")] = 5,
) -> str:
    """Search the web (SearXNG primary, DDG fallback, Brave tertiary)."""
    result = await web_search(_settings(), query, max_results)
    return _json(result)


@mcp.tool(name="web_view")
async def web_view_tool(
    url: Annotated[str, Field(description="Full URL to open (must start with http:// or https://).")],
    screenshot: Annotated[bool, Field(description="Also capture a screenshot.")] = False,
    wait_until: Annotated[
        str,
        Field(
            description="When to consider the page loaded: load, domcontentloaded, or networkidle (default)."
        ),
    ] = "networkidle",
) -> str:
    """Open a URL in a headless browser and return clean readable text."""
    result = await web_view(url, screenshot=screenshot, wait_until=wait_until)
    return _json(result)


@mcp.tool(name="http_request")
async def http_request_tool(
    method: Annotated[str, Field(description="HTTP method: GET, POST, PUT, PATCH, DELETE.")],
    url: Annotated[str, Field(description="Full URL to request.")],
    headers: Annotated[dict[str, str] | None, Field(description="Optional HTTP headers.")] = None,
    body: Annotated[dict[str, Any] | None, Field(description="Optional JSON body.")] = None,
    params: Annotated[dict[str, str] | None, Field(description="Optional query parameters.")] = None,
) -> str:
    """Make a raw HTTP request."""
    result = await http_request(method, url, headers=headers, body=body, params=params)
    return _json(result)


def main() -> None:
    """Run the server on the transport named by ``NAVI_WEB_MCP_TRANSPORT``.

    Defaults to ``stdio``. Exits via ``SystemExit`` when the variable holds
    anything other than ``stdio``, ``sse`` or ``streamable-http``.
    """
    transport = os.environ.get("NAVI_WEB_MCP_TRANSPORT", "stdio")
    if transport not in {"stdio", "sse", "streamable-http"}:
        raise SystemExit("NAVI_WEB_MCP_TRANSPORT must be stdio, sse, or streamable-http")
    mcp.run(transport=transport)  # type: ignore[arg-type]


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# mcp-servers/navi-web/app/request.py -- new file
# ---------------------------------------------------------------------------
"""http_request — raw HTTP via httpx."""

# from __future__ import annotations
#   ^ first statement of request.py; see the note in the mcp_server.py section.

import json
from typing import Any

import httpx

_TIMEOUT = 30.0  # passed to httpx.AsyncClient(timeout=...)
_MAX_BODY = 4096  # maximum characters of a non-JSON response body returned


async def http_request(
    method: str,
    url: str,
    headers: dict[str, str] | None = None,
    body: dict[str, Any] | None = None,
    params: dict[str, str] | None = None,
) -> dict[str, Any]:
    """Make an HTTP request.

    Args:
        method: HTTP verb; upper-cased before sending.
        url: Target URL.
        headers: Optional request headers.
        body: Optional payload, sent as JSON.
        params: Optional query-string parameters.

    Returns:
        Dict with ``success``, ``output``, ``error`` and ``metadata`` keys.
        ``success`` mirrors ``response.is_success``; transport errors and
        timeouts are reported with ``success=False`` rather than raised.
    """
    headers = headers or {}

    try:
        async with httpx.AsyncClient(timeout=_TIMEOUT, follow_redirects=True) as client:
            response = await client.request(
                method=method.upper(),
                url=url,
                headers=headers,
                json=body,
                params=params,
            )

            try:
                # NOTE(review): JSON bodies are pretty-printed in full, with no
                # size cap — confirm that is intended for very large payloads.
                body_repr = json.dumps(response.json(), ensure_ascii=False, indent=2)
            except ValueError:
                # Not JSON (decode errors are ValueError subclasses): fall back
                # to the raw text, and say so when it had to be shortened.
                raw = response.text
                body_repr = raw[:_MAX_BODY]
                if len(raw) > _MAX_BODY:
                    body_repr += f"\n\n[… truncated at {_MAX_BODY} chars]"

            output = f"Status: {response.status_code}\n\n{body_repr}"
            return {
                "success": response.is_success,
                "output": output,
                "error": None if response.is_success else f"HTTP {response.status_code}",
                "metadata": {"status_code": response.status_code, "headers": dict(response.headers)},
            }
    except httpx.TimeoutException:
        return {
            "success": False,
            "output": f"Request timed out after {_TIMEOUT}s",
            "error": "timeout",
            "metadata": {},
        }
    except Exception as e:
        # Tool boundary: any other failure (bad URL, connection error, ...) is
        # reported in the result envelope instead of propagating.
        return {
            "success": False,
            "output": f"Request failed: {e}",
            "error": str(e),
            "metadata": {},
        }
# ---------------------------------------------------------------------------
# mcp-servers/navi-web/app/search.py -- new file
# ---------------------------------------------------------------------------
"""web_search — SearXNG primary, DDG fallback, Brave tertiary."""

from __future__ import annotations

import asyncio
from typing import Any

import httpx
from ddgs import DDGS

from .config import Settings

_DDG_RETRY_DELAY = 2.0  # seconds slept between the two DuckDuckGo attempts
_TIMEOUT = 15.0  # passed to httpx.AsyncClient(timeout=...)


async def web_search(
    settings: Settings,
    query: str,
    max_results: int = 5,
) -> dict[str, Any]:
    """Search the web, trying each configured backend in turn.

    Order: SearXNG (only when ``settings.searxng_url`` is set), DuckDuckGo
    (two attempts), Brave (only when ``settings.brave_api_key`` is set). The
    first backend that returns at least one result wins.

    Args:
        settings: Backend configuration.
        query: Search query; must contain non-whitespace text.
        max_results: Desired number of results; values below 1 are treated
            as 1.

    Returns:
        Dict with ``success``, ``output``, ``error`` and ``metadata`` keys.
        When every backend fails, ``error`` holds the most recent failure.
    """
    if not query.strip():
        return {
            "success": False,
            "output": "Search query must not be empty",
            "error": "empty_query",
            "metadata": {},
        }
    # A non-positive count would turn the result slices into "drop from the
    # end" (or "nothing") and is not a valid Brave `count`.
    max_results = max(1, max_results)
    last_error = ""

    # Each backend is wrapped in a deliberately broad handler: any failure
    # must fall through to the next backend rather than abort the search.

    # 1. SearXNG (primary)
    if settings.searxng_url:
        try:
            results = await _searxng(settings, query, max_results)
        except Exception as e:
            last_error = f"SearXNG: {e}"
        else:
            if results:
                return _format(results, source="SearXNG")
            last_error = "SearXNG: empty response"

    # 2. DuckDuckGo html fallback, retry once
    def _ddg_text() -> list[dict]:
        return list(
            DDGS().text(query, backend="html", max_results=max_results, region=settings.ddg_region)
        )

    for attempt in range(2):
        try:
            # DDGS is synchronous; run it in a worker thread.
            results = await asyncio.to_thread(_ddg_text)
        except Exception as e:
            last_error = f"DDG: {e}"
        else:
            if results:
                return _format(results, source="DDG")
            last_error = "DDG: empty response"
        if attempt == 0:
            await asyncio.sleep(_DDG_RETRY_DELAY)

    # 3. Brave Search fallback
    if settings.brave_api_key:
        try:
            results = await _brave(settings, query, max_results)
        except Exception as e:
            last_error = f"Brave: {e}"
        else:
            if results:
                return _format(results, source="Brave")
            # Record the empty answer so the failure message names the backend
            # that was actually tried last.
            last_error = "Brave: empty response"

    return {
        "success": False,
        "output": f"All search backends failed for query: {query!r}\nLast error: {last_error}",
        "error": last_error,
        "metadata": {},
    }


async def _searxng(settings: Settings, query: str, max_results: int) -> list[dict]:
    """Query the SearXNG JSON API and return up to ``max_results`` hits.

    Hits are normalised to ``title`` / ``href`` / ``body`` keys. Entries
    without a URL are skipped so one malformed hit does not discard the rest.
    Raises ``httpx.HTTPStatusError`` on a non-2xx response.
    """
    async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
        resp = await client.get(
            f"{settings.searxng_url}/search",
            params={
                "q": query,
                "format": "json",
                "engines": "google,bing,duckduckgo",
                "language": "uk-UA",
            },
        )
        resp.raise_for_status()
        hits = resp.json().get("results", [])
        normalised = [
            {"title": r.get("title", ""), "href": r["url"], "body": r.get("content", "")}
            for r in hits
            if r.get("url")
        ]
        return normalised[: max(1, max_results)]


async def _brave(settings: Settings, query: str, max_results: int) -> list[dict]:
    """Query the Brave Search API and return up to ``max_results`` hits.

    Hits are normalised to ``title`` / ``href`` / ``body`` keys. The ``count``
    sent to the API is kept within 1..20. Entries without a URL are skipped.
    Raises ``httpx.HTTPStatusError`` on a non-2xx response.
    """
    wanted = max(1, max_results)
    async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
        resp = await client.get(
            "https://api.search.brave.com/res/v1/web/search",
            params={"q": query, "count": min(wanted, 20), "country": "ALL"},
            headers={
                "Accept": "application/json",
                "Accept-Encoding": "gzip",
                "X-Subscription-Token": settings.brave_api_key,
            },
        )
        resp.raise_for_status()
        items = resp.json().get("web", {}).get("results", [])
        normalised = [
            {"title": r.get("title", ""), "href": r["url"], "body": r.get("description", "")}
            for r in items
            if r.get("url")
        ]
        return normalised[:wanted]


def _format(results: list[dict], source: str) -> dict[str, Any]:
    """Render search hits as a numbered text list inside a success envelope.

    Accepts both the normalised keys (``href`` / ``body``) and the raw
    ``url`` / ``description`` keys. The unmodified ``results`` list and the
    ``source`` name are returned in ``metadata``.
    """
    lines = [
        f"[{number}] {r.get('title', '')}\n"
        f"    URL: {r.get('href') or r.get('url', '')}\n"
        f"    {r.get('body') or r.get('description', '')}"
        for number, r in enumerate(results, start=1)
    ]
    return {
        "success": True,
        "output": f"[{source}]\n\n" + "\n\n".join(lines),
        "error": None,
        "metadata": {"results": results, "source": source},
    }
b/mcp-servers/navi-web/pyproject.toml new file mode 100644 index 0000000..4beab11 --- /dev/null +++ b/mcp-servers/navi-web/pyproject.toml @@ -0,0 +1,23 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "mcp-server-navi-web" +version = "0.1.0" +description = "MCP server for Navi web tools (search, browse, HTTP)" +requires-python = ">=3.11" +dependencies = [ + "mcp>=1.27", + "httpx>=0.27", + "ddgs>=1.0", + "playwright>=1.40", + "pydantic>=2.0", +] + +[project.scripts] +mcp-server-navi-web = "app.mcp_server:main" + +[tool.setuptools.packages.find] +where = ["."] +include = ["app*"] diff --git a/mcp_servers.json b/mcp_servers.json index 68d9313..f13d36e 100644 --- a/mcp_servers.json +++ b/mcp_servers.json @@ -47,5 +47,26 @@ ] }, "instructions": "Navi 3D MCP server provides OpenSCAD-based 3D modeling tools.\n\nUse it when the task involves generating 3D models, rendering previews, or linting OpenSCAD source.\n\nWorkflow:\n1. Write the .scad script to the current session directory via filesystem.\n2. Call lint_scad first to catch common mistakes.\n3. Call compile_scad to produce the STL.\n4. Call render_stl to generate PNG previews.\n5. Use content_publish or share_file to show results to the user.\n\nAll paths are session-scoped. Pass the exact Navi session_id.\n\nABSOLUTE RULE — NEVER bypass MCP tools:\nYou MUST NOT use filesystem, terminal, code_exec, or any direct file access to read or write 3D model files that belong to the navi-3d knowledge base. Use only the MCP tools listed above." 
+ }, + "navi-web": { + "transport": "stdio", + "command": "/home/gmikcon/Projects/navi-1/mcp-servers/navi-web/.venv/bin/python", + "args": ["-m", "app.mcp_server"], + "cwd": "/home/gmikcon/Projects/navi-1/mcp-servers/navi-web", + "env": { + "NAVI_WEB_MCP_TRANSPORT": "stdio" + }, + "groups": { + "search": [ + "web_search" + ], + "browse": [ + "web_view" + ], + "request": [ + "http_request" + ] + }, + "instructions": "Navi Web MCP server provides web search, browsing, and raw HTTP tools.\n\nUse it when the task involves:\n- searching the web for current info, docs, or real-time data;\n- opening a URL in a browser to read human-readable content;\n- making REST API calls, webhooks, or raw HTTP requests.\n\nWorkflow:\n1. web_search — find relevant pages or facts.\n2. web_view — open promising URLs to read full content.\n3. http_request — call APIs or services requiring headers/auth.\n\nAll three tools are stateless and work with public URLs.\nNo session_id or filesystem paths are required.\n\nABSOLUTE RULE — NEVER bypass MCP tools:\nYou MUST NOT use filesystem, terminal, code_exec, or any direct file access to read or write web content. Use only the MCP tools listed above." 
} } \ No newline at end of file diff --git a/navi/core/registry.py b/navi/core/registry.py index 14a0903..fe31b6f 100644 --- a/navi/core/registry.py +++ b/navi/core/registry.py @@ -10,7 +10,6 @@ CodeExecTool, DeleteToolTool, FilesystemTool, - HttpRequestTool, ImageViewTool, ListProfilesTool, MemoryTool, @@ -23,8 +22,6 @@ TestToolTool, TodoTool, Tool, - WebSearchTool, - WebViewTool, ) from navi.tools.list_tools import ListToolsTool from navi.tools.reload_tools import ReloadToolsTool @@ -193,7 +190,7 @@ manual_tool = ToolManualTool(registry=tools) memory_tool = MemoryTool(memory_store) if memory_store else None mcp_status_tool = McpStatusTool() - builtins = [WebSearchTool(), FilesystemTool(ai_helper=ai_helper), HttpRequestTool(), WebViewTool(), + builtins = [FilesystemTool(ai_helper=ai_helper), CodeExecTool(), TerminalTool(), SshExecTool(), ImageViewTool(), ShareFileTool(), ContentPublishTool(), TestToolTool(), TodoTool(), ScratchpadTool(), ReflectTool(ai_helper=ai_helper), diff --git a/navi/profiles/developer/config.json b/navi/profiles/developer/config.json index 6ee35f0..ca02165 100644 --- a/navi/profiles/developer/config.json +++ b/navi/profiles/developer/config.json @@ -31,9 +31,6 @@ "todo", "scratchpad", "reflect", - "web_search", - "web_view", - "http_request", "filesystem", "code_exec", "terminal", @@ -48,9 +45,6 @@ "reflect", "switch_profile", "list_profiles", - "web_search", - "web_view", - "http_request", "filesystem", "code_exec", "terminal", @@ -68,5 +62,12 @@ "planning_phase3_enabled": true, "top_k": 40, "top_p": 0.88, - "num_thread": 11 -} + "num_thread": 11, + "mcp_servers": { + "navi-web": [ + "search", + "browse", + "request" + ] + } +} \ No newline at end of file diff --git a/navi/profiles/discuss/config.json b/navi/profiles/discuss/config.json index ac80641..4ea327f 100644 --- a/navi/profiles/discuss/config.json +++ b/navi/profiles/discuss/config.json @@ -12,8 +12,6 @@ "temperature": 0.65, "max_iterations": 30, "enabled_tools": [ - "web_search", - 
"web_view", "scratchpad", "reflect", "memory", @@ -27,7 +25,14 @@ "filesystem" ], "mcp_servers": { - "gnexus-book": ["read"] + "gnexus-book": [ + "read" + ], + "navi-web": [ + "search", + "browse", + "request" + ] }, "subagent_tools": [], "planning_enabled": false, @@ -47,4 +52,4 @@ "top_k": 80, "top_p": 0.95, "num_thread": 11 -} +} \ No newline at end of file diff --git a/navi/profiles/modeler_3d/config.json b/navi/profiles/modeler_3d/config.json index b31dc6e..9e266a9 100644 --- a/navi/profiles/modeler_3d/config.json +++ b/navi/profiles/modeler_3d/config.json @@ -32,8 +32,6 @@ "subagent_planning_enabled": false, "subagent_think_enabled": false, "subagent_tools": [ - "web_search", - "web_view", "filesystem", "image_view" ], @@ -43,8 +41,6 @@ "reflect", "switch_profile", "list_profiles", - "web_search", - "web_view", "filesystem", "code_exec", "terminal", @@ -57,9 +53,17 @@ "content_publish" ], "mcp_servers": { - "navi-3d": ["modeling", "analysis"] + "navi-3d": [ + "modeling", + "analysis" + ], + "navi-web": [ + "search", + "browse", + "request" + ] }, "top_k": 30, "top_p": 0.85, "num_thread": 11 -} +} \ No newline at end of file diff --git a/navi/profiles/secretary/config.json b/navi/profiles/secretary/config.json index 6b6f891..a1261d5 100644 --- a/navi/profiles/secretary/config.json +++ b/navi/profiles/secretary/config.json @@ -30,9 +30,6 @@ "subagent_tools": [ "scratchpad", "reflect", - "web_search", - "web_view", - "http_request", "filesystem", "code_exec", "image_view", @@ -46,8 +43,6 @@ "reflect", "switch_profile", "list_profiles", - "web_search", - "web_view", "filesystem", "code_exec", "image_view", @@ -66,5 +61,12 @@ "planning_phase3_enabled": true, "top_k": 50, "top_p": 0.9, - "num_thread": 11 -} + "num_thread": 11, + "mcp_servers": { + "navi-web": [ + "search", + "browse", + "request" + ] + } +} \ No newline at end of file diff --git a/navi/profiles/server_admin/config.json b/navi/profiles/server_admin/config.json index 48bd6b8..5bc3a06 100644 --- 
a/navi/profiles/server_admin/config.json +++ b/navi/profiles/server_admin/config.json @@ -30,8 +30,6 @@ "subagent_tools": [ "scratchpad", "reflect", - "web_search", - "http_request", "filesystem", "code_exec", "terminal", @@ -46,9 +44,6 @@ "reflect", "switch_profile", "list_profiles", - "web_search", - "web_view", - "http_request", "filesystem", "code_exec", "terminal", @@ -63,7 +58,15 @@ "gmail" ], "mcp_servers": { - "gnexus-book": ["read", "write"] + "gnexus-book": [ + "read", + "write" + ], + "navi-web": [ + "search", + "browse", + "request" + ] }, "planning_mandatory": false, "planning_phase1_enabled": true, @@ -72,4 +75,4 @@ "top_k": 30, "top_p": 0.8, "num_thread": 11 -} +} \ No newline at end of file diff --git a/navi/profiles/tool_developer/config.json b/navi/profiles/tool_developer/config.json index c142a1e..183b34a 100644 --- a/navi/profiles/tool_developer/config.json +++ b/navi/profiles/tool_developer/config.json @@ -31,9 +31,6 @@ "todo", "scratchpad", "reflect", - "web_search", - "web_view", - "http_request", "filesystem", "code_exec", "terminal", @@ -53,9 +50,6 @@ "reflect", "switch_profile", "list_profiles", - "web_search", - "web_view", - "http_request", "filesystem", "code_exec", "terminal", @@ -77,5 +71,12 @@ "planning_phase3_enabled": true, "top_k": 40, "top_p": 0.85, - "num_thread": 11 -} + "num_thread": 11, + "mcp_servers": { + "navi-web": [ + "search", + "browse", + "request" + ] + } +} \ No newline at end of file diff --git a/navi/tools/__init__.py b/navi/tools/__init__.py index 3296243..8ace7c1 100644 --- a/navi/tools/__init__.py +++ b/navi/tools/__init__.py @@ -2,7 +2,6 @@ from .code_exec import CodeExecTool from .delete_tool import DeleteToolTool from .filesystem import FilesystemTool -from .http_request import HttpRequestTool from .image_view import ImageViewTool from .ssh_exec import SshExecTool from .spawn_agent import SpawnAgentTool @@ -14,21 +13,16 @@ from .switch_profile import SwitchProfileTool from .list_profiles import 
ListProfilesTool from .reflect import ReflectTool -from .web_search import WebSearchTool -from .web_view import WebViewTool __all__ = [ "Tool", "ToolResult", "DeleteToolTool", - "WebSearchTool", "FilesystemTool", - "HttpRequestTool", "CodeExecTool", "TerminalTool", "SshExecTool", "ImageViewTool", - "WebViewTool", "MemoryTool", "TestToolTool", "SpawnAgentTool",