Newer
Older
navi-1 / navi / tools / web_search.py
"""Web search tool — DDG html backend (primary) with Brave/SearXNG fallback."""

import asyncio

import httpx
from ddgs import DDGS

from navi.config import settings

from .base import Tool, ToolResult

# html is the only DDG backend that reliably returns results; auto/lite are flaky.
# If the first html attempt fails or comes back empty, html is tried once more
# after this delay; only then does the search move on to the Brave fallback.
_DDG_RETRY_DELAY = 2.0  # seconds to wait before the second DDG attempt


class WebSearchTool(Tool):
    """Web search tool that tries several backends in order.

    Order of attempts:

    1. SearXNG, only when ``settings.searxng_url`` is set.
    2. DuckDuckGo (``html`` backend), attempted twice with a pause of
       ``_DDG_RETRY_DELAY`` seconds after the first attempt.
    3. Brave Search, only when ``settings.brave_search_api_key`` is set.

    The first backend that yields a non-empty result list wins. Every backend
    result is normalised to a dict with ``title``, ``href`` and ``body`` keys
    before formatting.
    """

    name = "web_search"
    # NOTE: this text is shown to the model verbatim; the real backend order is
    # SearXNG -> DDG -> Brave (see ``execute``).
    description = (
        "Search the web (DuckDuckGo with Brave/SearXNG fallback). "
        "Use when you need current info, real-time data, documentation, or facts you're "
        "uncertain about. Returns titles, URLs, snippets. "
        "Prefer this over your own training knowledge when recency or accuracy matters."
    )
    parameters = {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Search query"},
            "max_results": {
                "type": "integer",
                "description": "Number of results to return (default 5)",
                "default": 5,
            },
        },
        "required": ["query"],
    }

    async def execute(self, params: dict) -> ToolResult:
        """Run the search and return the first non-empty backend result.

        Args:
            params: Tool arguments. ``query`` is required; ``max_results`` is
                optional and falls back to 5 when absent or not convertible to
                an integer. Values below 1 are raised to 1.

        Returns:
            A successful ``ToolResult`` produced by ``_format`` for the first
            backend that returned results, or a failed ``ToolResult`` carrying
            the error of the last backend that was tried.
        """
        query = params.get("query")
        # Report a missing/blank query the same way as any other failure
        # instead of letting a KeyError escape from the tool.
        if query is None or (isinstance(query, str) and not query.strip()):
            message = "Missing or empty 'query' parameter"
            return ToolResult(success=False, output=message, error=message)

        max_results = self._parse_max_results(params.get("max_results"))
        last_error: str = ""

        # 1. SearXNG (primary — self-hosted, aggregates multiple engines)
        if settings.searxng_url:
            try:
                results = await self._searxng(query, max_results)
                if results:
                    return self._format(results, source="SearXNG")
                last_error = "SearXNG: empty response"
            except Exception as e:
                last_error = f"SearXNG: {e}"

        # 2. DuckDuckGo html fallback, retry once
        for attempt in range(2):
            try:
                results = await self._ddg(query, max_results)
                if results:
                    return self._format(results, source="DDG")
                last_error = "DDG: empty response"
            except Exception as e:
                last_error = f"DDG: {e}"
            if attempt == 0:
                # Pause only between the two attempts, not after the last one.
                await asyncio.sleep(_DDG_RETRY_DELAY)

        # 3. Brave Search fallback
        if settings.brave_search_api_key:
            try:
                results = await self._brave(query, max_results)
                if results:
                    return self._format(results, source="Brave")
                # Record the empty result so the final message names the
                # backend that was actually tried last.
                last_error = "Brave: empty response"
            except Exception as e:
                last_error = f"Brave: {e}"

        return ToolResult(
            success=False,
            output=(
                f"All search backends failed for query: {query!r}\n"
                f"Last error: {last_error}"
            ),
            error=last_error,
        )

    # ── helpers ───────────────────────────────────────────────────────────────

    @staticmethod
    def _parse_max_results(raw: object, default: int = 5) -> int:
        """Coerce the ``max_results`` argument to a positive integer.

        Returns ``default`` when ``raw`` is ``None`` or cannot be converted by
        ``int()``; otherwise returns the converted value, but never less than 1.
        """
        if raw is None:
            return default
        try:
            value = int(raw)
        except (TypeError, ValueError, OverflowError):
            return default
        # Zero/negative counts would yield empty or oddly sliced result lists.
        return max(value, 1)

    def _format(self, results: list[dict], source: str) -> ToolResult:
        """Render result dicts as a numbered text listing tagged with ``source``.

        Each entry shows the title, the URL (``href``, else ``url``) and the
        snippet (``body``, else ``description``). The raw ``results`` list and
        ``source`` are also attached to the result's ``metadata``.
        """
        lines = [
            f"[{i+1}] {r.get('title', '')}\n"
            f"    URL: {r.get('href') or r.get('url', '')}\n"
            f"    {r.get('body') or r.get('description', '')}"
            for i, r in enumerate(results)
        ]
        output = f"[{source}]\n\n" + "\n\n".join(lines)
        return ToolResult(
            success=True,
            output=output,
            metadata={"results": results, "source": source},
        )

    async def _ddg(self, query: str, max_results: int) -> list[dict]:
        """Query DuckDuckGo's ``html`` backend in a worker thread.

        The ``DDGS`` call is synchronous, so it is pushed to a thread via
        ``asyncio.to_thread``. Any exception it raises propagates to the caller.
        """

        def _search() -> list[dict]:
            return list(
                DDGS().text(query, backend="html", max_results=max_results, region="ua-uk")
            )

        return await asyncio.to_thread(_search)

    async def _brave(self, query: str, max_results: int) -> list[dict]:
        """Query the Brave Search web endpoint and normalise its results.

        Requests at most 20 results. Raises whatever ``httpx`` raises on
        transport errors or a non-success status (``raise_for_status``).
        """
        async with httpx.AsyncClient(timeout=15.0) as client:
            resp = await client.get(
                "https://api.search.brave.com/res/v1/web/search",
                params={"q": query, "count": min(max_results, 20), "country": "ALL"},
                headers={
                    "Accept": "application/json",
                    "Accept-Encoding": "gzip",
                    "X-Subscription-Token": settings.brave_search_api_key,
                },
            )
            resp.raise_for_status()
            # Tolerate a missing or null "web"/"results" section.
            items = (resp.json().get("web") or {}).get("results") or []
            # .get() keeps one malformed item from discarding the whole page.
            return [
                {
                    "title": r.get("title", ""),
                    "href": r.get("url", ""),
                    "body": r.get("description", ""),
                }
                for r in items
            ]

    async def _searxng(self, query: str, max_results: int) -> list[dict]:
        """Query the configured SearXNG instance and normalise its results.

        Calls ``<settings.searxng_url>/search`` with ``format=json`` and keeps
        the first ``max_results`` entries. Raises whatever ``httpx`` raises on
        transport errors or a non-success status (``raise_for_status``).
        """
        async with httpx.AsyncClient(timeout=15.0) as client:
            resp = await client.get(
                f"{settings.searxng_url.rstrip('/')}/search",
                params={
                    "q": query,
                    "format": "json",
                    "engines": "google,bing,duckduckgo",
                    "language": "uk-UA",
                },
            )
            resp.raise_for_status()
            # Tolerate a missing or null "results" section.
            results = (resp.json().get("results") or [])[:max_results]
            # .get() keeps one malformed item from discarding the whole page.
            return [
                {
                    "title": r.get("title", ""),
                    "href": r.get("url", ""),
                    "body": r.get("content", ""),
                }
                for r in results
            ]