# navi-1 / navi / tools / web_search.py
"""Web search tool — DDG multi-backend retry with Brave/SearXNG fallback."""

import asyncio

import httpx
from duckduckgo_search import DDGS

from navi.config import settings

from .base import Tool, ToolResult

# DDG backends tried in order. Each hits a different DDG endpoint, so if one
# is rate-limited or blocked the next one usually succeeds.
_DDG_BACKENDS = ["auto", "html", "lite"]
_DDG_RETRY_DELAY = 1.5  # seconds between backend attempts


class WebSearchTool(Tool):
    """Web search tool: DuckDuckGo first, then optional Brave / SearXNG fallbacks.

    ``execute`` tries every entry of ``_DDG_BACKENDS`` in order, then Brave
    Search (only if ``settings.brave_search_api_key`` is set), then SearXNG
    (only if ``settings.searxng_url`` is set). The first backend that yields a
    non-empty result list wins.
    """

    name = "web_search"
    description = (
        "Search the web (DuckDuckGo with Brave/SearXNG fallback). "
        "Use when you need current info, real-time data, documentation, or facts you're "
        "uncertain about. Returns titles, URLs, snippets. "
        "Prefer this over your own training knowledge when recency or accuracy matters."
    )
    parameters = {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Search query"},
            "max_results": {
                "type": "integer",
                "description": "Number of results to return (default 5)",
                "default": 5,
            },
        },
        "required": ["query"],
    }

    async def execute(self, params: dict) -> ToolResult:
        """Run the search and return the first non-empty backend response.

        Args:
            params: Tool arguments. ``query`` is required; ``max_results`` is
                optional and falls back to 5 when missing, ``None`` or not
                convertible to an integer. Values below 1 are raised to 1.

        Returns:
            A successful ``ToolResult`` with the formatted hits, or a failed
            one carrying the most recent backend error when every backend
            errored or returned nothing.

        Raises:
            KeyError: If ``params`` has no ``query`` entry.
        """
        query = params["query"]
        max_results = self._coerce_max_results(params.get("max_results"))

        # Most recent failure from *any* backend (DDG, Brave or SearXNG).
        last_error = ""

        # 1. Try DuckDuckGo with each backend in sequence
        final_attempt = len(_DDG_BACKENDS) - 1
        for attempt, backend in enumerate(_DDG_BACKENDS):
            try:
                # DDGS is synchronous, so run it in a worker thread. The
                # default argument pins the current backend for the lambda.
                results = await asyncio.to_thread(
                    lambda b=backend: list(
                        DDGS().text(query, backend=b, max_results=max_results)
                    )
                )
                if results:
                    return self._format(results, source=f"DDG/{backend}")
                # Empty list — DDG returned nothing (blocked or no results)
                last_error = "empty response"
            except Exception as e:  # best effort: move on to the next backend
                last_error = self._describe_error(e)
            # Pause between attempts, but not after the final one.
            if attempt < final_attempt:
                await asyncio.sleep(_DDG_RETRY_DELAY)

        # 2. Brave Search fallback
        if settings.brave_search_api_key:
            try:
                results = await self._brave(query, max_results)
                if results:
                    return self._format(results, source="Brave")
                last_error = "Brave: empty response"
            except Exception as e:  # best effort: fall through to SearXNG
                last_error = f"Brave: {self._describe_error(e)}"

        # 3. SearXNG fallback
        if settings.searxng_url:
            try:
                results = await self._searxng(query, max_results)
                if results:
                    return self._format(results, source="SearXNG")
                last_error = "SearXNG: empty response"
            except Exception as e:  # best effort: report the failure below
                last_error = f"SearXNG: {self._describe_error(e)}"

        message = (
            f"All search backends failed for query: {query!r}\n"
            f"Last error: {last_error}"
        )
        # The tip is only useful while at least one fallback is unconfigured.
        if not (settings.brave_search_api_key and settings.searxng_url):
            message += (
                "\nTip: set BRAVE_SEARCH_API_KEY or SEARXNG_URL in .env for fallback backends."
            )
        return ToolResult(success=False, output=message, error=last_error)

    # ── helpers ───────────────────────────────────────────────────────────────

    @staticmethod
    def _coerce_max_results(value, default: int = 5) -> int:
        """Turn a caller-supplied ``max_results`` into a positive integer.

        Returns ``default`` when ``value`` is ``None`` or cannot be converted
        with ``int()``; otherwise the converted value, raised to at least 1.
        """
        try:
            count = int(value)
        except (TypeError, ValueError):
            return default
        return max(1, count)

    @staticmethod
    def _describe_error(exc: Exception) -> str:
        """Return ``str(exc)``, or the exception class name if that is empty."""
        return str(exc) or type(exc).__name__

    @staticmethod
    def _normalize(items, body_key: str, limit: int) -> list[dict]:
        """Map raw API items to ``{"title", "href", "body"}`` dicts.

        Entries that are not dicts or have no ``url`` are skipped, so a single
        malformed item does not discard the whole response. At most ``limit``
        entries are returned.
        """
        normalized: list[dict] = []
        for item in items or []:
            if not isinstance(item, dict) or not item.get("url"):
                continue
            normalized.append(
                {
                    "title": item.get("title") or "",
                    "href": item["url"],
                    "body": item.get(body_key) or "",
                }
            )
            if len(normalized) >= limit:
                break
        return normalized

    def _format(self, results: list[dict], source: str) -> ToolResult:
        """Render ``results`` as a numbered text listing tagged with ``source``.

        Each result may provide its link under ``href`` or ``url`` and its
        snippet under ``body`` or ``description``. The raw ``results`` and
        ``source`` are also returned in the ``ToolResult`` metadata.
        """
        lines = [
            f"[{index}] {r.get('title', '')}\n"
            f"    URL: {r.get('href') or r.get('url', '')}\n"
            f"    {r.get('body') or r.get('description', '')}"
            for index, r in enumerate(results, start=1)
        ]
        output = f"[{source}]\n\n" + "\n\n".join(lines)
        return ToolResult(
            success=True,
            output=output,
            metadata={"results": results, "source": source},
        )

    async def _brave(self, query: str, max_results: int) -> list[dict]:
        """Query the Brave Search web endpoint and return normalised hits.

        Raises whatever ``httpx`` raises for transport errors or non-2xx
        responses (via ``raise_for_status``); ``execute`` handles those.
        """
        async with httpx.AsyncClient(timeout=15.0) as client:
            resp = await client.get(
                "https://api.search.brave.com/res/v1/web/search",
                # Requested count is kept within 1..20 (20 is presumably the
                # API's per-request maximum — confirm against the Brave docs).
                params={"q": query, "count": max(1, min(max_results, 20))},
                headers={
                    "Accept": "application/json",
                    "Accept-Encoding": "gzip",
                    "X-Subscription-Token": settings.brave_search_api_key,
                },
            )
            resp.raise_for_status()
            # Tolerate a missing or null "web" section.
            web_section = resp.json().get("web") or {}
            return self._normalize(
                web_section.get("results"), "description", max_results
            )

    async def _searxng(self, query: str, max_results: int) -> list[dict]:
        """Query the configured SearXNG instance and return normalised hits.

        Raises whatever ``httpx`` raises for transport errors or non-2xx
        responses (via ``raise_for_status``); ``execute`` handles those.
        """
        async with httpx.AsyncClient(timeout=15.0) as client:
            resp = await client.get(
                f"{settings.searxng_url.rstrip('/')}/search",
                params={
                    "q": query,
                    "format": "json",
                    "engines": "google,bing,duckduckgo",
                },
            )
            resp.raise_for_status()
            return self._normalize(
                resp.json().get("results"), "content", max_results
            )