Newer
Older
navi-1 / mcp-servers / navi-web / app / search.py
"""web_search — SearXNG primary, DDG fallback, Brave tertiary."""

from __future__ import annotations

import asyncio
from typing import Any

import httpx
from ddgs import DDGS

from .config import Settings

# Pause (seconds, passed to asyncio.sleep) between the first and second DDG attempt.
_DDG_RETRY_DELAY = 2.0
# Timeout value handed to httpx.AsyncClient for the SearXNG and Brave requests.
_TIMEOUT = 15.0


async def web_search(
    settings: Settings,
    query: str,
    max_results: int = 5,
) -> dict[str, Any]:
    """Search the web, trying each backend in order until one yields results.

    Backend order:
      1. SearXNG — only when ``settings.searxng_url`` is truthy.
      2. DuckDuckGo (html backend) — two attempts with a pause in between.
      3. Brave Search — only when ``settings.brave_api_key`` is truthy.

    Args:
        settings: Provides ``searxng_url``, ``ddg_region`` and ``brave_api_key``.
        query: The search query text.
        max_results: Upper bound on the number of results requested.

    Returns:
        A dict with the keys ``success``, ``output``, ``error`` and
        ``metadata``. On success this is whatever ``_format`` builds for the
        first backend that returned results. When every backend fails or
        comes back empty, ``success`` is ``False`` and ``error`` holds the
        message of the most recent failure.
    """
    last_error = ""

    # 1. SearXNG (primary)
    if settings.searxng_url:
        try:
            results = await _searxng(settings, query, max_results)
            if results:
                return _format(results, source="SearXNG")
            last_error = "SearXNG: empty response"
        except Exception as e:
            # Any failure here just means "fall through to the next backend".
            last_error = f"SearXNG: {e}"

    # 2. DuckDuckGo html fallback, retry once
    for attempt in range(2):
        try:
            # DDGS is a blocking client, so it is run in a worker thread.
            results = await asyncio.to_thread(
                lambda: list(
                    DDGS().text(query, backend="html", max_results=max_results, region=settings.ddg_region)
                )
            )
            if results:
                return _format(results, source="DDG")
            last_error = "DDG: empty response"
        except Exception as e:
            last_error = f"DDG: {e}"
        if attempt == 0:
            # Wait only before the retry, not after the final attempt.
            await asyncio.sleep(_DDG_RETRY_DELAY)

    # 3. Brave Search fallback
    if settings.brave_api_key:
        try:
            results = await _brave(settings, query, max_results)
            if results:
                return _format(results, source="Brave")
            # Record the empty response so the reported error names the
            # backend that was tried last instead of a stale DDG message.
            last_error = "Brave: empty response"
        except Exception as e:
            last_error = f"Brave: {e}"

    return {
        "success": False,
        "output": f"All search backends failed for query: {query!r}\nLast error: {last_error}",
        "error": last_error,
        "metadata": {},
    }


async def _searxng(settings: Settings, query: str, max_results: int) -> list[dict]:
    """Query the SearXNG JSON API and normalize its results.

    Args:
        settings: Provides ``searxng_url``, the base URL of the instance.
        query: The search query text.
        max_results: Maximum number of normalized results to return.

    Returns:
        Up to ``max_results`` dicts with the keys ``title``, ``href`` and
        ``body``. Entries without a URL are skipped.

    Raises:
        httpx.HTTPStatusError: Raised by ``raise_for_status`` on an error
            response. Other exceptions from the request or from JSON
            decoding propagate unchanged.
    """
    # Strip a trailing slash so a base URL like "http://host/" does not
    # produce "http://host//search".
    base_url = str(settings.searxng_url).rstrip("/")
    async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
        resp = await client.get(
            f"{base_url}/search",
            params={
                "q": query,
                "format": "json",
                "engines": "google,bing,duckduckgo",
                "language": "uk-UA",
            },
        )
        resp.raise_for_status()
        raw_results = resp.json().get("results", [])
        # One malformed entry must not discard the whole response: drop
        # entries with no URL and default a missing title to "".
        normalized = [
            {"title": r.get("title", ""), "href": r["url"], "body": r.get("content", "")}
            for r in raw_results
            if r.get("url")
        ]
        # Truncate after filtering so skipped entries do not eat into the quota.
        return normalized[:max_results]


async def _brave(settings: Settings, query: str, max_results: int) -> list[dict]:
    """Query the Brave web-search endpoint and normalize its results.

    Args:
        settings: Provides ``brave_api_key``, sent as the subscription token.
        query: The search query text.
        max_results: Requested result count; the ``count`` sent is capped at 20.

    Returns:
        A list of dicts with the keys ``title``, ``href`` and ``body``, built
        from the ``web.results`` section of the JSON response.

    Raises:
        httpx.HTTPStatusError: Raised by ``raise_for_status`` on an error
            response. Other exceptions from the request, JSON decoding, or a
            missing ``title``/``url`` field propagate unchanged.
    """
    endpoint = "https://api.search.brave.com/res/v1/web/search"
    async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
        query_params = {"q": query, "count": min(max_results, 20), "country": "ALL"}
        request_headers = {
            "Accept": "application/json",
            "Accept-Encoding": "gzip",
            "X-Subscription-Token": settings.brave_api_key,
        }
        response = await client.get(endpoint, params=query_params, headers=request_headers)
        response.raise_for_status()

        payload = response.json()
        web_section = payload.get("web", {})
        normalized: list[dict] = []
        for hit in web_section.get("results", []):
            normalized.append(
                {"title": hit["title"], "href": hit["url"], "body": hit.get("description", "")}
            )
        return normalized


def _format(results: list[dict], source: str) -> dict[str, Any]:
    lines = [
        f"[{i+1}] {r.get('title', '')} — {r.get('href') or r.get('url', '')}"
        for i, r in enumerate(results)
    ]
    return {
        "success": True,
        "output": f"[{source}]\n" + "\n".join(lines),
        "error": None,
        "metadata": {"results": results, "source": source},
    }