"""web_search — SearXNG primary, DDG fallback, Brave tertiary."""
from __future__ import annotations
import asyncio
from typing import Any
import httpx
from ddgs import DDGS
from .config import Settings
# Pause, in seconds, between the first and second DuckDuckGo attempt in web_search.
_DDG_RETRY_DELAY = 2.0
# Timeout value handed to every httpx.AsyncClient created in this module
# (the SearXNG and Brave requests).
_TIMEOUT = 15.0
async def web_search(
    settings: Settings,
    query: str,
    max_results: int = 5,
) -> dict[str, Any]:
    """Search the web, trying each backend in turn until one yields results.

    Backend order:
      1. SearXNG, only when ``settings.searxng_url`` is set.
      2. DuckDuckGo (``html`` backend), attempted twice with a pause of
         ``_DDG_RETRY_DELAY`` seconds after the first attempt.
      3. Brave Search, only when ``settings.brave_api_key`` is set.

    Args:
        settings: Provides ``searxng_url``, ``ddg_region`` and ``brave_api_key``.
        query: The search query text.
        max_results: Maximum number of results requested from each backend.

    Returns:
        A dict with the keys ``success``, ``output``, ``error`` and
        ``metadata``. On success it is the payload built by ``_format`` for
        the first backend that returned results. When every backend fails or
        comes back empty, ``success`` is False, ``error`` holds the message of
        the last backend that was tried, and ``metadata`` is empty.
    """
    last_error = ""

    # 1. SearXNG (primary)
    if settings.searxng_url:
        try:
            results = await _searxng(settings, query, max_results)
            if results:
                return _format(results, source="SearXNG")
            last_error = "SearXNG: empty response"
        except Exception as e:
            # Deliberately broad: any failure just moves on to the next backend.
            last_error = f"SearXNG: {e}"

    # 2. DuckDuckGo html fallback, retry once
    for attempt in range(2):
        try:
            # DDGS is a blocking client, so run it in a worker thread to keep
            # the event loop responsive.
            results = await asyncio.to_thread(
                lambda: list(
                    DDGS().text(
                        query,
                        backend="html",
                        max_results=max_results,
                        region=settings.ddg_region,
                    )
                )
            )
            if results:
                return _format(results, source="DDG")
            last_error = "DDG: empty response"
        except Exception as e:
            last_error = f"DDG: {e}"
        if attempt == 0:
            # Back off once before the single retry; no sleep after the final attempt.
            await asyncio.sleep(_DDG_RETRY_DELAY)

    # 3. Brave Search fallback
    if settings.brave_api_key:
        try:
            results = await _brave(settings, query, max_results)
            if results:
                return _format(results, source="Brave")
            # Record the empty response like the other backends do, so the
            # final error is attributed to the backend that was tried last.
            last_error = "Brave: empty response"
        except Exception as e:
            last_error = f"Brave: {e}"

    return {
        "success": False,
        "output": f"All search backends failed for query: {query!r}\nLast error: {last_error}",
        "error": last_error,
        "metadata": {},
    }
async def _searxng(settings: Settings, query: str, max_results: int) -> list[dict]:
    """Query the SearXNG instance at ``settings.searxng_url`` via its JSON search endpoint.

    Args:
        settings: Provides ``searxng_url``, the base URL of the instance.
        query: The search query text.
        max_results: Maximum number of results to return.

    Returns:
        Up to ``max_results`` dicts with the keys ``title``, ``href`` and
        ``body``. Items that are not dicts or carry no ``url`` are skipped.

    Raises:
        httpx.HTTPStatusError: When the response status is an error
            (raised by ``raise_for_status``). Transport and JSON decoding
            errors propagate unchanged.
    """
    if max_results <= 0:
        # Nothing was asked for; this also prevents a negative slice bound
        # from silently dropping results from the end of the list.
        return []

    # A base URL configured with a trailing slash must not produce "//search".
    base_url = str(settings.searxng_url).rstrip("/")

    async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
        resp = await client.get(
            f"{base_url}/search",
            params={
                "q": query,
                "format": "json",
                "engines": "google,bing,duckduckgo",
                "language": "uk-UA",
            },
        )
        resp.raise_for_status()
        # "results" may be absent or null; treat both as "no results".
        raw_items = resp.json().get("results") or []

    # Skip malformed items instead of letting one KeyError discard the whole
    # response; a hit without a URL is of no use to the caller.
    hits = [
        {
            "title": r.get("title") or "",
            "href": r["url"],
            "body": r.get("content") or "",
        }
        for r in raw_items
        if isinstance(r, dict) and r.get("url")
    ]
    return hits[:max_results]
async def _brave(settings: Settings, query: str, max_results: int) -> list[dict]:
    """Query the Brave web search API using ``settings.brave_api_key``.

    Args:
        settings: Provides ``brave_api_key``, sent as ``X-Subscription-Token``.
        query: The search query text.
        max_results: Maximum number of results to return; the ``count``
            sent to the API is capped at 20.

    Returns:
        Up to ``max_results`` dicts with the keys ``title``, ``href`` and
        ``body``. Items that are not dicts or carry no ``url`` are skipped.

    Raises:
        httpx.HTTPStatusError: When the response status is an error
            (raised by ``raise_for_status``). Transport and JSON decoding
            errors propagate unchanged.
    """
    if max_results <= 0:
        # Nothing was asked for; avoid sending a zero or negative count.
        return []

    async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
        resp = await client.get(
            "https://api.search.brave.com/res/v1/web/search",
            params={"q": query, "count": min(max_results, 20), "country": "ALL"},
            headers={
                "Accept": "application/json",
                "Accept-Encoding": "gzip",
                "X-Subscription-Token": settings.brave_api_key,
            },
        )
        resp.raise_for_status()
        # Both "web" and its "results" may be absent or null when there are no hits.
        items = (resp.json().get("web") or {}).get("results") or []

    # Skip malformed items instead of letting one KeyError discard the whole
    # response; a hit without a URL is of no use to the caller.
    hits = [
        {
            "title": r.get("title") or "",
            "href": r["url"],
            "body": r.get("description") or "",
        }
        for r in items
        if isinstance(r, dict) and r.get("url")
    ]
    return hits[:max_results]
def _format(results: list[dict], source: str) -> dict[str, Any]:
    """Build the success payload for a list of search hits.

    Each hit becomes a numbered three-line entry (title, URL, snippet). The
    URL is read from ``href`` with ``url`` as fallback, and the snippet from
    ``body`` with ``description`` as fallback.

    Args:
        results: Search hits as dicts.
        source: Backend label; used as the ``[source]`` header of the output
            and stored in the metadata.

    Returns:
        A dict with ``success`` True, the rendered text in ``output``,
        ``error`` None, and ``metadata`` holding the untouched ``results``
        list plus ``source``.
    """
    entries = []
    for number, hit in enumerate(results, start=1):
        heading = hit.get("title", "")
        link = hit.get("href") or hit.get("url", "")
        snippet = hit.get("body") or hit.get("description", "")
        entries.append(f"[{number}] {heading}\n URL: {link}\n {snippet}")

    listing = "\n\n".join(entries)
    payload: dict[str, Any] = {
        "success": True,
        "output": f"[{source}]\n\n" + listing,
        "error": None,
        "metadata": {"results": results, "source": source},
    }
    return payload