"""TTS engine abstraction and dummy backend."""
import asyncio
from abc import ABC, abstractmethod
from pathlib import Path
import numpy as np
class TTSEngine(ABC):
"""Base interface for a TTS backend."""
sample_rate: int = 24_000
@abstractmethod
async def synthesize(
self,
text: str,
ref_audio_path: Path | None,
language: str,
speed: float,
emotion: str,
) -> np.ndarray:
"""Return audio as float32 ndarray normalized to [-1, 1]."""
...
@abstractmethod
async def warm_up(self) -> None:
"""Optional warm-up inference."""
...
class DummyTTSEngine(TTSEngine):
    """Stand-in backend that emits a 440 Hz sine beep instead of speech.

    Intended for testing the surrounding pipeline without a GPU model.
    The beep length scales with the length of the input text.
    """

    def __init__(self, sample_rate: int = 24_000):
        """Store the output sample rate (Hz) used to size generated audio."""
        self.sample_rate = sample_rate

    async def synthesize(
        self,
        text: str,
        ref_audio_path: Path | None,
        language: str,
        speed: float,
        emotion: str,
    ) -> np.ndarray:
        """Generate a Hann-windowed 440 Hz tone sized from ``text``.

        ``ref_audio_path``, ``language`` and ``emotion`` are accepted for
        interface compatibility and are not used.

        Args:
            text: Input text; only its length is used (0.08 s per
                character, with a 0.5 s minimum before speed scaling).
            ref_audio_path: Ignored.
            language: Ignored.
            speed: Rate factor; the duration is divided by it. Must be
                greater than zero.
            emotion: Ignored.

        Returns:
            A 1-D float32 array of ``int(sample_rate * duration)`` samples
            with peak amplitude at most 0.3.

        Raises:
            ValueError: If ``speed`` is zero, negative, or NaN.
        """
        # Reject unusable speeds up front. Previously zero raised a bare
        # ZeroDivisionError, while negative/NaN values failed later inside
        # numpy/int() with messages unrelated to the real cause.
        if not speed > 0:
            raise ValueError(f"speed must be greater than 0, got {speed!r}")

        duration_sec = max(0.5, len(text) * 0.08) / speed
        num_samples = int(self.sample_rate * duration_sec)
        t = np.linspace(0, duration_sec, num_samples, endpoint=False)
        # 440 Hz tone at 0.3 amplitude.
        audio = 0.3 * np.sin(2 * np.pi * 440 * t)
        # Full-length Hann window: the tone ramps up from zero and back
        # down to zero, so there are no clicks at the segment edges.
        audio *= np.hanning(num_samples)
        return audio.astype(np.float32)

    async def warm_up(self) -> None:
        """No-op: the dummy engine has nothing to warm up."""