from pathlib import Path
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application configuration loaded from environment variables.

    Every attribute below is a setting with a built-in default. Values may
    also be supplied through a UTF-8 encoded ``.env`` file, as declared in
    ``model_config``.
    """

    # pydantic v2 style configuration. This replaces the nested ``class Config``
    # form, which pydantic v2 still accepts but reports as deprecated. A plain
    # dict is used so that no additional import is required.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
    }

    # --- Server ---
    host: str = "0.0.0.0"
    port: int = 8765
    log_level: str = "INFO"

    # --- TTS model configuration ---
    tts_backend: str = "fish_speech"  # "dummy" / "f5_tts" / "xtts_v2" / "fish_speech"
    # XTTS-v2 model name (Coqui model manager path); used when backend is xtts_v2.
    tts_model_name: str = "tts_models/multilingual/multi-dataset/xtts_v2"
    # Local checkpoint path. For Fish Speech this is the folder containing model.pth,
    # firefly-gan-vq-fsq-8x1024-21hz-generator.pth, tokenizer.tiktoken, config.json, etc.
    tts_model_path: Path | None = None
    # Source tree path for Fish Speech modules (e.g. models/fish-speech-v1.5.1).
    tts_vocab_path: Path | None = None
    tts_sample_rate: int = 44_100
    tts_speed: float = 1.2  # env: TTS_SPEED

    # --- Reference voices directory ---
    voices_dir: Path = Path("voices")

    # --- Segmentation thresholds ---
    min_segment_length: int = 30
    max_segment_length: int = 200
    max_buffer_wait_ms: int = 500
    fast_start_initial: int = 12  # first segment threshold for lower latency
    fast_start_count: int = 3  # how many segments use progressive sizing

    # --- GPU / inference ---
    device: str = "cuda"  # or "cpu"
    dtype: str = "bfloat16"

    # --- Voice reference ---
    default_voice_ref: Path | None = None  # env: DEFAULT_VOICE_REF
    default_ref_text: str | None = None  # env: DEFAULT_REF_TEXT

    # --- S2-Pro backend settings ---
    s2_api_url: str = "http://127.0.0.1:8081"

    # --- Fish Speech-specific settings ---
    fish_compile: bool = False  # torch.compile the LLaMA model (slow first run)
    fish_chunk_length: int = 200  # 100-300; higher = longer coherent chunks
    fish_use_memory_cache: str = "on"  # "on" / "off" reference VQ cache
    fish_top_p: float = 0.7  # nucleus sampling (0-1); lower = more deterministic
    fish_temperature: float = 0.7  # sampling temperature; lower = more stable
    fish_repetition_penalty: float = 1.2  # >1 reduces repeated tokens
    fish_seed: int | None = None  # None = random; set for reproducible output
    fish_tail_silence_threshold: float = 0.02  # trim trailing silence below this RMS
    fish_lowpass_cutoff: int = 0  # Hz; low-pass filter output to reduce VQ noise (0 = off)

    # --- Warm-up ---
    warmup: bool = False  # run a dummy inference at startup
    warmup_text: str = "Привет. Это тестовая фраза."
# Module-level settings instance. It is constructed when this module is first
# imported, so configuration is resolved once at import time.
settings = Settings()