# voice/.env.example
# Local TTS pipeline configuration example
# Copy this file to .env and adjust values.

# Network and logging settings.
# NOTE(review): HOST/PORT presumably set the service's listen address — confirm against the app.
# 0.0.0.0 conventionally means "all interfaces"; consider 127.0.0.1 for local-only access.
HOST=0.0.0.0
PORT=8765
# NOTE(review): INFO looks like a standard logging level name — confirm the accepted values.
LOG_LEVEL=INFO

# Speech synthesis backend and model selection.
TTS_BACKEND=f5_tts
# F5-TTS model name. Built-in options: F5TTS_v1_Base, F5TTS_v1_Small.
# The model is downloaded automatically from HuggingFace on first use.
TTS_MODEL_NAME=F5TTS_v1_Base
# Optional local checkpoint and vocabulary paths (disabled by default).
# NOTE(review): presumably these take the place of the automatic download when
# uncommented — confirm against the model loader.
# TTS_MODEL_PATH=models/f5-tts/model.pt
# TTS_VOCAB_PATH=models/f5-tts/vocab.txt
# Audio sample rate. NOTE(review): presumably in Hz — TODO confirm.
TTS_SAMPLE_RATE=24000

# Directory containing voice reference files.
# NOTE(review): presumably resolved relative to the project root, like DEFAULT_VOICE_REF — confirm.
VOICES_DIR=voices
# Path to the default reference audio (relative to the project root, or absolute).
# Setting DEFAULT_VOICE_REF enables instant warm-up and voice cloning.
DEFAULT_VOICE_REF=voices/rick_ref_clean.wav
# Exact transcript of the reference audio. When set, Whisper transcription is skipped.
# NOTE(review): the value is double-quoted; whether the quotes are stripped depends
# on the .env loader in use — confirm before changing the quoting.
DEFAULT_REF_TEXT="Ва-ба-ла-ба-дап-дап! Рикки-тики-тави, сученька! И вот такие у нас новости! Иди."

# Bounds on the length of a text segment.
# NOTE(review): the unit is not stated in this file (presumably characters) — confirm.
MIN_SEGMENT_LENGTH=30
MAX_SEGMENT_LENGTH=200
# Buffer wait limit in milliseconds (per the _MS suffix).
# NOTE(review): presumably the longest time buffered text is held before being
# processed — confirm against the buffering code.
MAX_BUFFER_WAIT_MS=500

# Compute device and numeric precision used for inference.
# NOTE(review): these look like PyTorch-style names; confirm the accepted values
# (e.g. whether DEVICE=cpu or DTYPE=float32 are supported) against the app.
DEVICE=cuda
DTYPE=bfloat16

# When true, run a dummy inference at startup to cache the voice reference and prime CUDA.
WARMUP=true