"""Helpers for text preprocessing and reference voice management."""
import re
from pathlib import Path
# Sentence-ending punctuation: the ASCII marks plus their CJK counterparts.
# The CJK forms are written as escapes on purpose: the full-width marks look
# almost identical to the ASCII ones, and a literal that gets normalized to
# ASCII silently collapses into a duplicate entry, dropping CJK coverage.
SENTENCE_ENDINGS = {
    ".", "!", "?", ";", ":",
    "\u3002",  # ideographic full stop
    "\uff01",  # full-width exclamation mark
    "\uff1f",  # full-width question mark
    "\uff1b",  # full-width semicolon
    "\uff1a",  # full-width colon
}
def normalize_whitespace(text: str) -> str:
    """Return *text* with every whitespace run reduced to one space.

    Leading and trailing whitespace is dropped entirely, so an empty or
    whitespace-only input yields an empty string.
    """
    # split() with no separator breaks on runs of whitespace and discards
    # empty pieces at both ends, which covers the collapse and the strip.
    words = text.split()
    return " ".join(words)
def preprocess_text_for_tts(text: str) -> str:
    """Clean *text* lightly before handing it to TTS.

    Two steps, in this order:

    1. Delete ASCII control characters (0x00-0x1F and 0x7F), except tab,
       line feed and carriage return, which are left in place.
    2. Pass the result through ``normalize_whitespace``.
    """
    # Tab (\x09), LF (\x0a) and CR (\x0d) are deliberately absent from the
    # class, so they survive this step and reach the whitespace pass.
    control_chars = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]")
    without_controls = control_chars.sub("", text)
    return normalize_whitespace(without_controls)
def has_sentence_ending(text: str) -> bool:
    """Return True when *text*, ignoring trailing whitespace, ends with
    one of the marks in ``SENTENCE_ENDINGS``.
    """
    # str.endswith accepts a tuple of candidates and tests them in one call.
    candidates = tuple(SENTENCE_ENDINGS)
    trimmed = text.rstrip()
    return trimmed.endswith(candidates)
def validate_reference_audio(path: Path) -> None:
    """Check that *path* points at an existing regular file.

    Only existence and file-ness are checked here; the WAV/MP3 wording in
    the error message is a hint to the user, not a format check.

    Raises:
        FileNotFoundError: nothing exists at *path*.
        ValueError: *path* exists but is not a file (e.g. a directory).
    """
    if not path.exists():
        not_found_msg = (
            f"Reference audio not found: {path}. "
            f"Place a WAV/MP3 file under {path.parent}/ and retry."
        )
        raise FileNotFoundError(not_found_msg)

    if not path.is_file():
        not_a_file_msg = f"Reference audio path is not a file: {path}"
        raise ValueError(not_a_file_msg)