"""Helpers for text preprocessing and reference voice management."""

import re
from pathlib import Path


# Punctuation marks that count as the end of a sentence: the ASCII forms
# first, followed by their full-width (CJK) counterparts.
SENTENCE_ENDINGS = set(".!?;:") | set("。！？；：")


def normalize_whitespace(text: str) -> str:
    """Return *text* with every whitespace run reduced to one space.

    Leading and trailing whitespace is dropped from the result.
    """
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()


def preprocess_text_for_tts(text: str) -> str:
    """
    Minimal cleanup before TTS.
    - Remove non-whitespace control characters (C0 range and DEL).
    - Collapse whitespace (tab, newline, carriage return, vertical tab,
      form feed, ...) into single spaces and strip the edges.

    Returns the cleaned text; an empty or whitespace-only input yields "".
    """
    # Vertical tab (\x0b) and form feed (\x0c) are intentionally NOT deleted
    # here. They are whitespace, so deleting them would glue the neighbouring
    # words together (e.g. "one.\x0cTwo" -> "one.Two"). Leaving them in place
    # lets normalize_whitespace() turn them into a single space instead.
    text = re.sub(r"[\x00-\x08\x0e-\x1f\x7f]", "", text)
    return normalize_whitespace(text)


def has_sentence_ending(text: str) -> bool:
    """Return True if *text*, ignoring trailing whitespace, ends with one of
    the marks in SENTENCE_ENDINGS."""
    # str.endswith accepts a tuple of candidate suffixes, so one call covers
    # every punctuation mark.
    candidates = tuple(SENTENCE_ENDINGS)
    return text.rstrip().endswith(candidates)


def validate_reference_audio(path: Path) -> None:
    """Ensure *path* points at an existing regular file.

    Only existence and file-ness are checked; the audio format itself is not
    inspected.

    Raises:
        FileNotFoundError: nothing exists at *path*.
        ValueError: *path* exists but is not a file (e.g. a directory).
    """
    # Success path first: an existing regular file needs no further checks.
    if path.is_file():
        return

    # Something is there, but it is not a file.
    if path.exists():
        raise ValueError(f"Reference audio path is not a file: {path}")

    raise FileNotFoundError(
        f"Reference audio not found: {path}. "
        f"Place a WAV/MP3 file under {path.parent}/ and retry."
    )
