# voice/tests/test_segmenter.py
"""Tests for WebSocket protocol and segmenter."""

import pytest

from voice_tts.tts.segmenter import Segmenter


def test_segmenter_sentence_split():
    """Check how feed() groups three sentences given min_length=10."""
    text = "Привет, мир! Как дела? Это тестовый сегмент для проверки работы сегментатора."
    segmenter = Segmenter(min_length=10, max_length=200)

    leftover, emitted = segmenter.feed(text)

    # The opening sentence is long enough, so it is expected as a segment of its own.
    assert len(emitted) >= 1
    opening = emitted[0]
    assert opening.text == "Привет, мир!"

    # The short second sentence is expected to be accumulated with the third one
    # until min_length is met, leaving nothing behind in the buffer.
    assert leftover == ""
    assert len(emitted) == 2
    closing = emitted[1]
    assert closing.text == "Как дела? Это тестовый сегмент для проверки работы сегментатора."


def test_segmenter_max_length_clause_split():
    """Check that a long sentence with no full stop is emitted in bounded pieces."""
    tolerance = 5  # small tolerance
    long_sentence = (
        "Это очень длинное предложение без точки, которое должно быть разрезано "
        "по запятой или другому разделителю, потому что иначе оно слишком длинное"
    )
    segmenter = Segmenter(min_length=10, max_length=50)

    # Only the emitted segments matter here; the leftover buffer is not inspected.
    _, pieces = segmenter.feed(long_sentence)

    assert pieces
    limit = segmenter.max_length + tolerance
    assert all(len(piece.text) <= limit for piece in pieces)


def test_segmenter_flush():
    """Check that text shorter than min_length is held by feed() and released by flush()."""
    short_text = "Короткий текст"
    segmenter = Segmenter(min_length=100, max_length=200)

    # The input is far below min_length=100, so feed() is expected to hand it back untouched.
    leftover, _emitted = segmenter.feed(short_text)
    assert leftover == short_text

    # flush() is expected to turn the held-back text into exactly one segment.
    flushed = segmenter.flush(leftover)
    assert len(flushed) == 1
    only_segment = flushed[0]
    assert only_segment.text == short_text