<!-- Newer | Older | voice / examples / client_browser.html -->
<!DOCTYPE html>
<html lang="ru">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Voice TTS WebSocket Client</title>
  <style>
    /* Page frame and base typography */
    :root {
      font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
    }
    body {
      max-width: 720px;
      margin: 2rem auto;
      padding: 0 1rem;
      line-height: 1.5;
    }

    /* Form controls */
    label {
      display: block;
      margin-top: 1rem;
      font-weight: 600;
    }
    input, textarea, select, button {
      font: inherit;
      padding: 0.5rem;
      margin-top: 0.25rem;
    }
    input[type="text"], input[type="number"], select {
      width: 100%;
      box-sizing: border-box;
    }
    textarea {
      width: 100%;
      height: 6rem;
      box-sizing: border-box;
    }

    /* Horizontal groups: children share the row width equally */
    .row {
      display: flex;
      gap: 1rem;
      align-items: end;
    }
    .row > * {
      flex: 1;
    }

    /* Buttons */
    button {
      cursor: pointer;
      background: #2563eb;
      color: white;
      border: none;
      border-radius: 0.375rem;
    }
    button:disabled {
      opacity: 0.5;
      cursor: not-allowed;
    }
    .stop {
      background: #dc2626;
    }

    /* Monospace event log */
    #log {
      margin-top: 1rem;
      padding: 0.75rem;
      min-height: 8rem;
      background: #f3f4f6;
      border-radius: 0.375rem;
      white-space: pre-wrap;
      font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
      font-size: 0.875rem;
    }
    .status {
      color: #4b5563;
    }
  </style>
</head>
<body>
  <h1>Voice TTS WebSocket Client</h1>

  <!-- Every label is bound to its control with for/id so that clicking the
       label focuses the control and assistive technology announces the name. -->
  <label for="uri">Server URI</label>
  <input id="uri" type="text" value="ws://localhost:8765/ws" />

  <!-- Synthesis parameters sent in the init message -->
  <div class="row">
    <div>
      <label for="language">Language</label>
      <select id="language">
        <option value="ru">Russian</option>
        <option value="en">English</option>
        <option value="uk">Ukrainian</option>
        <option value="es">Spanish</option>
        <option value="de">German</option>
        <option value="fr">French</option>
      </select>
    </div>
    <div>
      <label for="speed">Speed</label>
      <input id="speed" type="number" min="0.5" max="2" step="0.1" value="1.0" />
    </div>
    <div>
      <label for="emotion">Emotion</label>
      <input id="emotion" type="text" value="neutral" />
    </div>
  </div>

  <label for="voiceRef">Voice reference path (optional)</label>
  <input id="voiceRef" type="text" placeholder="voices/rick_ref_clean.wav" />

  <label for="text">Text to speak</label>
  <textarea id="text">Привет. Это тестовый запуск из браузера.</textarea>

  <!-- Explicit type="button": these trigger script actions and must never
       act as submit buttons if the markup is later wrapped in a form. -->
  <div class="row" style="margin-top: 1rem;">
    <button id="connect" type="button">Connect</button>
    <button id="speak" type="button" disabled>Speak streaming</button>
    <button id="stop" class="stop" type="button" disabled>Stop</button>
  </div>

  <!-- role="log" exposes the appended lines as a polite live region. -->
  <div id="log" role="log" aria-label="Event log"></div>

  <script>
    // Shorthand: look up a DOM element by its id.
    const $ = (id) => {
      return document.getElementById(id);
    };
    // Append one timestamped line to the #log element.
    const log = (msg) => {
      const stamp = new Date().toLocaleTimeString();
      const output = $('log');
      output.textContent += `[${stamp}] ${msg}\n`;
    };

    // Shared client state.
    let ws = null;           // current WebSocket, or null when there is none
    let audioCtx = null;     // created on first use by ensureAudioContext
    let nextStartTime = 0;   // audio-clock time at which the next chunk is scheduled
    let seq = 0;             // last sequence number handed out

    // Hand out the next message sequence number (the first call returns 1).
    const nextSeq = () => {
      seq += 1;
      return seq;
    };

    // Make sure a 24 kHz AudioContext exists and ask it to resume if the
    // browser reports it as suspended.
    const ensureAudioContext = () => {
      if (!audioCtx) {
        // Fall back to the webkit-prefixed constructor when the standard one is absent.
        const ContextCtor = window.AudioContext || window.webkitAudioContext;
        audioCtx = new ContextCtor({ sampleRate: 24000 });
      }

      const isSuspended = audioCtx.state === 'suspended';
      if (isSuspended) {
        audioCtx.resume();
      }
    };

    // Decode one base64-encoded chunk of mono, little-endian PCM16 audio and
    // schedule it to play right after the previously scheduled chunk.
    //
    // base64Data: base64 string of raw PCM16 bytes (played at 24000 Hz).
    // Throws whatever atob() throws when the string is not valid base64.
    const playPcm16 = (base64Data) => {
      ensureAudioContext();

      // atob() yields a "binary string": one char code (0-255) per byte.
      const raw = atob(base64Data);

      // A trailing odd byte cannot form a sample; ignore it instead of
      // reading past the end of the buffer.
      const sampleCount = Math.floor(raw.length / 2);
      // createBuffer() rejects a zero-length buffer, so skip empty chunks.
      if (sampleCount === 0) return;

      // Copy the decoded bytes into a real byte buffer. The DataView must wrap
      // THESE bytes; wrapping a freshly allocated (zero-filled) array would
      // decode nothing but silence.
      const bytes = new Uint8Array(sampleCount * 2);
      for (let i = 0; i < bytes.length; i++) {
        bytes[i] = raw.charCodeAt(i);
      }
      const view = new DataView(bytes.buffer);

      // Convert int16 samples to float32 in [-1, 1) inside an AudioBuffer.
      const buffer = audioCtx.createBuffer(1, sampleCount, 24000);
      const channel = buffer.getChannelData(0);
      for (let i = 0; i < sampleCount; i++) {
        channel[i] = view.getInt16(i * 2, true) / 32768.0; // true = little-endian
      }

      const source = audioCtx.createBufferSource();
      source.buffer = buffer;
      source.connect(audioCtx.destination);

      // Queue chunks back to back; if the queue has run dry (or was reset),
      // start from the current audio clock time.
      const now = audioCtx.currentTime;
      if (nextStartTime < now) {
        nextStartTime = now;
      }
      source.start(nextStartTime);
      nextStartTime += buffer.duration;
    };

    // Connect button: open the WebSocket, send the `init` message once the
    // socket is open, and route incoming `audio` / `status` / `error` messages.
    $('connect').onclick = () => {
      if (ws) return;
      const uri = $('uri').value;
      log(`Connecting to ${uri} ...`);
      try {
        // Handlers use this local reference so they always act on the socket
        // they were registered on, whatever the shared `ws` points to later.
        const socket = new WebSocket(uri);
        ws = socket;

        socket.onopen = () => {
          log('Connected');
          $('connect').disabled = true;
          $('speak').disabled = false;
          $('stop').disabled = false;

          // An empty or invalid speed field parses to NaN, which JSON would
          // serialise as null; fall back to the field's default of 1.0.
          const speed = parseFloat($('speed').value);
          const init = {
            type: 'init',
            seq: nextSeq(),
            session_id: 'browser-client',
            language: $('language').value,
            speed: Number.isFinite(speed) ? speed : 1.0,
            emotion: $('emotion').value,
          };
          const voiceRef = $('voiceRef').value.trim();
          if (voiceRef) {
            init.voice_ref = voiceRef;
          }
          socket.send(JSON.stringify(init));
          log('Sent init');
        };

        socket.onmessage = (event) => {
          // A malformed frame must not throw out of the handler.
          let msg;
          try {
            msg = JSON.parse(event.data);
          } catch (err) {
            log(`error: ignoring non-JSON message (${err.message})`);
            return;
          }
          if (!msg || typeof msg !== 'object') {
            log('error: ignoring unexpected message');
            return;
          }

          if (msg.type === 'audio') {
            if (typeof msg.data !== 'string') {
              log(`error: audio seq=${msg.seq} carries no data`);
              return;
            }
            try {
              playPcm16(msg.data);
            } catch (err) {
              log(`error: cannot play audio seq=${msg.seq}: ${err.message}`);
              return;
            }
            // Estimated duration: base64 chars * 3/4 = bytes, / 2 = samples, / 24000 = seconds.
            log(`audio seq=${msg.seq} len=${(msg.data.length * 3 / 4 / 2 / 24000).toFixed(2)}s`);
          } else if (msg.type === 'status') {
            log(`status ${msg.event} seq=${msg.seq}`);
          } else if (msg.type === 'error') {
            log(`error: ${msg.message}`);
          }
        };

        socket.onclose = () => {
          log('Disconnected');
          if (ws === socket) {
            ws = null;
          }
          $('connect').disabled = false;
          $('speak').disabled = true;
          $('stop').disabled = true;
        };

        // The WebSocket error event is a plain Event with no message, so
        // printing it would only show "[object Event]".
        socket.onerror = () => log('WebSocket error (see the browser console for details)');
      } catch (err) {
        // new WebSocket() throws synchronously, e.g. for a malformed URI.
        log(`Connection failed: ${err.message}`);
      }
    };

    // Speak button: stream the textarea content word by word as `text`
    // messages (one every 120 ms), then send `flush`.
    $('speak').onclick = async () => {
      // Keep a local reference: the shared `ws` is set to null when the
      // socket closes, which can happen while this loop is waiting.
      const socket = ws;
      const isOpen = () => socket.readyState === WebSocket.OPEN;
      if (!socket || !isOpen()) {
        log('Not connected');
        return;
      }

      const text = $('text').value.trim();
      if (!text) {
        // ''.split(/\s+/) yields [''], which would send one empty word.
        log('Nothing to speak');
        return;
      }

      ensureAudioContext();
      nextStartTime = 0;

      const words = text.split(/\s+/);
      log(`Streaming ${words.length} words ...`);

      // Block re-entry so two clicks cannot interleave their words.
      $('speak').disabled = true;
      try {
        for (let i = 0; i < words.length; i++) {
          if (!isOpen()) {
            log('Connection closed while streaming');
            return;
          }
          const payload = words[i] + (i < words.length - 1 ? ' ' : '');
          const sentSeq = nextSeq();
          socket.send(JSON.stringify({ type: 'text', payload, seq: sentSeq }));
          await new Promise((r) => setTimeout(r, 120));

          // If the shared counter moved while we were waiting, something else
          // (such as the Stop button) sent a message; stop feeding words.
          if (seq !== sentSeq) {
            log('Streaming interrupted');
            return;
          }
        }

        if (!isOpen()) {
          log('Connection closed while streaming');
          return;
        }
        socket.send(JSON.stringify({ type: 'flush', seq: nextSeq() }));
        log('Sent flush');
      } finally {
        // Re-enable only if there is still an open connection.
        $('speak').disabled = !ws || ws.readyState !== WebSocket.OPEN;
      }
    };

    // Stop button: tell the server to stop and silence local playback.
    $('stop').onclick = () => {
      if (!ws || ws.readyState !== WebSocket.OPEN) return;
      ws.send(JSON.stringify({ type: 'stop', reason: 'user-interrupt', seq: nextSeq() }));

      // Chunks already scheduled with source.start() keep playing unless their
      // context goes away, so close it; ensureAudioContext() creates a new one
      // the next time audio is needed.
      if (audioCtx) {
        const ctx = audioCtx;
        audioCtx = null;
        const closing = ctx.close();
        if (closing && typeof closing.catch === 'function') {
          closing.catch(() => {});
        }
      }

      nextStartTime = 0;
      log('Sent stop');
    };
  </script>
</body>
</html>