<!-- voice/examples/client_browser.html -->
<!DOCTYPE html>
<html lang="ru">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Voice TTS</title>
  <style>
    /* Page-wide system font stack; form controls opt in via `font: inherit`. */
    :root { font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; }
    /* Single centered column, capped at 640px. */
    body { max-width: 640px; margin: 2rem auto; padding: 0 1rem; line-height: 1.5; }
    /* Full-width text controls; border-box keeps padding inside the 100% width. */
    textarea { width: 100%; height: 6rem; box-sizing: border-box; font: inherit; padding: 0.5rem; margin-top: 0.25rem; }
    input { font: inherit; padding: 0.5rem; margin-top: 0.25rem; width: 100%; box-sizing: border-box; }
    /* Button row: a flex container whose buttons (flex: 1) share the width equally. */
    .row { display: flex; gap: 0.5rem; margin-top: 0.75rem; }
    button { flex: 1; cursor: pointer; padding: 0.5rem; border: none; border-radius: 0.375rem; color: white; font: inherit; }
    /* One background color per action button. */
    #connect { background: #2563eb; }
    #speak { background: #16a34a; }
    #stop { background: #dc2626; }
    button:disabled { opacity: 0.4; cursor: not-allowed; }
    /* Log panel: pre-wrap preserves the newline the script appends after every entry. */
    #log { margin-top: 1rem; padding: 0.75rem; min-height: 6rem; background: #f3f4f6; border-radius: 0.375rem; white-space: pre-wrap; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 0.8rem; }
  </style>
</head>
<body>
  <h1>Voice TTS</h1>
  <!-- Each text control gets a real label; a placeholder alone is not an accessible name. -->
  <label for="uri">WebSocket URL</label>
  <input id="uri" type="text" value="ws://localhost:8765/ws" placeholder="ws://..." />
  <label for="text">Текст для озвучки</label>
  <textarea id="text" placeholder="Текст для озвучки">Привет. Это тестовый запуск из браузера.</textarea>
  <!-- Explicit type="button": these only trigger script actions and must never submit a form. -->
  <div class="row">
    <button id="connect" type="button">Connect</button>
    <button id="speak" type="button" disabled>Speak</button>
    <button id="stop" type="button" disabled>Stop</button>
  </div>
  <!-- Kept on one line: #log uses white-space: pre-wrap, so extra whitespace here would be rendered. -->
  <div id="log">Нажми Connect для запуска.</div>

  <script>
    // Shorthand for document.getElementById.
    const $ = (id) => document.getElementById(id);

    // Append one timestamped entry to the #log panel.
    // Every entry is written as its own line: if the panel currently holds text
    // that does not end with a newline (e.g. the initial hint in the markup),
    // a separator is inserted first so the entry is not glued onto that text.
    const log = (msg) => {
      const panel = $('log');
      const line = `[${new Date().toLocaleTimeString()}] ${msg}`;
      const existing = panel.textContent;
      const separator = existing && !existing.endsWith('\n') ? '\n' : '';
      panel.textContent = existing + separator + line + '\n';
    };

    // Shared client state.
    let ws = null;          // active WebSocket, or null while disconnected
    let audioCtx = null;    // AudioContext, created on first use
    let nextStartTime = 0;  // AudioContext time at which the next chunk is scheduled
    let seq = 0;            // last sequence number handed out

    // Return the next outgoing message sequence number (1, 2, 3, ...).
    const nextSeq = () => {
      seq += 1;
      return seq;
    };

    // Make sure a usable AudioContext exists in `audioCtx`.
    // Creates one (44.1 kHz) when there is none or the previous one was closed,
    // and, if it is suspended, resumes it and then polls `state` every 25 ms
    // (at most 20 times) until it reports 'running'.
    // Callers must still check `audioCtx.state`: the polling can give up.
    const ensureAudioContext = async () => {
      const needsNewContext = !audioCtx || audioCtx.state === 'closed';
      if (needsNewContext) {
        const AudioContextClass = window.AudioContext || window.webkitAudioContext;
        audioCtx = new AudioContextClass({ sampleRate: 44100 });
      }

      if (audioCtx.state !== 'suspended') return;

      log('Resuming AudioContext ...');
      await audioCtx.resume();

      const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
      for (let tries = 0; audioCtx.state !== 'running' && tries < 20; tries++) {
        await pause(25);
      }
    };

    // Decode a base64 string into a Uint8Array of its raw bytes.
    // atob() yields one character per byte (char codes 0-255), so each
    // character maps directly to one array element.
    const base64ToBytes = (base64) => {
      const decoded = atob(base64);
      return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
    };

    // Decode one base64 chunk of mono, little-endian, signed 16-bit PCM and
    // schedule it right after the previously scheduled chunk.
    //
    //   base64Data       - base64 text of the raw PCM bytes
    //   seq              - sequence number from the server message (currently unused)
    //   serverSampleRate - sample rate of the chunk, passed to createBuffer()
    //
    // Rejects when the AudioContext is not running or the chunk holds no
    // complete sample.
    const playPcm16 = async (base64Data, seq, serverSampleRate) => {
      await ensureAudioContext();
      if (audioCtx.state !== 'running') throw new Error('AudioContext not running');

      const bytes = base64ToBytes(base64Data);
      // Whole samples only: with an odd byte count the plain division gave a
      // fractional count, so the copy loop ran one step too far and getInt16()
      // threw a RangeError. A trailing half sample is dropped instead.
      const sampleCount = Math.floor(bytes.length / 2);
      if (sampleCount === 0) throw new Error('Empty audio');

      const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
      const buffer = audioCtx.createBuffer(1, sampleCount, serverSampleRate);
      const channel = buffer.getChannelData(0);
      // Scale int16 [-32768, 32767] to the float range used by AudioBuffer.
      for (let i = 0; i < sampleCount; i++) channel[i] = view.getInt16(i * 2, true) / 32768.0;

      const source = audioCtx.createBufferSource();
      source.buffer = buffer;
      source.connect(audioCtx.destination);

      // Chunks are queued back to back; if the queue has run dry, start now.
      const now = audioCtx.currentTime;
      if (nextStartTime < now) nextStartTime = now;
      source.start(nextStartTime);
      nextStartTime += buffer.duration;
    };

    // Connect button: open a WebSocket to the URL in #uri, wire up its
    // handlers and send the initial 'init' message once the socket is open.
    // Does nothing while a socket already exists.
    $('connect').onclick = async () => {
      if (ws) return;
      const uri = $('uri').value;
      log(`Connecting to ${uri} ...`);

      let socket;
      try {
        // The constructor throws synchronously, e.g. on a malformed URL.
        socket = new WebSocket(uri);
      } catch (err) {
        log(`Connection failed: ${err.message}`);
        return;
      }
      ws = socket;

      // Handlers use the local `socket`, not the global `ws`: `ws` is reset to
      // null on close and must not be dereferenced after an await.
      socket.onopen = async () => {
        try {
          await ensureAudioContext();
        } catch (err) {
          // Audio setup failing must not leave the UI stuck half-connected;
          // playback and Speak call ensureAudioContext() again later.
          log(`audio init error: ${err.message}`);
        }
        // The socket may have closed while the audio context was resuming.
        if (socket.readyState !== WebSocket.OPEN) return;
        log('Connected');
        $('connect').disabled = true;
        $('speak').disabled = false;
        $('stop').disabled = false;
        socket.send(JSON.stringify({ type: 'init', seq: nextSeq() }));
        log('Init sent (server defaults)');
      };

      socket.onmessage = (event) => {
        let msg;
        try {
          msg = JSON.parse(event.data);
        } catch (err) {
          log('Ignoring non-JSON message');
          return;
        }
        // JSON.parse can yield null or a primitive; only objects carry a type.
        if (!msg || typeof msg !== 'object') return;

        if (msg.type === 'audio') {
          playPcm16(msg.data, msg.seq, msg.sample_rate).catch((e) => log(`playback error: ${e.message}`));
        } else if (msg.type === 'status') {
          log(`status ${msg.event}`);
        } else if (msg.type === 'error') {
          log(`error: ${msg.message}`);
        }
      };

      socket.onclose = () => {
        log('Disconnected');
        if (ws === socket) ws = null;
        $('connect').disabled = false;
        $('speak').disabled = true;
        $('stop').disabled = true;
      };

      // The WebSocket 'error' event is a plain Event without a message, so
      // interpolating it only ever produced "[object Event]".
      socket.onerror = () => log('error: WebSocket error (see the browser console for details)');
    };

    // Identifies the current Speak run. Speak takes a new value when it starts
    // and Stop bumps it, so a running word loop can tell it has been cancelled.
    let speakRun = 0;

    // Speak button: stream the text to the server one word at a time
    // (120 ms apart), then send 'flush'.
    // The button is disabled while a run is in progress so a second click
    // cannot interleave its words with the first run.
    $('speak').onclick = async () => {
      // Hold on to the socket: the global `ws` becomes null on disconnect,
      // which can happen during any of the awaits below.
      const socket = ws;
      if (!socket || socket.readyState !== WebSocket.OPEN) { log('Not connected'); return; }
      const text = $('text').value.trim();
      if (!text) { log('Empty text'); return; }

      const run = ++speakRun;
      $('speak').disabled = true;
      try {
        await ensureAudioContext();
        nextStartTime = audioCtx.currentTime;
        const words = text.split(/\s+/);
        for (let i = 0; i < words.length; i++) {
          if (run !== speakRun) { log('Speak cancelled'); return; }
          if (socket.readyState !== WebSocket.OPEN) { log('Connection closed while sending'); return; }
          const payload = words[i] + (i < words.length - 1 ? ' ' : '');
          socket.send(JSON.stringify({ type: 'text', payload, seq: nextSeq() }));
          await new Promise((r) => setTimeout(r, 120));
        }
        // Stop or a disconnect may have happened during the last pause.
        if (run !== speakRun) { log('Speak cancelled'); return; }
        if (socket.readyState !== WebSocket.OPEN) { log('Connection closed while sending'); return; }
        socket.send(JSON.stringify({ type: 'flush', seq: nextSeq() }));
        log('Sent text + flush');
      } catch (err) {
        log(`speak error: ${err.message}`);
      } finally {
        // Re-enable only while there is an open connection to speak on.
        $('speak').disabled = !(ws && ws.readyState === WebSocket.OPEN);
      }
    };

    // Stop button: cancel a running Speak loop, silence audio that is already
    // scheduled, and tell the server to stop.
    $('stop').onclick = () => {
      // Any in-progress Speak loop sees the changed value and exits.
      speakRun++;

      // Already scheduled buffer sources keep playing unless their context is
      // closed. Dropping the reference makes ensureAudioContext() create a
      // fresh context on next use, and the schedule restarts from zero.
      if (audioCtx && audioCtx.state !== 'closed') {
        const ctx = audioCtx;
        audioCtx = null;
        ctx.close().catch((err) => log(`audio stop error: ${err.message}`));
      }
      nextStartTime = 0;

      if (!ws || ws.readyState !== WebSocket.OPEN) return;
      ws.send(JSON.stringify({ type: 'stop', reason: 'user-interrupt', seq: nextSeq() }));
      log('Stop sent');
    };
  </script>
</body>
</html>