<!DOCTYPE html>
<!-- The interface text (title, heading, labels, buttons) is English, so the
     document language is declared as English for assistive technology. -->
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Voice TTS WebSocket Client</title>
<style>
/* Base typography for the whole page. */
:root {
  font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
}

/* Single centred column. */
body {
  max-width: 720px;
  margin: 2rem auto;
  padding: 0 1rem;
  line-height: 1.5;
}

/* Each label sits on its own line above its control. */
label {
  display: block;
  margin-top: 1rem;
  font-weight: 600;
}

/* Form controls inherit the page font instead of the browser default. */
input,
textarea,
select,
button {
  font: inherit;
  padding: 0.5rem;
  margin-top: 0.25rem;
}

input[type="text"],
input[type="number"],
select {
  width: 100%;
  box-sizing: border-box;
}

textarea {
  width: 100%;
  height: 6rem;
  box-sizing: border-box;
}

/* Horizontal group whose children share the width equally. */
.row {
  display: flex;
  gap: 1rem;
  align-items: end;
}

.row > * {
  flex: 1;
}

button {
  cursor: pointer;
  background: #2563eb;
  color: white;
  border: none;
  border-radius: 0.375rem;
}

button:disabled {
  opacity: 0.5;
  cursor: not-allowed;
}

/* Red variant used by the Stop button. */
.stop {
  background: #dc2626;
}

/* Monospace log panel; pre-wrap keeps the newline after every entry. */
#log {
  margin-top: 1rem;
  padding: 0.75rem;
  min-height: 8rem;
  background: #f3f4f6;
  border-radius: 0.375rem;
  white-space: pre-wrap;
  font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
  font-size: 0.875rem;
}

.status {
  color: #4b5563;
}
</style>
</head>
<body>
<h1>Voice TTS WebSocket Client</h1>
<!-- Every label is tied to its control through for/id so the control has an
     accessible name and clicking the label focuses the field. -->
<label for="uri">Server URI</label>
<input id="uri" type="text" value="ws://localhost:8765/ws" />
<div class="row">
  <div>
    <label for="language">Language</label>
    <select id="language">
      <option value="ru">Russian</option>
      <option value="en">English</option>
      <option value="uk">Ukrainian</option>
      <option value="es">Spanish</option>
      <option value="de">German</option>
      <option value="fr">French</option>
    </select>
  </div>
  <div>
    <label for="speed">Speed</label>
    <input id="speed" type="number" min="0.5" max="2" step="0.1" value="1.0" />
  </div>
  <div>
    <label for="emotion">Emotion</label>
    <input id="emotion" type="text" value="neutral" />
  </div>
</div>
<label for="voiceRef">Voice reference path (optional)</label>
<input id="voiceRef" type="text" placeholder="voices/rick_ref_clean.wav" />
<label for="text">Text to speak</label>
<textarea id="text">Привет. Это тестовый запуск из браузера.</textarea>
<!-- Action buttons. type="button" is explicit so they never act as submit
     buttons if this markup is later placed inside a form. -->
<div class="row" style="margin-top: 1rem;">
  <button id="connect" type="button">Connect</button>
  <button id="speak" type="button" disabled>Speak streaming</button>
  <button id="stop" type="button" class="stop" disabled>Stop</button>
</div>
<!-- The script appends one time-stamped line per event to this element. -->
<div id="log"></div>
<script>
// Shorthand for looking up an element by its id.
const $ = (id) => document.getElementById(id);

// Appends "[<local time>] <msg>" plus a newline to the #log element.
const log = (msg) => {
  const stamp = new Date().toLocaleTimeString();
  $('log').textContent += `[${stamp}] ${msg}\n`;
};
// State shared by the audio helpers and the button handlers.
// Active WebSocket, or null when there is none.
let ws = null;
// AudioContext, created on first use.
let audioCtx = null;
// AudioContext time at which the next audio chunk should begin playing.
let nextStartTime = 0;
// Last sequence number handed out for an outgoing message.
let seq = 0;

// Returns the next sequence number; the first call yields 1.
const nextSeq = () => {
  seq += 1;
  return seq;
};
// Creates the shared AudioContext (24 kHz) on first use and resumes it when
// it is in the 'suspended' state.
const ensureAudioContext = () => {
  if (!audioCtx) {
    // Fall back to the webkit-prefixed constructor when the standard one is absent.
    const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
    audioCtx = new AudioContextCtor({ sampleRate: 24000 });
  }
  if (audioCtx.state !== 'suspended') {
    return;
  }
  audioCtx.resume();
};
// Decodes one base64 chunk of little-endian, mono, 16-bit PCM (played at
// 24 kHz) and schedules it directly after the previously scheduled chunk.
//
// base64Data: base64 string holding the raw PCM bytes.
//
// The samples are read from the decoded byte string itself. Reading them back
// through a DataView over a freshly allocated Int16Array only ever yields
// zeros, which plays as silence.
const playPcm16 = (base64Data) => {
  ensureAudioContext();
  const raw = atob(base64Data);
  // A trailing odd byte cannot form a sample and is ignored.
  const sampleCount = Math.floor(raw.length / 2);
  if (sampleCount === 0) {
    // createBuffer() rejects zero-length buffers, so there is nothing to schedule.
    return;
  }
  const buffer = audioCtx.createBuffer(1, sampleCount, 24000);
  const channel = buffer.getChannelData(0);
  for (let i = 0; i < sampleCount; i++) {
    // atob() yields one character per byte; combine low and high byte.
    const lo = raw.charCodeAt(2 * i);
    const hi = raw.charCodeAt(2 * i + 1);
    const unsigned = (hi << 8) | lo;
    // Reinterpret as a signed 16-bit value, then scale to [-1, 1).
    const signed = unsigned >= 0x8000 ? unsigned - 0x10000 : unsigned;
    channel[i] = signed / 32768.0;
  }
  const source = audioCtx.createBufferSource();
  source.buffer = buffer;
  source.connect(audioCtx.destination);
  // Never schedule in the past: if playback fell behind, restart from "now".
  const now = audioCtx.currentTime;
  if (nextStartTime < now) {
    nextStartTime = now;
  }
  source.start(nextStartTime);
  nextStartTime += buffer.duration;
};
// Connect button: opens the WebSocket, sends the 'init' message once the
// socket is open, and dispatches incoming 'audio' / 'status' / 'error'
// messages. Does nothing while a socket already exists.
$('connect').onclick = async () => {
  if (ws) return;
  const uri = $('uri').value;
  log(`Connecting to ${uri} ...`);
  let socket;
  try {
    // The constructor throws synchronously on a malformed URI.
    socket = new WebSocket(uri);
  } catch (err) {
    log(`Connection failed: ${err.message}`);
    return;
  }
  ws = socket;

  socket.onopen = () => {
    log('Connected');
    $('connect').disabled = true;
    $('speak').disabled = false;
    $('stop').disabled = false;
    // An empty speed field parses to NaN, which JSON would serialise as null;
    // fall back to the field's default of 1.0 instead.
    const speed = parseFloat($('speed').value);
    const init = {
      type: 'init',
      seq: nextSeq(),
      session_id: 'browser-client',
      language: $('language').value,
      speed: Number.isFinite(speed) ? speed : 1.0,
      emotion: $('emotion').value,
    };
    const voiceRef = $('voiceRef').value.trim();
    if (voiceRef) {
      init.voice_ref = voiceRef;
    }
    socket.send(JSON.stringify(init));
    log('Sent init');
  };

  socket.onmessage = (event) => {
    // A frame that is not a JSON object must not throw out of the handler.
    let msg;
    try {
      msg = JSON.parse(event.data);
    } catch (err) {
      log(`error: could not parse server message: ${err.message}`);
      return;
    }
    if (msg === null || typeof msg !== 'object') {
      log('error: unexpected server message');
      return;
    }
    if (msg.type === 'audio') {
      if (typeof msg.data !== 'string') {
        log(`error: audio message seq=${msg.seq} has no data`);
        return;
      }
      try {
        playPcm16(msg.data);
      } catch (err) {
        // For example atob() rejecting invalid base64.
        log(`error: could not play audio seq=${msg.seq}: ${err.message}`);
        return;
      }
      // Approximate duration: base64 chars -> bytes -> 16-bit samples -> seconds at 24 kHz.
      log(`audio seq=${msg.seq} len=${(msg.data.length * 3 / 4 / 2 / 24000).toFixed(2)}s`);
    } else if (msg.type === 'status') {
      log(`status ${msg.event} seq=${msg.seq}`);
    } else if (msg.type === 'error') {
      log(`error: ${msg.message}`);
    }
  };

  socket.onclose = () => {
    log('Disconnected');
    // Only clear the shared reference if it still points at this socket.
    if (ws === socket) {
      ws = null;
    }
    $('connect').disabled = false;
    $('speak').disabled = true;
    $('stop').disabled = true;
  };

  // The WebSocket error event carries no message, so interpolating it only
  // ever printed "[object Event]". Report the URI that failed instead.
  socket.onerror = () => log(`WebSocket error while talking to ${uri}`);
};
// Speak button: sends the text word by word as 'text' messages, pausing
// 120 ms after each word, then sends a 'flush' message.
$('speak').onclick = async () => {
  // Keep a local reference: the shared `ws` is set to null when the socket
  // closes, which may happen while this handler is awaiting.
  const socket = ws;
  if (!socket || socket.readyState !== WebSocket.OPEN) {
    log('Not connected');
    return;
  }
  // Splitting an empty string yields [""], so drop empty entries.
  const words = $('text').value.trim().split(/\s+/).filter(Boolean);
  if (words.length === 0) {
    log('Nothing to speak');
    return;
  }
  ensureAudioContext();
  nextStartTime = 0;
  // Disable the button while streaming so a second click cannot interleave
  // another word stream with this one.
  const speakButton = $('speak');
  speakButton.disabled = true;
  try {
    log(`Streaming ${words.length} words ...`);
    for (let i = 0; i < words.length; i++) {
      if (socket.readyState !== WebSocket.OPEN) {
        log('Connection closed; streaming aborted');
        return;
      }
      const payload = words[i] + (i < words.length - 1 ? ' ' : '');
      socket.send(JSON.stringify({ type: 'text', payload, seq: nextSeq() }));
      await new Promise((r) => setTimeout(r, 120));
    }
    if (socket.readyState !== WebSocket.OPEN) {
      log('Connection closed; flush not sent');
      return;
    }
    socket.send(JSON.stringify({ type: 'flush', seq: nextSeq() }));
    log('Sent flush');
  } finally {
    // Re-enable only if a connection is still open.
    speakButton.disabled = !(ws && ws.readyState === WebSocket.OPEN);
  }
};
// Stop button: sends a 'stop' message to the server and silences audio that
// has already been scheduled locally.
$('stop').onclick = () => {
  if (!ws || ws.readyState !== WebSocket.OPEN) return;
  ws.send(JSON.stringify({ type: 'stop', reason: 'user-interrupt', seq: nextSeq() }));
  // Resetting nextStartTime alone leaves already scheduled chunks playing.
  // Closing the context drops them; a new context is created on the next use.
  if (audioCtx) {
    const closing = audioCtx.close();
    audioCtx = null;
    if (closing && typeof closing.catch === 'function') {
      closing.catch((err) => log(`error: could not close audio context: ${err.message}`));
    }
  }
  nextStartTime = 0;
  log('Sent stop');
};
</script>
</body>
</html>