feat: chatterbox TTS via madcat-tts daemon, Web Speech API STT, styled persona picker

- tts.py: replace piper subprocess with HTTP POST to madcat-tts /v1/audio/speech (chatterbox voice cloning)
- chat.js: replace whisper server upload with browser Web Speech API (webkitSpeechRecognition)
- chat.css: style persona picker — appearance:none select, themed with CSS vars, mobile responsive
- main.py: default TTS voice → bt7274-en
This commit is contained in:
marauder-actual
2026-05-29 16:43:41 +02:00
parent f3c35eba72
commit ae384fe618
4 changed files with 165 additions and 131 deletions
+65 -55
View File
@@ -241,77 +241,87 @@ document.addEventListener('keydown', (e) => {
}
});
// ---------- voice input (whisper) ----------
// ---------- voice input (Web Speech API) ----------
// Mic button element; the functions in this section toggle its 'recording',
// 'transcribing' and 'empty' CSS classes.
const $mic = document.getElementById('mic-button');
// NOTE(review): mediaRecorder / recordedChunks are only touched by the
// MediaRecorder-based lines in this view, which appear to be the removed side
// of the diff (markers stripped) — confirm against the raw diff.
let mediaRecorder = null;
let recordedChunks = [];
// Standard constructor, falling back to the webkit-prefixed one; undefined when
// the browser exposes neither.
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
// Recognition instance currently held by this module, or null when none is held.
let recognition = null;
// Set to true after a start call succeeds and back to false on stop/end/error.
// It is only written in the visible code; any reader is outside this view.
let recording = false;
// NOTE(review): this span is a rendered diff with the +/- markers stripped, so
// removed and added lines are interleaved. The `async` signature on the next
// line and the getUserMedia/MediaRecorder lines inside the `try` below appear
// to be the removed whisper-upload path; everything else appears to be the
// added Web Speech API path. Confirm against the raw diff.
async function startRecording() {
// Disable mic if browser lacks support
// (also sets the button tooltip so the reason is visible on hover)
if (!SpeechRecognition) {
console.warn('Web Speech API not supported — mic disabled');
$mic.disabled = true;
$mic.title = 'Speech recognition not supported in this browser';
}
// Starts one non-continuous 'en-US' recognition session with interim results
// and mirrors its transcript into $input. Does nothing when no
// SpeechRecognition constructor is available.
function startRecording() {
if (!SpeechRecognition) return;
if (recognition) {
// stale instance — tear it down first
// (errors thrown by abort() are deliberately ignored)
try { recognition.abort(); } catch {}
recognition = null;
}
recognition = new SpeechRecognition();
recognition.continuous = false;
recognition.interimResults = true;
recognition.lang = 'en-US';
recognition.addEventListener('result', (e) => {
// Build transcript from all results (interim + final)
// Uses the first alternative ([0]) of every result and overwrites $input
// each time the event fires.
let transcript = '';
for (let i = 0; i < e.results.length; i++) {
transcript += e.results[i][0].transcript;
}
$input.value = transcript;
});
// NOTE(review): the handlers below clear the shared `recognition` variable,
// not the instance they were attached to. If a stale instance torn down by
// abort() above dispatches `end`/`error` after the new instance has been
// assigned, they would null out the new one — confirm event timing.
recognition.addEventListener('end', () => {
recording = false;
$mic.classList.remove('recording');
recognition = null;
// Auto-send if we got text
// NOTE(review): this reads whatever is in $input, so text that was there
// before recording started is submitted as well — confirm this is intended.
const text = $input.value.trim();
if (text) {
$form.dispatchEvent(new Event('submit', { cancelable: true }));
} else {
flashMicEmpty();
}
});
recognition.addEventListener('error', (e) => {
recording = false;
$mic.classList.remove('recording');
recognition = null;
// 'no-speech' and 'aborted' are expected — not worth alarming the user
// NOTE(review): if the browser also dispatches `end` after `error`, the
// `end` handler above runs too (second flash, or auto-send) — confirm.
if (e.error === 'no-speech') {
flashMicEmpty();
} else if (e.error !== 'aborted') {
console.warn('speech recognition error:', e.error);
flashMicEmpty();
}
});
try {
// The lines from here through mediaRecorder.start() appear to be the removed
// MediaRecorder capture path (see the note at the top of this span).
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
recordedChunks = [];
// Prefer webm/opus if browser supports it (Chrome/FF). Safari may need fallback.
const mimeTypes = ['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4', ''];
const mime = mimeTypes.find(m => !m || MediaRecorder.isTypeSupported(m)) || '';
mediaRecorder = new MediaRecorder(stream, mime ? { mimeType: mime } : undefined);
mediaRecorder.addEventListener('dataavailable', (e) => {
if (e.data && e.data.size > 0) recordedChunks.push(e.data);
});
mediaRecorder.addEventListener('stop', () => {
// release the mic immediately
stream.getTracks().forEach(t => t.stop());
handleRecorded();
});
mediaRecorder.start();
recognition.start();
// Only reached when the start call above did not throw.
recording = true;
$mic.classList.add('recording');
} catch (err) {
// Two log lines are visible here; they appear to be the old and the new
// message of the same diff line. `recognition` is left set on this path; the
// stale-instance branch at the top of the next call tears it down.
console.warn('microphone unavailable:', err.message);
console.warn('speech recognition failed to start:', err.message);
recording = false;
}
}
// NOTE(review): the body below appears to be the removed MediaRecorder-based
// version of stopRecording (diff markers stripped); the replacement statements
// appear further down, after handleRecorded's `finally` block. Confirm against
// the raw diff.
//
// Stops the active MediaRecorder, if there is one that is not already
// inactive, and moves the mic button from the 'recording' state to the
// 'transcribing' state.
function stopRecording() {
if (!mediaRecorder || mediaRecorder.state === 'inactive') return;
mediaRecorder.stop();
recording = false;
$mic.classList.remove('recording');
$mic.classList.add('transcribing');
}
// NOTE(review): this appears to be the removed whisper-upload handler (diff
// markers stripped). The last three statements before the closing brace appear
// to be the added body of the new stopRecording, not part of this function —
// confirm against the raw diff.
//
// Packs the recorded chunks into one Blob, POSTs it to /api/transcribe as the
// multipart field 'audio', and on a non-blank `text` in the JSON response puts
// it into $input and dispatches a submit event on $form. An empty result or a
// failed request triggers flashMicEmpty() instead.
async function handleRecorded() {
// Nothing was captured: just leave the 'transcribing' state.
if (!recordedChunks.length) {
$mic.classList.remove('transcribing');
return;
}
const blob = new Blob(recordedChunks, { type: recordedChunks[0].type || 'audio/webm' });
// File extension = MIME subtype with any ';codecs=…' parameter stripped.
const ext = (blob.type.split('/')[1] || 'webm').split(';')[0];
const form = new FormData();
form.append('audio', blob, `speech.${ext}`);
try {
const resp = await fetch('/api/transcribe', { method: 'POST', body: form });
// Non-2xx responses are routed to the catch block below.
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
const { text } = await resp.json();
if (text && text.trim()) {
$input.value = text;
// auto-send — Her vibe is "speak and she hears"
$form.dispatchEvent(new Event('submit', { cancelable: true }));
} else {
flashMicEmpty();
}
} catch (err) {
console.warn('transcription failed:', err.message);
flashMicEmpty();
} finally {
// Runs on success and on failure, so the button never stays 'transcribing'.
$mic.classList.remove('transcribing');
}
// Ask the active recognition session, if any, to stop; errors thrown by
// stop() are deliberately ignored.
if (!recognition) return;
try { recognition.stop(); } catch {}
// 'end' event handler cleans up recording state and sends
}
/**
 * Briefly marks the mic button as "nothing was heard".
 *
 * Adds the 'empty' class to the button and schedules its removal 700 ms
 * later. The hint is purely visual: no toast and no popup.
 */
function flashMicEmpty() {
  const emptyClass = 'empty';
  const flashDurationMs = 700;

  $mic.classList.add(emptyClass);
  setTimeout(() => {
    $mic.classList.remove(emptyClass);
  }, flashDurationMs);
}