fix: strip markdown for TTS + render rich markdown in chat UI
- _md_to_speech(): AST-based markdown stripping via markdown-it-py
- Truncate TTS input at 450 chars on a sentence boundary (avoids chatterbox overflow)
- chat.js: render assistant messages as markdown via marked.js + DOMPurify
- Typewriter accumulates raw text and renders markdown progressively
This commit is contained in:
+55
-1
@@ -939,17 +939,71 @@ async def _send_audio(ws: WebSocket, text: str) -> None:
|
|||||||
await _send_audio_with_voice(ws, text, tts.voice)
|
await _send_audio_with_voice(ws, text, tts.voice)
|
||||||
|
|
||||||
|
|
||||||
|
_TTS_MAX_CHARS = 450 # chatterbox s3-tokenizer overflows beyond ~500 chars
|
||||||
|
|
||||||
|
|
||||||
|
def _truncate_at_sentence(text: str, limit: int) -> str:
    """Cut *text* to at most *limit* characters, preferring a sentence end.

    Expects whitespace-normalised input (single spaces only), which is what
    ``_md_to_speech`` produces.

    Preference order:
      1. The last ``.``, ``!`` or ``?`` that is followed by a space and lies
         within the first *limit* characters.
      2. The last space within the limit, provided it is past character 200
         (so that a word-boundary cut does not discard most of the text).
      3. A hard cut at *limit*.
    """
    if len(text) <= limit:
        return text

    # Look one character past the limit so punctuation sitting exactly at the
    # limit can still be confirmed as being followed by a space. The search
    # runs on the forward string: searching the reversed string for
    # "punctuation then whitespace" would instead match punctuation that is
    # *preceded* by whitespace, which ordinary prose never contains.
    window = text[: limit + 1]
    boundary = max(window.rfind(mark + " ") for mark in ".!?")
    if boundary >= 0:
        # Keep the punctuation mark itself, drop the space after it.
        return text[: boundary + 1].strip()

    # No sentence boundary: fall back to the last word boundary, then to a
    # hard cut.
    truncated = text[:limit]
    last_space = truncated.rfind(" ")
    if last_space > 200:
        return truncated[:last_space].strip()
    return truncated.strip()


def _md_to_speech(text: str) -> str:
    """Strip markdown to plain speech-ready text, capped for TTS safety.

    1. Parse the markdown into tokens; skip fenced/indented code blocks and
       horizontal rules entirely.
    2. Collect plain text from inline children (text and inline code); soft
       and hard line breaks become a space so adjacent words stay separated.
    3. Collapse all whitespace runs to single spaces.
    4. Truncate at a sentence boundary near ``_TTS_MAX_CHARS`` to avoid
       chatterbox token overflow (garbled audio on long inputs).

    Args:
        text: Markdown source to be spoken.

    Returns:
        Plain text no longer than ``_TTS_MAX_CHARS`` characters; empty if the
        input contains no speakable text.
    """
    from markdown_it import MarkdownIt

    parts: list[str] = []
    for token in MarkdownIt().parse(text):
        if token.type in ("fence", "hr", "code_block"):
            continue
        if token.type in (
            "paragraph_close",
            "heading_close",
            "blockquote_close",
            "list_item_close",
        ):
            # Separate consecutive blocks so their text does not run together.
            parts.append(" ")
            continue
        if token.children:
            for child in token.children:
                if child.type in ("text", "code_inline"):
                    parts.append(child.content)
                elif child.type in ("softbreak", "hardbreak"):
                    parts.append(" ")
        elif token.type == "inline" and token.content:
            parts.append(token.content)

    clean = " ".join("".join(parts).split())
    return _truncate_at_sentence(clean, _TTS_MAX_CHARS)
|
||||||
|
|
||||||
|
|
||||||
async def _send_audio_with_voice(ws: WebSocket, text: str, voice_id: str) -> None:
|
async def _send_audio_with_voice(ws: WebSocket, text: str, voice_id: str) -> None:
|
||||||
"""Synthesize text in a specific voice and ship as audio. Used post-calibration."""
|
"""Synthesize text in a specific voice and ship as audio. Used post-calibration."""
|
||||||
if not TTS_ENABLED:
|
if not TTS_ENABLED:
|
||||||
return
|
return
|
||||||
import base64
|
import base64
|
||||||
try:
|
try:
|
||||||
|
speech_text = _md_to_speech(text) if text else ""
|
||||||
|
if not speech_text:
|
||||||
|
return
|
||||||
# spin up a per-voice synthesizer (cheap — just object init)
|
# spin up a per-voice synthesizer (cheap — just object init)
|
||||||
per_voice = TTS(voice=voice_id) if voice_id != (tts.voice if tts else "") else tts
|
per_voice = TTS(voice=voice_id) if voice_id != (tts.voice if tts else "") else tts
|
||||||
if not per_voice or not per_voice.available:
|
if not per_voice or not per_voice.available:
|
||||||
return
|
return
|
||||||
wav = await per_voice.synthesize(text)
|
wav = await per_voice.synthesize(speech_text)
|
||||||
if not wav:
|
if not wav:
|
||||||
return
|
return
|
||||||
await ws.send_json({
|
await ws.send_json({
|
||||||
|
|||||||
+21
-2
@@ -21,9 +21,22 @@ let ws = null;
|
|||||||
let connectAttempts = 0;
|
let connectAttempts = 0;
|
||||||
let lastSpeaker = null; // 'user' | 'bt' | 'system' | null
|
let lastSpeaker = null; // 'user' | 'bt' | 'system' | null
|
||||||
let currentBtBody = null; // active streaming .msg__body element
|
let currentBtBody = null; // active streaming .msg__body element
|
||||||
|
let currentBtRaw = ''; // raw markdown accumulator for streaming
|
||||||
let queue = [];
|
let queue = [];
|
||||||
let draining = false;
|
let draining = false;
|
||||||
|
|
||||||
|
// Configure marked for safe markdown rendering
|
||||||
|
if (typeof marked !== 'undefined') {
|
||||||
|
marked.setOptions({ breaks: true, gfm: true });
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Convert raw markdown into HTML that is safe to assign to innerHTML.
 *
 * When both `marked` and `DOMPurify` are loaded, the markdown is parsed and
 * the resulting HTML is sanitized. If either library is missing (e.g. the
 * CDN script failed to load), the text is HTML-escaped and returned as plain
 * text instead, so unsanitized markup never reaches the DOM.
 *
 * @param {string} raw - Markdown source accumulated from the stream.
 * @returns {string} Sanitized HTML, or escaped plain text as a fallback.
 */
function renderMarkdown(raw) {
  const text = raw == null ? '' : String(raw);

  // Minimal escaping for the five characters that matter in HTML text
  // and attribute contexts.
  const escapeHtml = (s) => s
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') {
    return escapeHtml(text);
  }
  return DOMPurify.sanitize(marked.parse(text));
}
|
||||||
|
|
||||||
// ---------- helpers ----------
|
// ---------- helpers ----------
|
||||||
|
|
||||||
function speakerLabel(role) {
|
function speakerLabel(role) {
|
||||||
@@ -97,7 +110,9 @@ async function drain() {
|
|||||||
if (!currentBtBody) return;
|
if (!currentBtBody) return;
|
||||||
draining = true;
|
draining = true;
|
||||||
while (queue.length) {
|
while (queue.length) {
|
||||||
currentBtBody.textContent += queue.shift();
|
currentBtRaw += queue.shift();
|
||||||
|
// Re-render markdown on each char (marked.parse is fast enough)
|
||||||
|
currentBtBody.innerHTML = renderMarkdown(currentBtRaw);
|
||||||
if (Math.random() < 0.04) scrollToBottom();
|
if (Math.random() < 0.04) scrollToBottom();
|
||||||
await sleep(TYPEWRITER_MS);
|
await sleep(TYPEWRITER_MS);
|
||||||
}
|
}
|
||||||
@@ -107,16 +122,19 @@ async function drain() {
|
|||||||
function sleep(ms) { return new Promise(r => setTimeout(r, ms)); }
|
function sleep(ms) { return new Promise(r => setTimeout(r, ms)); }
|
||||||
|
|
||||||
/**
 * Finalize the assistant message currently being streamed.
 *
 * Polls every 30 ms until the typewriter queue is empty and no drain is in
 * progress, then re-renders the complete markdown, shows a caret for 900 ms,
 * and clears the streaming state.
 */
function finishBt() {
  const waitForDrain = () => {
    // Typewriter still has characters to emit; check again shortly.
    const stillTyping = draining || queue.length;
    if (stillTyping) {
      setTimeout(waitForDrain, 30);
      return;
    }
    if (!currentBtBody) {
      return;
    }

    // Final markdown render with the complete text.
    currentBtBody.innerHTML = renderMarkdown(currentBtRaw);

    const cursor = document.createElement('span');
    cursor.className = 'caret';
    currentBtBody.appendChild(cursor);
    scrollToBottom();
    setTimeout(() => cursor.remove(), 900);

    // Reset streaming state so the next message starts fresh.
    currentBtBody = null;
    currentBtRaw = '';
  };

  waitForDrain();
}
|
||||||
@@ -188,6 +206,7 @@ function handleMessage(msg) {
|
|||||||
if (!currentBtBody) {
|
if (!currentBtBody) {
|
||||||
removeThinking();
|
removeThinking();
|
||||||
currentBtBody = makeMsg('bt');
|
currentBtBody = makeMsg('bt');
|
||||||
|
currentBtRaw = '';
|
||||||
}
|
}
|
||||||
if (msg.delta) enqueue(msg.delta);
|
if (msg.delta) enqueue(msg.delta);
|
||||||
if (msg.done) finishBt();
|
if (msg.done) finishBt();
|
||||||
|
|||||||
@@ -14,6 +14,8 @@
|
|||||||
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:ital,wght@0,400;0,500;0,600;1,400&family=Caveat:wght@400;500&family=Inter:wght@300;400;500;600&family=Source+Serif+Pro:ital,wght@0,400;0,600;1,400&family=JetBrains+Mono:wght@400;500&display=swap">
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:ital,wght@0,400;0,500;0,600;1,400&family=Caveat:wght@400;500&family=Inter:wght@300;400;500;600&family=Source+Serif+Pro:ital,wght@0,400;0,600;1,400&family=JetBrains+Mono:wght@400;500&display=swap">
|
||||||
|
|
||||||
<link rel="stylesheet" href="/static/chat.css">
|
<link rel="stylesheet" href="/static/chat.css">
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/marked@15/marked.min.js"></script>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/dompurify@3/dist/purify.min.js"></script>
|
||||||
</head>
|
</head>
|
||||||
<body
|
<body
|
||||||
data-palette="{{ ui_palette }}"
|
data-palette="{{ ui_palette }}"
|
||||||
|
|||||||
Reference in New Issue
Block a user