From 13bb1c354bc33e46d725353f69b3f3d64226d65f Mon Sep 17 00:00:00 2001
From: marauder-actual <marauder@saiden.dev>
Date: Fri, 29 May 2026 19:05:04 +0200
Subject: [PATCH] fix: strip markdown for TTS + render rich markdown in chat UI

- _md_to_speech(): AST-based markdown stripping via markdown-it-py
- Truncate TTS input at 450 chars on sentence boundary (chatterbox overflow)
- chat.js: render assistant messages as markdown via marked.js + DOMPurify
- Typewriter accumulates raw text, renders markdown progressively
---
 app/main.py             | 56 ++++++++++++++++++++++++++++++++++++++++-
 app/static/chat.js      | 23 +++++++++++++++--
 app/templates/chat.html |  2 ++
 3 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/app/main.py b/app/main.py
index 50a0193..853debe 100644
--- a/app/main.py
+++ b/app/main.py
@@ -939,17 +939,71 @@ async def _send_audio(ws: WebSocket, text: str) -> None:
     await _send_audio_with_voice(ws, text, tts.voice)
 
 
+_TTS_MAX_CHARS = 450  # chatterbox s3-tokenizer overflows beyond ~500 chars
+
+
+def _md_to_speech(text: str) -> str:
+    """Strip markdown to plain speech-ready text, capped for TTS safety.
+
+    1. Parse markdown AST — skip code blocks, horizontal rules.
+    2. Extract plain text from inline nodes (text, inline code, line breaks).
+    3. Truncate at the last sentence boundary within _TTS_MAX_CHARS to avoid
+       chatterbox token overflow (garbled audio on long inputs).
+
+    Returns whitespace-normalized text of at most _TTS_MAX_CHARS characters.
+    """
+    import re
+    from markdown_it import MarkdownIt
+
+    md = MarkdownIt()
+    tokens = md.parse(text)
+    parts: list[str] = []
+    for token in tokens:
+        if token.type in ("fence", "hr", "code_block"):
+            continue
+        if token.type in ("paragraph_close", "heading_close", "blockquote_close", "list_item_close"):
+            parts.append(" ")
+            continue
+        if token.children:
+            for child in token.children:
+                if child.type in ("text", "code_inline"):
+                    parts.append(child.content)
+                elif child.type in ("softbreak", "hardbreak"):
+                    # Both break kinds separate words; without this, hard breaks glue lines together.
+                    parts.append(" ")
+        elif token.type == "inline" and token.content and not token.children:
+            parts.append(token.content)
+    clean = " ".join("".join(parts).split()).strip()
+
+    if len(clean) <= _TTS_MAX_CHARS:
+        return clean
+    # Cut after the last sentence-ending punctuation that is followed by whitespace.
+    # One extra char is scanned so punctuation sitting exactly at the limit still counts.
+    ends = [m.end() for m in re.finditer(r"[.!?](?=\s)", clean[: _TTS_MAX_CHARS + 1])]
+    if ends:
+        return clean[: ends[-1]]
+    # No sentence boundary — hard cut at last space
+    truncated = clean[:_TTS_MAX_CHARS]
+    last_space = truncated.rfind(" ")
+    if last_space > 200:
+        return truncated[:last_space].strip()
+    return truncated.strip()
+
+
 async def _send_audio_with_voice(ws: WebSocket, text: str, voice_id: str) -> None:
     """Synthesize text in a specific voice and ship as audio. Used post-calibration."""
     if not TTS_ENABLED:
         return
     import base64
     try:
+        speech_text = _md_to_speech(text) if text else ""
+        if not speech_text:
+            return
         # spin up a per-voice synthesizer (cheap — just object init)
         per_voice = TTS(voice=voice_id) if voice_id != (tts.voice if tts else "") else tts
         if not per_voice or not per_voice.available:
             return
-        wav = await per_voice.synthesize(text)
+        wav = await per_voice.synthesize(speech_text)
         if not wav:
             return
         await ws.send_json({
diff --git a/app/static/chat.js b/app/static/chat.js
index 7b25244..f6ac683 100644
--- a/app/static/chat.js
+++ b/app/static/chat.js
@@ -21,9 +21,22 @@ let ws = null;
 let connectAttempts = 0;
 let lastSpeaker = null;       // 'user' | 'bt' | 'system' | null
 let currentBtBody = null;     // active streaming .msg__body element
+let currentBtRaw = '';        // raw markdown accumulator for streaming
 let queue = [];
 let draining = false;
 
+// Configure marked: GFM syntax, single newlines become <br>. Sanitizing happens in renderMarkdown.
+if (typeof marked !== 'undefined') {
+  marked.setOptions({ breaks: true, gfm: true });
+}
+
+function renderMarkdown(raw) {
+  // Result is assigned to innerHTML, so fail closed: without BOTH libraries emit escaped text only.
+  const escapeHtml = s => s.replace(/[&<>"']/g, c => `&#${c.charCodeAt(0)};`);
+  if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') return escapeHtml(raw);
+  return DOMPurify.sanitize(marked.parse(raw));
+}
+
 // ---------- helpers ----------
 
 function speakerLabel(role) {
@@ -97,7 +110,9 @@ async function drain() {
   if (!currentBtBody) return;
   draining = true;
   while (queue.length) {
-    currentBtBody.textContent += queue.shift();
+    currentBtRaw += queue.shift();
+    // Re-render markdown on each char (marked.parse is fast enough)
+    currentBtBody.innerHTML = renderMarkdown(currentBtRaw);
     if (Math.random() < 0.04) scrollToBottom();
     await sleep(TYPEWRITER_MS);
   }
@@ -107,16 +122,19 @@ async function drain() {
 function sleep(ms) { return new Promise(r => setTimeout(r, ms)); }
 
 function finishBt() {
-  // wait for the queue to drain before adding caret
+  // wait for the queue to drain, then do the final render and show the caret
   const tick = () => {
     if (draining || queue.length) { setTimeout(tick, 30); return; }
     if (!currentBtBody) return;
+    // Final markdown render of the complete text; innerHTML is replaced, so the caret is appended after it
     currentBtBody.innerHTML = renderMarkdown(currentBtRaw);
     const caret = document.createElement('span');
     caret.className = 'caret';
     currentBtBody.appendChild(caret);
     scrollToBottom();
     setTimeout(() => caret.remove(), 900);
     currentBtBody = null;
+    currentBtRaw = '';
   };
   tick();
 }
@@ -188,6 +206,7 @@ function handleMessage(msg) {
     if (!currentBtBody) {
       removeThinking();
       currentBtBody = makeMsg('bt');
+      currentBtRaw = '';
     }
     if (msg.delta) enqueue(msg.delta);
     if (msg.done) finishBt();
diff --git a/app/templates/chat.html b/app/templates/chat.html
index 8dbb0f8..1ac49b7 100644
--- a/app/templates/chat.html
+++ b/app/templates/chat.html
@@ -14,6 +14,8 @@
   <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:ital,wght@0,400;0,500;0,600;1,400&family=Caveat:wght@400;500&family=Inter:wght@300;400;500;600&family=Source+Serif+Pro:ital,wght@0,400;0,600;1,400&family=JetBrains+Mono:wght@400;500&display=swap">
 
   <link rel="stylesheet" href="/static/chat.css">
+  <script src="https://cdn.jsdelivr.net/npm/marked@15/marked.min.js"></script>
+  <script src="https://cdn.jsdelivr.net/npm/dompurify@3/dist/purify.min.js"></script>
 </head>
 <body
     data-palette="{{ ui_palette }}"