feat: chatterbox TTS via madcat-tts daemon, Web Speech API STT, styled persona picker

- tts.py: replace piper subprocess with HTTP POST to madcat-tts /v1/audio/speech (chatterbox voice cloning)
- chat.js: replace whisper server upload with browser Web Speech API (webkitSpeechRecognition)
- chat.css: style persona picker — appearance:none select, themed with CSS vars, mobile responsive
- main.py: default TTS voice → bt7274-en
2026-05-29 16:43:41 +02:00
parent f3c35eba72
commit ae384fe618
4 changed files with 165 additions and 131 deletions
@@ -1,101 +1,55 @@
-"""Piper TTS adapter for chat.saiden.dev.
+"""HTTP TTS adapter for chat.saiden.dev — madcat-tts daemon (chatterbox).

-Synthesises text → WAV bytes by subprocess'ing the `piper` CLI binary
-(already installed on every host that runs marauder-os).
+Calls the madcat-tts daemon's OpenAI-compatible /v1/audio/speech endpoint
+to synthesize text → WAV bytes via chatterbox voice cloning.

-Designed to fail silently — if piper is missing or synthesis errors,
+Designed to fail silently — if the daemon is down or synthesis errors,
 the chat still works, just without voice.
 """

 from __future__ import annotations

-import asyncio
 import logging
 import os
-import shutil
-import tempfile
-from pathlib import Path
+
+import httpx

 log = logging.getLogger("chat-saiden.tts")

-# Where the voice .onnx files live across hosts.
-# Order: env override → macOS marauder → linux marauder → linux marauder-agent (mesh node) → linux ~/.local
-_VOICE_SEARCH_PATHS = [
-    Path.home() / "Library/Application Support/marauder/voices",
-    Path("/home") / os.environ.get("USER", "marauder") / ".local/share/marauder/voices",
-    Path.home() / ".local/share/marauder/voices",
-    Path.home() / ".local/share/psn/voices",
-    Path.home() / ".local/share/piper/voices",
-]
-
-
-def _resolve_voice_path(name: str) -> Path | None:
-    """Return absolute path to a voice model by short name, or None."""
-    # explicit override
-    override = os.environ.get("TTS_VOICE_PATH")
-    if override:
-        p = Path(override)
-        return p if p.exists() else None
-
-    for base in _VOICE_SEARCH_PATHS:
-        candidate = base / f"{name}.onnx"
-        if candidate.exists():
-            return candidate
-    return None
-
-
-PIPER_BIN = shutil.which("piper") or os.environ.get("PIPER_BIN")
+MADCAT_TTS_URL = os.environ.get("MADCAT_TTS_URL", "http://localhost:14099")


 class TTS:
-    """Subprocess-based piper synthesizer with graceful fallback."""
+    """HTTP-based madcat-tts synthesizer with graceful fallback."""

-    def __init__(self, voice: str = "en_US-amy-medium") -> None:
+    def __init__(self, voice: str = "bt7274-en") -> None:
        self.voice = voice
-        self.voice_path = _resolve_voice_path(voice)
-        self.bin = PIPER_BIN
-        if not self.bin:
-            log.warning("piper binary not found on PATH — TTS disabled")
-        elif not self.voice_path:
-            log.warning("voice '%s' not found in known locations — TTS disabled", voice)
-        else:
-            log.info("TTS enabled — voice=%s path=%s bin=%s", voice, self.voice_path, self.bin)
+        self._url = f"{MADCAT_TTS_URL.rstrip('/')}/v1/audio/speech"
+        log.info("TTS enabled — voice=%s url=%s", voice, self._url)

    @property
    def available(self) -> bool:
-        return bool(self.bin and self.voice_path)
+        return True

    async def synthesize(self, text: str) -> bytes | None:
        """Return WAV bytes, or None on failure / unavailable."""
-        if not self.available:
+        if not text or not text.strip():
            return None
-        if not text.strip():
-            return None
-
-        # piper wants an output file path (no stdout streaming for WAV in older versions)
-        out = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-        out.close()
-        out_path = out.name
        try:
-            proc = await asyncio.create_subprocess_exec(
-                self.bin,
-                "--model", str(self.voice_path),
-                "--output_file", out_path,
-                stdin=asyncio.subprocess.PIPE,
-                stdout=asyncio.subprocess.DEVNULL,
-                stderr=asyncio.subprocess.PIPE,
-            )
-            _, stderr = await proc.communicate(text.encode("utf-8"))
-            if proc.returncode != 0:
-                log.error("piper exited %s: %s", proc.returncode, stderr.decode("utf-8", "replace")[:300])
-                return None
-            with open(out_path, "rb") as f:
-                return f.read()
-        except Exception:
-            log.exception("piper synthesis failed")
+            async with httpx.AsyncClient(timeout=15.0) as client:
+                resp = await client.post(
+                    self._url,
+                    json={
+                        "input": text,
+                        "voice": self.voice,
+                        "response_format": "wav",
+                    },
+                )
+                resp.raise_for_status()
+                return resp.content
+        except httpx.TimeoutException:
+            log.warning("TTS timeout for voice=%s (text=%s…)", self.voice, text[:60])
+            return None
+        except Exception:
+            log.exception("TTS synthesis failed for voice=%s", self.voice)
            return None
-        finally:
-            try:
-                os.unlink(out_path)
-            except OSError:
-                pass