ae384fe618
- tts.py: replace piper subprocess with HTTP POST to madcat-tts /v1/audio/speech (chatterbox voice cloning) - chat.js: replace whisper server upload with browser Web Speech API (webkitSpeechRecognition) - chat.css: style persona picker — appearance:none select, themed with CSS vars, mobile responsive - main.py: default TTS voice → bt7274-en
56 lines
1.8 KiB
Python
56 lines
1.8 KiB
Python
"""HTTP TTS adapter for chat.saiden.dev — madcat-tts daemon (chatterbox).
|
|
|
|
Calls the madcat-tts daemon's OpenAI-compatible /v1/audio/speech endpoint
|
|
to synthesize text → WAV bytes via chatterbox voice cloning.
|
|
|
|
Designed to degrade gracefully — if the daemon is down or synthesis errors,
the failure is logged and the chat still works, just without voice.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
|
|
import httpx
|
|
|
|
# Module-level logger for the TTS adapter; every log call below goes through it.
log = logging.getLogger("chat-saiden.tts")
|
|
|
|
# Base URL of the madcat-tts daemon. Override with the MADCAT_TTS_URL
# environment variable; falls back to http://localhost:14099 when unset.
# Read once at import time, so later changes to the environment are not seen.
MADCAT_TTS_URL = os.environ.get("MADCAT_TTS_URL", "http://localhost:14099")
|
|
|
|
|
|
class TTS:
|
|
"""HTTP-based madcat-tts synthesizer with graceful fallback."""
|
|
|
|
def __init__(self, voice: str = "bt7274-en") -> None:
|
|
self.voice = voice
|
|
self._url = f"{MADCAT_TTS_URL.rstrip('/')}/v1/audio/speech"
|
|
log.info("TTS enabled — voice=%s url=%s", voice, self._url)
|
|
|
|
@property
|
|
def available(self) -> bool:
|
|
return True
|
|
|
|
async def synthesize(self, text: str) -> bytes | None:
|
|
"""Return WAV bytes, or None on failure / unavailable."""
|
|
if not text or not text.strip():
|
|
return None
|
|
try:
|
|
async with httpx.AsyncClient(timeout=15.0) as client:
|
|
resp = await client.post(
|
|
self._url,
|
|
json={
|
|
"input": text,
|
|
"voice": self.voice,
|
|
"response_format": "wav",
|
|
},
|
|
)
|
|
resp.raise_for_status()
|
|
return resp.content
|
|
except httpx.TimeoutException:
|
|
log.warning("TTS timeout for voice=%s (text=%s…)", self.voice, text[:60])
|
|
return None
|
|
except Exception:
|
|
log.exception("TTS synthesis failed for voice=%s", self.voice)
|
|
return None
|