# chat/app/tts.py

"""HTTP TTS adapter for chat.saiden.dev — madcat-tts daemon (chatterbox).

Calls the madcat-tts daemon's OpenAI-compatible /v1/audio/speech endpoint
to synthesize text → WAV bytes via chatterbox voice cloning.

Designed to fail soft — if the daemon is down or synthesis errors, the
failure is logged and the chat still works, just without voice.
"""

from __future__ import annotations

import logging
import os

import httpx

# Module-level logger; uses a fixed logger name rather than ``__name__``.
log = logging.getLogger("chat-saiden.tts")

# Base URL of the madcat-tts daemon. Read once at import time from the
# MADCAT_TTS_URL environment variable; falls back to the local default port
# when the variable is not set.
MADCAT_TTS_URL = os.environ.get("MADCAT_TTS_URL", "http://localhost:14099")


class TTS:
    """HTTP-based madcat-tts synthesizer with graceful fallback.

    Each call to :meth:`synthesize` POSTs the text to the daemon's
    ``/v1/audio/speech`` endpoint. Any failure (timeout, error status,
    transport error, empty response body) is logged and reported to the
    caller as ``None`` instead of being raised.

    Args:
        voice: Voice identifier sent as the ``voice`` field of every request.
        timeout: Per-request timeout in seconds handed to the HTTP client.
    """

    def __init__(self, voice: str = "bt7274-en", *, timeout: float = 15.0) -> None:
        self.voice = voice
        self._timeout = timeout
        # Strip a trailing slash so the joined endpoint never contains "//".
        self._url = f"{MADCAT_TTS_URL.rstrip('/')}/v1/audio/speech"
        log.info("TTS enabled — voice=%s url=%s", voice, self._url)

    @property
    def available(self) -> bool:
        """Always ``True``: no health probe is performed here.

        An unreachable daemon shows up as ``None`` from :meth:`synthesize`.
        """
        return True

    async def synthesize(self, text: str) -> bytes | None:
        """Return WAV bytes, or None on failure / unavailable.

        Args:
            text: Text to speak. Empty or whitespace-only input is skipped
                without contacting the daemon.

        Returns:
            The raw response body on success, or ``None`` when the input is
            blank, the request times out, the daemon answers with an error
            status, any other exception occurs, or the body is empty.
        """
        if not text or not text.strip():
            return None

        payload = {
            "input": text,
            "voice": self.voice,
            "response_format": "wav",
        }
        try:
            # A short-lived client per request keeps this adapter stateless.
            async with httpx.AsyncClient(timeout=self._timeout) as client:
                resp = await client.post(self._url, json=payload)
                resp.raise_for_status()
                audio = resp.content
        except httpx.TimeoutException:
            log.warning("TTS timeout for voice=%s (text=%s…)", self.voice, text[:60])
            return None
        except Exception:
            # Deliberate catch-all boundary: voice is optional, so nothing
            # raised here may propagate into the chat flow.
            log.exception("TTS synthesis failed for voice=%s", self.voice)
            return None

        if not audio:
            # A successful status with zero bytes is not playable audio;
            # report it as a failure rather than handing back b"".
            log.warning("TTS returned an empty body for voice=%s", self.voice)
            return None
        return audio