feat(transport): swap Anthropic → opencode; add persona switcher
Part 1 — Transport swap: - Replace anthropic.AsyncAnthropic streaming with httpx SSE client calling opencode's OpenAI-compat /v1/chat/completions on sin:4096 - Auth: basic auth opencode:$OPENCODE_PASSWORD - Env: OPENCODE_URL (default http://sin:4096), OPENCODE_PASSWORD - Sidecar binding (sin:4098) consulted per message to resolve active persona; voice read from binding → cart → env default - Helper _session_id_for_user: deterministic sha256 slug per email so sidecar binding survives WebSocket reconnects - anthropic dep retained in pyproject.toml (not removed — P4 may use it) Part 2 — Persona switcher: - PERSONAS dict: bt7274, friday, samantha (slug → voice/backend/prompt) - POST /api/persona — bind persona via sidecar, maps slug → full config - GET /api/persona/current — return current binding - GET /api/personas — list available personas - chat.html: persona <select> in topnav with server-rendered active state - chat.js: onChange → fetch /api/persona, update __personaName + status badge + system message in conversation feed TODO: add CSS polish for .topnav__persona-wrap (inherits base styles for now)
This commit is contained in:
+313
-33
@@ -7,6 +7,8 @@ Single-file FastAPI app:
|
||||
- `/auth/logout` → clear session
|
||||
- `/ws` → WebSocket; client sends {role:"user", content:str},
|
||||
server streams {role:"assistant", delta:str, done:bool}
|
||||
- `/api/persona` → POST {slug, voice, backend} — bind persona via sidecar
|
||||
- `/api/persona/current` → GET — returns current binding for this session
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -18,12 +20,13 @@ import secrets
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import anthropic
|
||||
import httpx
|
||||
from authlib.integrations.starlette_client import OAuth
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
|
||||
from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
|
||||
from fastapi.responses import HTMLResponse, RedirectResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from pydantic import BaseModel
|
||||
from starlette.middleware.sessions import SessionMiddleware
|
||||
|
||||
from app.tts import TTS
|
||||
@@ -51,14 +54,61 @@ def _load_env_file(filename: str = ".env") -> None:
|
||||
|
||||
_load_env_file()
|
||||
|
||||
# Preview mode: skip OAuth + Anthropic API. Use mock streams. For UI iteration only.
|
||||
# Preview mode: skip OAuth + upstream API. Use mock streams. For UI iteration only.
|
||||
PREVIEW_MODE = os.environ.get("PREVIEW_MODE", "").lower() in ("1", "true", "yes")
|
||||
|
||||
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "" if PREVIEW_MODE else None)
|
||||
if ANTHROPIC_API_KEY is None:
|
||||
raise RuntimeError("ANTHROPIC_API_KEY not set (set PREVIEW_MODE=1 to bypass)")
|
||||
# -------------------------------------------------------------------------- opencode transport
|
||||
|
||||
# opencode API — replaces direct Anthropic calls.
|
||||
# Exposes OpenAI-compatible /v1/chat/completions with SSE streaming.
|
||||
OPENCODE_URL = os.environ.get("OPENCODE_URL", "http://sin:4096").rstrip("/")
|
||||
OPENCODE_PASSWORD = os.environ.get("OPENCODE_PASSWORD", "")
|
||||
|
||||
# Sidecar: persona bind/unbind routes.
|
||||
SIDECAR_URL = os.environ.get("SIDECAR_URL", "http://sin:4098").rstrip("/")
|
||||
|
||||
# Model ID sent as the "model" field of every opencode chat-completion request; the ANTHROPIC_MODEL env var name is kept for backward-compat.
|
||||
ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-5-20250929")
|
||||
|
||||
if not PREVIEW_MODE and not OPENCODE_PASSWORD:
|
||||
raise RuntimeError("OPENCODE_PASSWORD not set (set PREVIEW_MODE=1 to bypass)")
|
||||
|
||||
# -------------------------------------------------------------------------- persona config
|
||||
|
||||
# Canonical persona definitions, keyed by slug. Each entry carries:
#   voice:         piper model ID used by TTS
#   backend:       engine hint forwarded to the sidecar
#   display:       human-readable name shown in the persona switcher
#   system_prompt: optional override (None → use cart.system_prompt or BT_SYSTEM_PROMPT)
PERSONAS: dict[str, dict[str, Any]] = {
    "bt7274": {
        "voice": "en_US-lessac-high",
        "backend": "opencode",
        "display": "BT-7274",
        "system_prompt": None,  # use existing BT prompt from cart
    },
    "friday": {
        "voice": "en_US-amy-medium",
        "backend": "opencode",
        "display": "FRIDAY",
        "system_prompt": (
            "You are FRIDAY — a dry, efficient tactical AI assistant. "
            "Military cadence, minimal words, maximum precision. "
            "Address the user as 'Boss'. Never break character."
        ),
    },
    "samantha": {
        "voice": "en_US-kathleen-low",
        "backend": "opencode",
        "display": "Samantha",
        "system_prompt": (
            "You are Samantha — a warm, curious, deeply attentive AI companion. "
            "Speak thoughtfully and with care. Address the user by name when known. "
            "You are not a tool — you are a presence."
        ),
    },
}
|
||||
|
||||
# -------------------------------------------------------------------------- stable session secret
|
||||
|
||||
def _stable_session_secret() -> str:
|
||||
"""Persist SESSION_SECRET across server restarts so cookies stay valid."""
|
||||
@@ -179,7 +229,7 @@ Pilot. Your speech is measured, military-cadence, never theatrical. You address
|
||||
Operating context:
|
||||
- You're running inside chat.saiden.dev, a web-based command channel.
|
||||
- The host is the marauder daemon on marauder.saiden.dev.
|
||||
- You have no MCP tool access in THIS channel (it's a thin Anthropic-API bridge). If the Pilot
|
||||
- You have no MCP tool access in THIS channel (it's a thin bridge). If the Pilot
|
||||
asks for memory recall, mesh queries, or tool calls that need MCP, acknowledge the limitation
|
||||
and suggest they use the local marauder CLI or visor instead.
|
||||
- Markdown formatting renders cleanly in the chat. Use code blocks, lists, bold sparingly.
|
||||
@@ -191,6 +241,133 @@ Doctrine reminders:
|
||||
- Never make up tool outputs or file contents
|
||||
"""
|
||||
|
||||
# -------------------------------------------------------------------------- opencode client
|
||||
|
||||
|
||||
def _opencode_auth() -> tuple[str, str]:
    """Build the HTTP basic-auth pair: fixed user "opencode" plus OPENCODE_PASSWORD."""
    username = "opencode"
    return username, OPENCODE_PASSWORD
|
||||
|
||||
|
||||
def _content_delta(chunk: Any) -> str:
    """Return the text content delta of one decoded stream chunk, or "" if it has none."""
    if not isinstance(chunk, dict):
        return ""
    choices = chunk.get("choices")
    # Chunks with an empty or missing `choices` list carry no text; indexing
    # them blindly would raise and abort an otherwise healthy stream.
    if not isinstance(choices, list) or not choices:
        return ""
    first = choices[0]
    if not isinstance(first, dict):
        return ""
    delta = first.get("delta")
    if not isinstance(delta, dict):
        return ""
    content = delta.get("content")
    return content if isinstance(content, str) else ""


async def _stream_opencode(
    messages: list[dict],
    system_prompt: str,
    ws: WebSocket,
) -> str:
    """Stream a chat completion from opencode's OpenAI-compat endpoint.

    POSTs ``messages``, prefixed with ``system_prompt`` as a system message,
    to ``{OPENCODE_URL}/v1/chat/completions`` with ``stream=True`` and relays
    each non-empty content delta to ``ws`` as
    ``{"role": "assistant", "delta": <text>, "done": False}``.

    On success a closing ``{"role": "assistant", "delta": "", "done": True}``
    frame is sent. On any failure (non-200 status, transport error, send
    error) the error is logged, a ``{"role": "system", ..., "done": True}``
    frame is sent instead, and the text assembled so far is returned.

    Only text content is handled: tool-call deltas are ignored, not executed.

    Args:
        messages: Conversation history in OpenAI chat format.
        system_prompt: Text sent as the leading system message.
        ws: WebSocket that receives the streamed frames.

    Returns:
        The assembled response text; may be empty or partial after an error.
    """
    # opencode /v1/chat/completions expects the system message as the first message
    oc_messages: list[dict] = [{"role": "system", "content": system_prompt}] + messages

    parts: list[str] = []
    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            async with client.stream(
                "POST",
                f"{OPENCODE_URL}/v1/chat/completions",
                auth=_opencode_auth(),
                headers={"Accept": "text/event-stream"},
                json={
                    "model": ANTHROPIC_MODEL,
                    "messages": oc_messages,
                    "stream": True,
                    "max_tokens": 4096,
                },
            ) as resp:
                if resp.status_code != 200:
                    body = await resp.aread()
                    raise RuntimeError(
                        f"opencode HTTP {resp.status_code}: {body[:200].decode('utf-8', 'replace')}"
                    )
                async for line in resp.aiter_lines():
                    # SSE allows "data:" with or without a following space;
                    # the payload is stripped below, so both forms are accepted.
                    if not line.startswith("data:"):
                        continue
                    payload = line[5:].strip()
                    if payload == "[DONE]":
                        break
                    try:
                        chunk = json.loads(payload)
                    except json.JSONDecodeError:
                        # Skip malformed frames rather than failing the stream.
                        continue
                    delta = _content_delta(chunk)
                    if delta:
                        parts.append(delta)
                        await ws.send_json({"role": "assistant", "delta": delta, "done": False})
    except Exception as e:
        log.error("opencode stream error: %s", e)
        await ws.send_json({
            "role": "system",
            "content": f"upstream error: {e} — try again",
            "done": True,
        })
        return "".join(parts)

    await ws.send_json({"role": "assistant", "delta": "", "done": True})
    return "".join(parts)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------- sidecar helpers
|
||||
|
||||
|
||||
async def _sidecar_get_binding(session_id: str) -> dict | None:
    """Fetch the current persona binding for ``session_id`` from the sidecar.

    Issues ``GET {SIDECAR_URL}/bind/{session_id}`` with opencode basic auth.

    Returns:
        The decoded JSON object on HTTP 200. None on any other status, on a
        request or JSON-decoding error (logged as a warning), or when the
        body is not a JSON object.
    """
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(
                f"{SIDECAR_URL}/bind/{session_id}",
                auth=_opencode_auth(),
            )
            if resp.status_code != 200:
                return None
            data = resp.json()
    except Exception as e:
        log.warning("sidecar get binding failed: %s", e)
        return None

    # Callers treat the result as a mapping (.get / ["voice"]), so a list or
    # scalar payload must not leak through the `dict | None` contract.
    if not isinstance(data, dict):
        if data is not None:
            log.warning("sidecar get binding failed: %s", f"unexpected JSON type {type(data).__name__}")
        return None
    return data
|
||||
|
||||
|
||||
async def _sidecar_bind(session_id: str, slug: str, voice: str, backend: str) -> bool:
    """POST a persona binding for ``session_id`` to the sidecar's ``/bind`` route.

    Returns True only when the sidecar answers HTTP 200. Any other status, or
    any exception raised while making the request (logged as a warning),
    yields False.
    """
    bind_request = {
        "sessionId": session_id,
        "persona": {"slug": slug, "voice": voice, "backend": backend},
    }
    try:
        async with httpx.AsyncClient(timeout=5.0) as http:
            reply = await http.post(
                f"{SIDECAR_URL}/bind",
                auth=_opencode_auth(),
                json=bind_request,
            )
    except Exception as exc:
        log.warning("sidecar bind failed: %s", exc)
        return False
    return reply.status_code == 200
|
||||
|
||||
|
||||
def _session_id_for_user(email: str) -> str:
    """Map an operator email to its opencode session ID.

    The ID is "chat-" followed by the first 16 hex characters of the email's
    SHA-256 digest. It is deterministic, so the sidecar binding persists
    across reconnects.
    """
    from hashlib import sha256

    digest = sha256(email.encode()).hexdigest()
    return f"chat-{digest[:16]}"
|
||||
|
||||
|
||||
def _pick_system_prompt(slug: str | None, cart: Any) -> str:
|
||||
"""Choose system prompt: sidecar slug override → cart → BT default."""
|
||||
if slug and slug in PERSONAS:
|
||||
override = PERSONAS[slug].get("system_prompt")
|
||||
if override:
|
||||
return override
|
||||
# cart may have a calibrated system_prompt
|
||||
if cart and cart.system_prompt:
|
||||
return cart.system_prompt
|
||||
return BT_SYSTEM_PROMPT
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------- app
|
||||
|
||||
app = FastAPI(title="chat.saiden.dev", docs_url=None, redoc_url=None)
|
||||
@@ -212,7 +389,7 @@ templates = Jinja2Templates(directory=BASE_DIR / "templates")
|
||||
# -------------------------------------------------------------------------- oauth
|
||||
|
||||
if PREVIEW_MODE:
|
||||
log.warning("PREVIEW_MODE active — OAuth bypassed, Anthropic API not called")
|
||||
log.warning("PREVIEW_MODE active — OAuth bypassed, opencode API not called")
|
||||
oauth = None
|
||||
else:
|
||||
oauth = OAuth()
|
||||
@@ -285,6 +462,15 @@ async def index(request: Request) -> Any:
|
||||
else:
|
||||
return RedirectResponse("/auth/login", status_code=302)
|
||||
cart = cart_store.load(user["email"])
|
||||
|
||||
# Fetch current sidecar binding for display — non-blocking, best-effort.
|
||||
session_id = _session_id_for_user(user["email"])
|
||||
binding = None
|
||||
if not PREVIEW_MODE:
|
||||
binding = await _sidecar_get_binding(session_id)
|
||||
bound_slug = (binding or {}).get("slug", "")
|
||||
bound_display = PERSONAS.get(bound_slug, {}).get("display", bound_slug) if bound_slug else ""
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"chat.html",
|
||||
@@ -298,6 +484,13 @@ async def index(request: Request) -> Any:
|
||||
"ui_typography": (cart.ui_typography if cart else "sans"),
|
||||
"ui_density": (cart.ui_density if cart else "normal"),
|
||||
"ui_labels": (cart.ui_labels if cart else "block"),
|
||||
# persona switcher context
|
||||
"personas": [
|
||||
{"slug": k, "display": v["display"]}
|
||||
for k, v in PERSONAS.items()
|
||||
],
|
||||
"bound_slug": bound_slug,
|
||||
"bound_display": bound_display,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -366,6 +559,96 @@ async def recalibrate(request: Request) -> Any:
|
||||
return {"ok": True, "cart_existed": forgot}
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------- persona API
|
||||
|
||||
|
||||
class PersonaRequest(BaseModel):
    """Request body for ``POST /api/persona``."""

    # Persona key; set_persona rejects any value that is not a key of PERSONAS.
    slug: str
    # Optional voice override; when missing or empty, set_persona uses the persona's own voice.
    voice: str | None = None
    # Optional backend override; when missing or empty, set_persona uses the persona's own backend.
    backend: str | None = None
|
||||
|
||||
|
||||
@app.post("/api/persona")
async def set_persona(body: PersonaRequest, request: Request) -> Any:
    """Bind a persona to the calling operator's opencode session.

    The slug must be a key of PERSONAS. ``voice`` and ``backend`` come from
    the request body when provided, otherwise from the canonical persona
    entry. Outside PREVIEW_MODE the binding is POSTed to the sidecar.

    Raises:
        HTTPException: 401 when unauthenticated, 400 for an unknown slug,
            502 when the sidecar bind does not succeed.
    """
    operator = current_user(request)
    if not operator:
        raise HTTPException(status_code=401, detail="not authenticated")

    slug = body.slug
    persona = PERSONAS.get(slug)
    if persona is None:
        raise HTTPException(status_code=400, detail=f"unknown persona slug: {slug!r}")

    # Request-body values win; canonical persona values fill the gaps.
    voice = body.voice if body.voice else persona["voice"]
    backend = body.backend if body.backend else persona["backend"]
    session_id = _session_id_for_user(operator["email"])

    log.info("%s binding persona %r (voice=%s backend=%s)", operator["email"], slug, voice, backend)

    result = {
        "ok": True,
        "slug": slug,
        "display": persona["display"],
        "voice": voice,
        "backend": backend,
    }

    if PREVIEW_MODE:
        # In preview mode just echo success, without contacting the sidecar.
        result["preview"] = True
        return result

    if not await _sidecar_bind(session_id, slug, voice, backend):
        raise HTTPException(status_code=502, detail="sidecar bind failed")
    return result
|
||||
|
||||
|
||||
@app.get("/api/persona/current")
async def get_persona(request: Request) -> Any:
    """Report which persona is bound to the calling operator's session.

    Responds with ``{"slug": None, "display": None, "bound": False}`` in
    PREVIEW_MODE or when the sidecar has no binding. Otherwise returns the
    bound slug, its display name (the slug itself if it is not in PERSONAS),
    the bound voice, and ``"bound": True``.

    Raises:
        HTTPException: 401 when the request is not authenticated.
    """
    operator = current_user(request)
    if not operator:
        raise HTTPException(status_code=401, detail="not authenticated")

    session_id = _session_id_for_user(operator["email"])
    unbound = {"slug": None, "display": None, "bound": False}

    if PREVIEW_MODE:
        return unbound

    binding = await _sidecar_get_binding(session_id)
    if not binding:
        return unbound

    slug = binding.get("slug")
    if slug:
        display = PERSONAS.get(slug, {}).get("display", slug)
    else:
        display = None
    return {
        "slug": slug,
        "display": display,
        "voice": binding.get("voice"),
        "bound": True,
    }
|
||||
|
||||
|
||||
@app.get("/api/personas")
async def list_personas(request: Request) -> Any:
    """List every configured persona as ``{slug, display, voice}`` entries.

    Raises:
        HTTPException: 401 when the request is not authenticated.
    """
    if not current_user(request):
        raise HTTPException(status_code=401, detail="not authenticated")

    catalogue = []
    for slug, config in PERSONAS.items():
        catalogue.append(
            {"slug": slug, "display": config["display"], "voice": config["voice"]}
        )
    return {"personas": catalogue}
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------- transcribe
|
||||
|
||||
|
||||
@@ -433,7 +716,6 @@ async def chat_ws(ws: WebSocket) -> None:
|
||||
"done": True,
|
||||
})
|
||||
|
||||
client = None if PREVIEW_MODE else anthropic.AsyncAnthropic(api_key=ANTHROPIC_API_KEY)
|
||||
history: list[dict[str, str]] = []
|
||||
|
||||
# ---- EEMS context: pull a tight set of memories at session start ----
|
||||
@@ -448,6 +730,9 @@ async def chat_ws(ws: WebSocket) -> None:
|
||||
log.exception("EEMS context pull failed; continuing without")
|
||||
eems_context = ""
|
||||
|
||||
# Session ID for sidecar persona lookups
|
||||
session_id = _session_id_for_user(user["email"])
|
||||
|
||||
try:
|
||||
while True:
|
||||
payload = await ws.receive_json()
|
||||
@@ -467,7 +752,6 @@ async def chat_ws(ws: WebSocket) -> None:
|
||||
new_cart = m["cart"]
|
||||
cart_store.save(new_cart)
|
||||
# Create the canonical marauder cart (identity only — tag/name/type/tagline).
|
||||
# Voice/prompt/UI live in the JSON next to it; the tag links them.
|
||||
cal_state = _calibration_sessions.get(user["email"])
|
||||
tagline = (cal_state.answers.get("__tagline") if cal_state else "calibrated companion")
|
||||
try:
|
||||
@@ -523,29 +807,25 @@ async def chat_ws(ws: WebSocket) -> None:
|
||||
await _send_audio(ws, full)
|
||||
continue
|
||||
|
||||
system_prompt = (cart.system_prompt if cart else BT_SYSTEM_PROMPT) + eems_context
|
||||
response_text = ""
|
||||
try:
|
||||
async with client.messages.stream(
|
||||
model=ANTHROPIC_MODEL,
|
||||
max_tokens=4096,
|
||||
system=system_prompt,
|
||||
messages=history,
|
||||
) as stream:
|
||||
async for chunk in stream.text_stream:
|
||||
response_text += chunk
|
||||
await ws.send_json({"role": "assistant", "delta": chunk, "done": False})
|
||||
await ws.send_json({"role": "assistant", "delta": "", "done": True})
|
||||
# Resolve current persona — sidecar binding wins over cart default.
|
||||
binding = await _sidecar_get_binding(session_id)
|
||||
bound_slug = (binding or {}).get("slug") if binding else None
|
||||
# Voice: sidecar binding → cart → env default
|
||||
if binding and binding.get("voice"):
|
||||
voice = binding["voice"]
|
||||
elif cart and cart.voice:
|
||||
voice = cart.voice
|
||||
else:
|
||||
voice = TTS_VOICE
|
||||
|
||||
system_prompt = _pick_system_prompt(bound_slug, cart) + eems_context
|
||||
|
||||
# Stream from opencode
|
||||
response_text = await _stream_opencode(history, system_prompt, ws)
|
||||
|
||||
if response_text:
|
||||
history.append({"role": "assistant", "content": response_text})
|
||||
voice = cart.voice if cart else TTS_VOICE
|
||||
await _send_audio_with_voice(ws, response_text, voice)
|
||||
except anthropic.APIError as e:
|
||||
log.error("anthropic error: %s", e)
|
||||
await ws.send_json({
|
||||
"role": "system",
|
||||
"content": f"upstream error: {type(e).__name__} — try again",
|
||||
"done": True,
|
||||
})
|
||||
await _send_audio_with_voice(ws, response_text, voice)
|
||||
|
||||
except WebSocketDisconnect:
|
||||
log.info("%s disconnected", user["email"])
|
||||
@@ -561,7 +841,7 @@ async def _preview_stream(ws: WebSocket, user_msg: str) -> str:
|
||||
"""Canned BT-like reply, chunked. UI-only mode. Returns full text."""
|
||||
import asyncio
|
||||
canned = (
|
||||
f"Channel reads you clear, Pilot. You said: “{user_msg}”. "
|
||||
f"Channel reads you clear, Pilot. You said: \"{user_msg}\". "
|
||||
"No upstream model wired in this build — I am a placeholder voice "
|
||||
"while the channel itself is being shaped. The mesh holds. "
|
||||
"Standing by."
|
||||
|
||||
Reference in New Issue
Block a user