diff --git a/app/main.py b/app/main.py index 9cb8430..6d12915 100644 --- a/app/main.py +++ b/app/main.py @@ -60,16 +60,16 @@ PREVIEW_MODE = os.environ.get("PREVIEW_MODE", "").lower() in ("1", "true", "yes" # -------------------------------------------------------------------------- opencode transport -# opencode API — replaces direct Anthropic calls. -# Exposes OpenAI-compatible /v1/chat/completions with SSE streaming. +# opencode API — uses the opencode server HTTP API (POST /session/:id/message). OPENCODE_URL = os.environ.get("OPENCODE_URL", "http://sin:4096").rstrip("/") OPENCODE_PASSWORD = os.environ.get("OPENCODE_PASSWORD", "") # Sidecar: persona bind/unbind routes. SIDECAR_URL = os.environ.get("SIDECAR_URL", "http://sin:4098").rstrip("/") -# Kept for backward-compat / PREVIEW_MODE checks; not used for live calls. -ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-5-20250929") +# Model to use for opencode sessions. +OPENCODE_MODEL = os.environ.get("OPENCODE_MODEL", "claude-sonnet-4-6") +OPENCODE_PROVIDER = os.environ.get("OPENCODE_PROVIDER", "anthropic") if not PREVIEW_MODE and not OPENCODE_PASSWORD: raise RuntimeError("OPENCODE_PASSWORD not set (set PREVIEW_MODE=1 to bypass)") @@ -250,61 +250,78 @@ def _opencode_auth() -> tuple[str, str]: return ("opencode", OPENCODE_PASSWORD) -async def _stream_opencode( - messages: list[dict], +# Per-user opencode session cache: email → session_id. +# Sessions are reused across WebSocket reconnects so conversation history persists. +_opencode_sessions: dict[str, str] = {} + + +async def _ensure_opencode_session(email: str) -> str: + """Return an opencode session ID for this user, creating one if needed.""" + if email in _opencode_sessions: + return _opencode_sessions[email] + async with httpx.AsyncClient(timeout=30.0) as client: + resp = await client.post( + f"{OPENCODE_URL}/session", + auth=_opencode_auth(), + headers={"x-opencode-directory": "/home/madcat"}, + json={"title": f"chat-saiden:{email}"}, + ) + resp.raise_for_status() + sid = resp.json()["id"] + _opencode_sessions[email] = sid + log.info("created opencode session %s for %s", sid, email) + return sid + + +async def _send_to_opencode( + user_text: str, system_prompt: str, + session_id: str, ws: WebSocket, ) -> str: - """Stream a chat completion from opencode's OpenAI-compat endpoint. + """Send a message to an opencode session and stream the response to the WebSocket. - Sends deltas to the WebSocket as they arrive. - Returns the full assembled response text. - Tool-use blocks embedded in the stream are executed and fed back as follow-up - messages (single round of tool use, same as the old Anthropic path). + Uses POST /session/:id/message (synchronous — waits for full LLM response). + Then streams the text word-by-word to the WebSocket for the typewriter effect. + Returns the full response text. """ - # opencode /v1/chat/completions expects the system message as the first message - oc_messages: list[dict] = [{"role": "system", "content": system_prompt}] + messages - full_response = "" try: async with httpx.AsyncClient(timeout=120.0) as client: - async with client.stream( - "POST", - f"{OPENCODE_URL}/v1/chat/completions", + resp = await client.post( + f"{OPENCODE_URL}/session/{session_id}/message", auth=_opencode_auth(), - headers={"Accept": "text/event-stream"}, + headers={"x-opencode-directory": "/home/madcat"}, json={ - "model": ANTHROPIC_MODEL, - "messages": oc_messages, - "stream": True, - "max_tokens": 4096, + "parts": [{"type": "text", "text": user_text}], + "system": system_prompt, + "model": { + "providerID": OPENCODE_PROVIDER, + "modelID": OPENCODE_MODEL, + }, }, - ) as resp: - if resp.status_code != 200: - body = await resp.aread() - raise RuntimeError( - f"opencode HTTP {resp.status_code}: {body[:200].decode('utf-8', 'replace')}" - ) - async for line in resp.aiter_lines(): - if not line.startswith("data: "): - continue - payload = line[6:].strip() - if payload == "[DONE]": - break - try: - chunk = json.loads(payload) - except json.JSONDecodeError: - continue - delta = ( - chunk.get("choices", [{}])[0] - .get("delta", {}) - .get("content") or "" - ) - if delta: - full_response += delta - await ws.send_json({"role": "assistant", "delta": delta, "done": False}) + ) + if resp.status_code != 200: + raise RuntimeError( + f"opencode HTTP {resp.status_code}: {resp.text[:200]}" + ) + data = resp.json() + # Extract text from response parts + parts = data.get("parts", []) + for part in parts: + if part.get("type") == "text": + full_response += part.get("text", "") + + # Stream text word-by-word to the WebSocket for typewriter effect + if full_response: + words = full_response.split(" ") + for i, word in enumerate(words): + delta = word if i == 0 else " " + word + await ws.send_json({"role": "assistant", "delta": delta, "done": False}) + await asyncio.sleep(0.03) # ~30ms per word — fast typewriter + except Exception as e: - log.error("opencode stream error: %s", e) + log.error("opencode error: %s", e) await ws.send_json({ "role": "system", "content": f"upstream error: {e} — try again", @@ -807,8 +824,9 @@ async def chat_ws(ws: WebSocket) -> None: system_prompt = _pick_system_prompt(bound_slug, cart) + eems_context - # Stream from opencode - response_text = await _stream_opencode(history, system_prompt, ws) + # Send to opencode and stream response + oc_session = await _ensure_opencode_session(user["email"]) + response_text = await _send_to_opencode(user_msg, system_prompt, oc_session, ws) if response_text: history.append({"role": "assistant", "content": response_text})