diff --git a/app/main.py b/app/main.py
index 9cb8430..6d12915 100644
--- a/app/main.py
+++ b/app/main.py
@@ -60,16 +60,16 @@ PREVIEW_MODE = os.environ.get("PREVIEW_MODE", "").lower() in ("1", "true", "yes"
 
 # -------------------------------------------------------------------------- opencode transport
 
-# opencode API — replaces direct Anthropic calls.
-# Exposes OpenAI-compatible /v1/chat/completions with SSE streaming.
+# opencode API — uses the opencode server HTTP API (POST /session/:id/message).
 OPENCODE_URL = os.environ.get("OPENCODE_URL", "http://sin:4096").rstrip("/")
 OPENCODE_PASSWORD = os.environ.get("OPENCODE_PASSWORD", "")
 
 # Sidecar: persona bind/unbind routes.
 SIDECAR_URL = os.environ.get("SIDECAR_URL", "http://sin:4098").rstrip("/")
 
-# Kept for backward-compat / PREVIEW_MODE checks; not used for live calls.
-ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-5-20250929")
+# Model to use for opencode sessions.
+OPENCODE_MODEL = os.environ.get("OPENCODE_MODEL", "claude-sonnet-4-6")
+OPENCODE_PROVIDER = os.environ.get("OPENCODE_PROVIDER", "anthropic")
 
 if not PREVIEW_MODE and not OPENCODE_PASSWORD:
     raise RuntimeError("OPENCODE_PASSWORD not set (set PREVIEW_MODE=1 to bypass)")
@@ -250,61 +250,78 @@ def _opencode_auth() -> tuple[str, str]:
     return ("opencode", OPENCODE_PASSWORD)
 
 
-async def _stream_opencode(
-    messages: list[dict],
+# Per-user opencode session cache: email → session_id.
+# Sessions are reused across WebSocket reconnects so conversation history persists.
+_opencode_sessions: dict[str, str] = {}
+
+
+async def _ensure_opencode_session(email: str) -> str:
+    """Return an opencode session ID for this user, creating one if needed."""
+    if email in _opencode_sessions:
+        return _opencode_sessions[email]
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        resp = await client.post(
+            f"{OPENCODE_URL}/session",
+            auth=_opencode_auth(),
+            headers={"x-opencode-directory": "/home/madcat"},
+            json={"title": f"chat-saiden:{email}"},
+        )
+        resp.raise_for_status()
+        sid = resp.json()["id"]
+        _opencode_sessions[email] = sid
+        log.info("created opencode session %s for %s", sid, email)
+        return sid
+
+
+async def _send_to_opencode(
+    user_text: str,
     system_prompt: str,
+    session_id: str,
     ws: WebSocket,
 ) -> str:
-    """Stream a chat completion from opencode's OpenAI-compat endpoint.
+    """Send a message to an opencode session and stream the response to the WebSocket.
 
-    Sends deltas to the WebSocket as they arrive.
-    Returns the full assembled response text.
-    Tool-use blocks embedded in the stream are executed and fed back as follow-up
-    messages (single round of tool use, same as the old Anthropic path).
+    Uses POST /session/:id/message (synchronous — waits for full LLM response).
+    Then streams the text word-by-word to the WebSocket for the typewriter effect.
+    Returns the full response text.
     """
-    # opencode /v1/chat/completions expects the system message as the first message
-    oc_messages: list[dict] = [{"role": "system", "content": system_prompt}] + messages
-
     full_response = ""
     try:
         async with httpx.AsyncClient(timeout=120.0) as client:
-            async with client.stream(
-                "POST",
-                f"{OPENCODE_URL}/v1/chat/completions",
+            resp = await client.post(
+                f"{OPENCODE_URL}/session/{session_id}/message",
                 auth=_opencode_auth(),
-                headers={"Accept": "text/event-stream"},
+                headers={"x-opencode-directory": "/home/madcat"},
                 json={
-                    "model": ANTHROPIC_MODEL,
-                    "messages": oc_messages,
-                    "stream": True,
-                    "max_tokens": 4096,
+                    "parts": [{"type": "text", "text": user_text}],
+                    "system": system_prompt,
+                    "model": {
+                        "providerID": OPENCODE_PROVIDER,
+                        "modelID": OPENCODE_MODEL,
+                    },
                 },
-            ) as resp:
-                if resp.status_code != 200:
-                    body = await resp.aread()
-                    raise RuntimeError(
-                        f"opencode HTTP {resp.status_code}: {body[:200].decode('utf-8', 'replace')}"
-                    )
-                async for line in resp.aiter_lines():
-                    if not line.startswith("data: "):
-                        continue
-                    payload = line[6:].strip()
-                    if payload == "[DONE]":
-                        break
-                    try:
-                        chunk = json.loads(payload)
-                    except json.JSONDecodeError:
-                        continue
-                    delta = (
-                        chunk.get("choices", [{}])[0]
-                        .get("delta", {})
-                        .get("content") or ""
-                    )
-                    if delta:
-                        full_response += delta
-                        await ws.send_json({"role": "assistant", "delta": delta, "done": False})
+            )
+            if resp.status_code != 200:
+                raise RuntimeError(
+                    f"opencode HTTP {resp.status_code}: {resp.text[:200]}"
+                )
+            data = resp.json()
+            # Extract text from response parts
+            parts = data.get("parts", [])
+            for part in parts:
+                if part.get("type") == "text":
+                    full_response += part.get("text", "")
+
+        # Stream text word-by-word to the WebSocket for typewriter effect
+        if full_response:
+            words = full_response.split(" ")
+            for i, word in enumerate(words):
+                delta = word if i == 0 else " " + word
+                await ws.send_json({"role": "assistant", "delta": delta, "done": False})
+                await asyncio.sleep(0.03)  # ~30ms per word — fast typewriter
+
     except Exception as e:
-        log.error("opencode stream error: %s", e)
+        log.error("opencode error: %s", e)
         await ws.send_json({
             "role": "system",
             "content": f"upstream error: {e} — try again",
@@ -807,8 +824,9 @@ async def chat_ws(ws: WebSocket) -> None:
 
             system_prompt = _pick_system_prompt(bound_slug, cart) + eems_context
 
-            # Stream from opencode
-            response_text = await _stream_opencode(history, system_prompt, ws)
+            # Send to opencode and stream response
+            oc_session = await _ensure_opencode_session(user["email"])
+            response_text = await _send_to_opencode(user_msg, system_prompt, oc_session, ws)
 
             if response_text:
                 history.append({"role": "assistant", "content": response_text})