Turn server wrapper into transparent proxy with /reload

Replace management endpoints (/start, /stop, /restart) with a transparent reverse proxy and hot-reload architecture. The wrapper now sits in front of sd-server, forwarding all requests and adding a /reload endpoint for model swapping without restarting the wrapper itself.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 20:51:16 +00:00
parent b33fe120fa
commit 61e68fee15
6 changed files with 203 additions and 124 deletions
@@ -1,13 +1,15 @@
-"""Tests for tensors.server package (FastAPI sd-server manager)."""
+"""Tests for tensors.server package (FastAPI sd-server proxy wrapper)."""

 from __future__ import annotations

-from unittest.mock import MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock, patch

+import httpx
 import pytest
 from fastapi.testclient import TestClient

 from tensors.server import create_app
+from tensors.server.models import ServerConfig
 from tensors.server.process import ProcessManager


@@ -22,7 +24,7 @@ def api() -> TestClient:


 def _get_pm(api: TestClient) -> ProcessManager:
-    return api.app.state.pm  # type: ignore[union-attr]
+    return api.app.state.pm  # type: ignore[no-any-return, attr-defined]


 class TestStatus:
@@ -37,7 +39,7 @@ class TestStatus:
        mock_proc.poll.return_value = None
        mock_proc.pid = 999
        pm.proc = mock_proc
-        pm.config = {"model": "/m.safetensors", "port": 1234, "args": []}
+        pm.config = ServerConfig(model="/m.safetensors")
        r = api.get("/status")
        data = r.json()
        assert data["running"] is True
@@ -54,69 +56,86 @@ class TestStatus:
        assert data["exit_code"] == 1


-class TestStart:
+class TestReload:
+    @patch.object(ProcessManager, "wait_ready", new_callable=AsyncMock, return_value=True)
    @patch("tensors.server.process.subprocess.Popen")
-    def test_start_success(self, mock_popen: MagicMock, api: TestClient) -> None:
+    def test_reload_swaps_model(self, mock_popen: MagicMock, mock_ready: AsyncMock, api: TestClient) -> None:
+        pm = _get_pm(api)
+        pm.config = ServerConfig(model="/old.gguf", port=5555, args=["--fa"])
        mock_popen.return_value.pid = 42
        mock_popen.return_value.poll.return_value = None
-        r = api.post("/start", json={"model": "/m.safetensors"})
-        assert r.status_code == 200
-        assert r.json()["started"] is True
-        assert r.json()["pid"] == 42
-
-    @patch("tensors.server.process.subprocess.Popen")
-    def test_start_already_running(self, mock_popen: MagicMock, api: TestClient) -> None:
-        pm = _get_pm(api)
-        mock_proc = MagicMock()
-        mock_proc.poll.return_value = None
-        pm.proc = mock_proc
-        r = api.post("/start", json={"model": "/m.safetensors"})
-        assert r.status_code == 409
-
-
-class TestStop:
-    def test_stop_not_running(self, api: TestClient) -> None:
-        r = api.post("/stop")
-        assert r.status_code == 409
-
-    def test_stop_running(self, api: TestClient) -> None:
-        pm = _get_pm(api)
-        mock_proc = MagicMock()
-        mock_proc.poll.return_value = None
-        mock_proc.wait.return_value = 0
-        pm.proc = mock_proc
-        r = api.post("/stop")
-        assert r.status_code == 200
-        assert r.json()["stopped"] is True
-        mock_proc.send_signal.assert_called_once()
-
-
-class TestRestart:
-    def test_restart_no_config_no_model(self, api: TestClient) -> None:
-        r = api.post("/restart", json={})
-        assert r.status_code == 400
-
-    @patch("tensors.server.process.subprocess.Popen")
-    def test_restart_with_new_model(self, mock_popen: MagicMock, api: TestClient) -> None:
-        mock_popen.return_value.pid = 100
-        mock_popen.return_value.poll.return_value = None
-        pm = _get_pm(api)
-        pm.config = {"model": "/old.safetensors", "port": 1234, "args": []}
-        r = api.post("/restart", json={"model": "/new.safetensors"})
+        r = api.post("/reload", json={"model": "/new.gguf"})
        assert r.status_code == 200
        data = r.json()
-        assert data["restarted"] is True
-        assert "/new.safetensors" in str(data["cmd"])
+        assert data["ok"] is True
+        assert data["model"] == "/new.gguf"
+        assert data["pid"] == 42
+        # Verify new config preserved port and args from previous config
+        assert pm.config is not None
+        assert pm.config.port == 5555
+        assert pm.config.args == ["--fa"]
+        assert pm.config.model == "/new.gguf"

+    @patch.object(ProcessManager, "wait_ready", new_callable=AsyncMock, return_value=False)
    @patch("tensors.server.process.subprocess.Popen")
-    def test_restart_keeps_previous_config(self, mock_popen: MagicMock, api: TestClient) -> None:
-        mock_popen.return_value.pid = 101
-        mock_popen.return_value.poll.return_value = None
+    def test_reload_fails_when_not_ready(self, mock_popen: MagicMock, mock_ready: AsyncMock, api: TestClient) -> None:
        pm = _get_pm(api)
-        pm.config = {"model": "/m.safetensors", "port": 5555, "args": ["--fa"]}
-        r = api.post("/restart", json={})
+        pm.config = ServerConfig(model="/old.gguf")
+        mock_popen.return_value.pid = 43
+        mock_popen.return_value.poll.return_value = None
+        r = api.post("/reload", json={"model": "/bad.gguf"})
+        assert r.status_code == 503
+        assert "failed" in r.json()["error"]
+
+    def test_reload_requires_model(self, api: TestClient) -> None:
+        r = api.post("/reload", json={})
+        assert r.status_code == 422
+
+
+class TestProxy:
+    def test_proxy_503_when_not_running(self, api: TestClient) -> None:
+        r = api.get("/v1/models")
+        assert r.status_code == 503
+        assert "not running" in r.json()["error"]
+
+    def test_proxy_forwards_request(self, api: TestClient) -> None:
+        pm = _get_pm(api)
+        mock_proc = MagicMock()
+        mock_proc.poll.return_value = None
+        mock_proc.pid = 100
+        pm.proc = mock_proc
+        pm.config = ServerConfig(model="/m.gguf", port=1234)
+
+        upstream_response = httpx.Response(
+            200,
+            json={"data": [{"id": "model-1"}]},
+            headers={"content-type": "application/json"},
+        )
+        mock_client = AsyncMock()
+        mock_client.request.return_value = upstream_response
+        api.app.state.client = mock_client  # type: ignore[attr-defined]
+
+        r = api.get("/v1/models")
        assert r.status_code == 200
-        assert "5555" in str(r.json()["cmd"])
+        assert r.json() == {"data": [{"id": "model-1"}]}
+        mock_client.request.assert_called_once()
+
+    def test_proxy_forwards_post_with_body(self, api: TestClient) -> None:
+        pm = _get_pm(api)
+        mock_proc = MagicMock()
+        mock_proc.poll.return_value = None
+        mock_proc.pid = 100
+        pm.proc = mock_proc
+        pm.config = ServerConfig(model="/m.gguf", port=1234)
+
+        upstream_response = httpx.Response(200, json={"ok": True})
+        mock_client = AsyncMock()
+        mock_client.request.return_value = upstream_response
+        api.app.state.client = mock_client  # type: ignore[attr-defined]
+
+        r = api.post("/v1/chat/completions", json={"prompt": "hello"})
+        assert r.status_code == 200
+        mock_client.request.assert_called_once()


 class TestProcessManager:
@@ -124,18 +143,27 @@ class TestProcessManager:
        assert pm.status() == {"running": False}

    def test_build_cmd(self, pm: ProcessManager) -> None:
-        config = {"model": "/m.gguf", "port": 1234, "args": ["--fa"]}
-        cmd = pm.build_cmd(config)
+        pm.config = ServerConfig(model="/m.gguf", port=1234, args=["--fa"])
+        cmd = pm.build_cmd()
        assert "/m.gguf" in cmd
        assert "--fa" in cmd
        assert "1234" in cmd

+    def test_build_cmd_no_config(self, pm: ProcessManager) -> None:
+        with pytest.raises(RuntimeError, match="No config"):
+            pm.build_cmd()
+
    @patch("tensors.server.process.subprocess.Popen")
    def test_start_and_stop(self, mock_popen: MagicMock, pm: ProcessManager) -> None:
        mock_popen.return_value.pid = 77
        mock_popen.return_value.poll.return_value = None
        mock_popen.return_value.wait.return_value = 0
-        pm.start({"model": "/m.gguf", "port": 1234, "args": []})
+        pm.start(ServerConfig(model="/m.gguf"))
        assert pm.proc is not None
        assert pm.stop() is True
        assert pm.proc is None
+
+    def test_server_config_defaults(self) -> None:
+        cfg = ServerConfig(model="/m.gguf")
+        assert cfg.port == 1234
+        assert cfg.args == []