// madcat-say/Sources/GpuCheck.swift

import ArgumentParser
import Dispatch
import Foundation
import MLX

/// `madcat-say gpu-check` — reports whether MLX sees an Apple Silicon Metal
/// GPU, whether the compiled shader library (`mlx.metallib`) sits beside the
/// executable, and how a small matmul probe behaves when forced through
/// `eval()`.
///
/// Steps performed by `run()`:
///   1. Read `GPU.deviceInfo()` and test whether its architecture string
///      contains "apple" (case-insensitive).
///   2. Look for `mlx.metallib` in the executable's directory. Missing =>
///      shaders are JIT-compiled (~5x slower), or inference aborts with
///      "Failed to load the default metallib".
///   3. Time a cold and a warm NxN matmul, each forced with `eval()`, then
///      record a checksum and the change in MLX active memory.
///
/// Exit code: 0 = PASS, 2 = FAIL. The verdict is decided by step 1 alone;
/// steps 2 and 3 are reported for diagnosis.
struct GpuCheck: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        commandName: "gpu-check",
        abstract: "Confirm MLX is using the Apple Silicon Metal GPU (not CPU)."
    )

    @Flag(name: .long, help: "Emit machine-readable JSON instead of text.")
    var json = false

    @Option(name: .long, help: "Edge length N of the NxN matmul probe (default 1024).")
    var probe: Int = 1024

    /// Gathers device facts, runs the matmul probe, prints the report and
    /// throws `ExitCode(2)` when no Apple GPU architecture was reported.
    func run() async throws {
        // --- Step 1: device identity -------------------------------------
        let device = GPU.deviceInfo()
        let archName = device.architecture
        let onAppleSilicon = archName.lowercased().contains("apple")
        let deviceMemory = device.memorySize
        let workingSetLimit = Int(device.maxRecommendedWorkingSetSize)

        // --- Step 2: shader library next to the binary -------------------
        let shaderLibrary = Self.metallibStatus()

        // --- Step 3: compute probe ---------------------------------------
        // Probe sizes below 8 are raised to 8.
        let edge = max(8, probe)
        let baselineBytes = Memory.activeMemory

        let lhs = ones([edge, edge])
        let rhs = ones([edge, edge])

        // Cold pass: first evaluation, so it also pays any one-time setup.
        let coldClock = DispatchTime.now()
        var product = matmul(lhs, rhs)
        eval(product)
        let coldElapsed = Self.elapsedMs(since: coldClock)

        // Warm pass: same kind of work again, feeding the previous result in.
        let warmClock = DispatchTime.now()
        product = matmul(product, rhs)
        eval(product)
        let warmElapsed = Self.elapsedMs(since: warmClock)

        let total = product.sum().item(Float.self)
        let grownBytes = Memory.activeMemory - baselineBytes

        let report = Report(
            appleGPU: onAppleSilicon,
            architecture: archName,
            totalMemory: deviceMemory,
            maxWorkingSet: workingSetLimit,
            metallibPath: shaderLibrary.path,
            metallibFound: shaderLibrary.found,
            probe: edge,
            coldMs: coldElapsed,
            warmMs: warmElapsed,
            checksum: total,
            memoryDelta: grownBytes,
            pass: onAppleSilicon
        )

        if json {
            print(report.json)
        } else {
            print(report.text)
        }

        guard onAppleSilicon else {
            throw ExitCode(2)
        }
    }

    // MARK: - Helpers

    /// Resolves the directory of the running executable (following symlinks)
    /// and reports whether a file named `mlx.metallib` exists there.
    ///
    /// - Returns: The candidate path and whether something exists at it.
    private static func metallibStatus() -> (path: String, found: Bool) {
        let executable: URL
        if let fromBundle = Bundle.main.executableURL {
            executable = fromBundle
        } else {
            // Fall back to argv[0], or the bare tool name if even that is absent.
            executable = URL(fileURLWithPath: CommandLine.arguments.first ?? "madcat-say")
        }

        let libraryPath = executable
            .resolvingSymlinksInPath()
            .deletingLastPathComponent()
            .appendingPathComponent("mlx.metallib")
            .path
        let exists = FileManager.default.fileExists(atPath: libraryPath)
        return (path: libraryPath, found: exists)
    }

    /// Milliseconds elapsed between `start` and now, using uptime nanoseconds.
    private static func elapsedMs(since start: DispatchTime) -> Double {
        let nowNanos = DispatchTime.now().uptimeNanoseconds
        // Wrapping subtraction: never traps, even if the clock values were odd.
        let spentNanos = nowNanos &- start.uptimeNanoseconds
        return Double(spentNanos) / 1_000_000
    }

    /// Formats a byte count with one decimal place using 1024-based steps
    /// ("B" through "TB"). Zero and negative counts render as "0 B".
    static func human(_ bytes: Int) -> String {
        if bytes <= 0 {
            return "0 B"
        }
        let suffixes = ["B", "KB", "MB", "GB", "TB"]
        var scaled = Double(bytes)
        var suffix = suffixes[0]
        // Step up one unit at a time; stop at the first unit that fits or at "TB".
        for larger in suffixes.dropFirst() {
            guard scaled >= 1024 else { break }
            scaled /= 1024
            suffix = larger
        }
        return String(format: "%.1f %@", scaled, suffix)
    }
}

/// Result of a `gpu-check` run, with text + JSON renderings.
private struct Report {
    /// Whether the reported architecture was recognised as an Apple GPU.
    let appleGPU: Bool
    /// Architecture string as reported by the device.
    let architecture: String
    /// Total device memory, in bytes.
    let totalMemory: Int
    /// Maximum recommended working-set size, in bytes.
    let maxWorkingSet: Int
    /// Path at which `mlx.metallib` was looked for.
    let metallibPath: String
    /// Whether a file exists at `metallibPath`.
    let metallibFound: Bool
    /// Edge length N of the NxN matmul probe that was run.
    let probe: Int
    /// Wall time of the first (cold) evaluation, in milliseconds.
    let coldMs: Double
    /// Wall time of the second (warm) evaluation, in milliseconds.
    let warmMs: Double
    /// Sum of the final probe result.
    let checksum: Float
    /// Change in MLX active memory across the probe, in bytes (may be negative).
    let memoryDelta: Int
    /// Overall verdict.
    let pass: Bool

    /// Human-readable, multi-line rendering of the report.
    var text: String {
        var lines: [String] = []
        lines.append("madcat-say gpu-check")
        lines.append("--------------------")
        lines.append("Metal GPU       : \(architecture)  (Apple Silicon: \(appleGPU ? "YES" : "NO"))")
        lines.append("Total memory    : \(GpuCheck.human(totalMemory))")
        lines.append("Max working set : \(GpuCheck.human(maxWorkingSet))")
        lines.append("MLX metallib    : \(metallibFound ? "found" : "MISSING")  (\(metallibPath))")
        if !metallibFound {
            lines.append("                  WARNING: shaders will JIT-compile (~5x slower) or fail to load.")
            lines.append("                  Fix: run `make` (copies speech-swift's mlx.metallib next to the binary).")
        }
        lines.append("Compute probe   : \(probe)x\(probe) matmul on GPU")
        lines.append(String(format: "  cold eval     : %.2f ms   (Metal pipeline / metallib load)", coldMs))
        lines.append(String(format: "  warm eval     : %.2f ms", warmMs))
        // A negative delta is shown as zero in the text rendering.
        lines.append("  GPU mem delta : \(GpuCheck.human(max(0, memoryDelta))) active")
        lines.append(String(format: "  checksum      : %.0f", checksum))
        lines.append("VERDICT: " + (pass
            ? "PASS - MLX inference will run on the Metal GPU."
            : "FAIL - no Apple Metal GPU detected; inference would fall back to CPU."))
        return lines.joined(separator: "\n")
    }

    /// Pretty-printed JSON rendering with sorted keys.
    ///
    /// Non-finite timing or checksum values are emitted as JSON `null`. If
    /// the payload still cannot be serialized, a minimal object carrying only
    /// the verdict is returned instead.
    var json: String {
        let verdict = pass ? "PASS" : "FAIL"
        let payload: [String: Any] = [
            "apple_gpu": appleGPU,
            "architecture": architecture,
            "total_memory_bytes": totalMemory,
            "max_recommended_working_set_bytes": maxWorkingSet,
            "metallib_path": metallibPath,
            "metallib_found": metallibFound,
            "probe_dim": probe,
            "cold_eval_ms": Self.jsonNumber(coldMs),
            "warm_eval_ms": Self.jsonNumber(warmMs),
            "active_memory_delta_bytes": memoryDelta,
            "checksum": Self.jsonNumber(Double(checksum)),
            "verdict": verdict
        ]
        // `data(withJSONObject:)` raises an Objective-C exception (which
        // `try?` cannot intercept) for payloads it cannot represent, so the
        // payload is validated first to keep the fallback reachable.
        guard JSONSerialization.isValidJSONObject(payload),
              let data = try? JSONSerialization.data(
                  withJSONObject: payload, options: [.prettyPrinted, .sortedKeys]
              )
        else {
            return "{\"verdict\":\"\(verdict)\"}"
        }
        return String(decoding: data, as: UTF8.self)
    }

    /// Wraps a floating-point value for JSON output: finite values pass
    /// through, NaN and infinities (not representable in JSON) become `null`.
    private static func jsonNumber(_ value: Double) -> Any {
        guard value.isFinite else { return NSNull() }
        return value
    }
}