import ArgumentParser
import Dispatch
import Foundation
import MLX

/// `madcat-say gpu-check` — confirm that MLX inference will run on the Apple
/// Silicon Metal GPU rather than silently falling back to CPU, and that the
/// compiled Metal shader library (`mlx.metallib`) is in place next to the
/// binary.
///
/// What it checks:
///   1. `GPU.deviceInfo()` reports an Apple GPU architecture (Metal present).
///   2. `mlx.metallib` exists next to the executable. Missing => shaders are
///      JIT-compiled (~5x slower), or inference aborts with
///      "Failed to load the default metallib".
///   3. A real matmul is forced to completion with `eval()`; cold/warm timings,
///      a checksum and the MLX active-memory delta are reported.
///
/// Verdict: only check 1 decides PASS/FAIL. Checks 2 and 3 are reported for
/// diagnosis but never turn a PASS into a FAIL by themselves.
///
/// Exit code: 0 = PASS (Apple Metal GPU is the compute device), 2 = FAIL.
struct GpuCheck: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        commandName: "gpu-check",
        abstract: "Confirm MLX is using the Apple Silicon Metal GPU (not CPU)."
    )

    @Flag(name: .long, help: "Emit machine-readable JSON instead of text.")
    var json = false

    // The default is not repeated in the help text: ArgumentParser appends
    // "(default: …)" on its own. The lower bound applied in `run()` is
    // mentioned instead, since it is otherwise invisible to the user.
    @Option(name: .long, help: "Edge length N of the NxN matmul probe; values below 8 are raised to 8.")
    var probe: Int = 1024

    /// Runs the three checks, prints the report (text or JSON) and sets the
    /// exit status.
    ///
    /// - Throws: `ExitCode(2)` when the device architecture does not identify
    ///   an Apple GPU.
    func run() async throws {
        // 1. Metal device identity. Only a Metal device answers deviceInfo();
        //    on Apple Silicon the architecture string contains "apple".
        let info = GPU.deviceInfo()
        let architecture = info.architecture
        let isAppleGPU = architecture.lowercased().contains("apple")
        let totalMemory = info.memorySize
        let maxWorkingSet = Int(info.maxRecommendedWorkingSetSize)

        // 2. metallib presence (project convention: copied next to the binary).
        let (metallibPath, metallibFound) = Self.metallibStatus()

        // 3. Live GPU compute probe: matmul -> eval(). The cold eval pays the
        //    Metal pipeline / metallib load; the warm eval is steady state.
        let n = max(8, probe)
        let activeBefore = Memory.activeMemory
        let a = ones([n, n])
        let b = ones([n, n])

        let coldStart = DispatchTime.now()
        var c = matmul(a, b)
        eval(c)
        let coldMs = Self.elapsedMs(since: coldStart)

        let warmStart = DispatchTime.now()
        c = matmul(c, b)
        eval(c)
        let warmMs = Self.elapsedMs(since: warmStart)

        // Reading the scalar sum pulls the result back to the host.
        let checksum = c.sum().item(Float.self)
        let memoryDelta = Memory.activeMemory - activeBefore

        // The verdict depends on the device identity alone.
        let pass = isAppleGPU

        let report = Report(
            appleGPU: isAppleGPU,
            architecture: architecture,
            totalMemory: totalMemory,
            maxWorkingSet: maxWorkingSet,
            metallibPath: metallibPath,
            metallibFound: metallibFound,
            probe: n,
            coldMs: coldMs,
            warmMs: warmMs,
            checksum: checksum,
            memoryDelta: memoryDelta,
            pass: pass
        )

        print(json ? report.json : report.text)

        guard pass else {
            throw ExitCode(2)
        }
    }

    // MARK: - Helpers

    /// MLX expects `mlx.metallib` next to the executable (see the Makefile,
    /// which copies it out of speech-swift's build dir).
    ///
    /// - Returns: The path that was probed (sibling of the symlink-resolved
    ///   executable) and whether a file exists there.
    private static func metallibStatus() -> (path: String, found: Bool) {
        // Fall back to argv[0] when the bundle cannot report the executable.
        let exe = Bundle.main.executableURL
            ?? URL(fileURLWithPath: CommandLine.arguments.first ?? "madcat-say")
        let dir = exe.resolvingSymlinksInPath().deletingLastPathComponent()
        let candidate = dir.appendingPathComponent("mlx.metallib")
        return (candidate.path, FileManager.default.fileExists(atPath: candidate.path))
    }

    /// Milliseconds elapsed between `start` and now, based on uptime
    /// nanoseconds. The wrapping subtraction avoids a trap on underflow.
    private static func elapsedMs(since start: DispatchTime) -> Double {
        Double(DispatchTime.now().uptimeNanoseconds &- start.uptimeNanoseconds) / 1_000_000
    }

    /// Formats a byte count with 1024-based units (B, KB, MB, GB, TB) and one
    /// decimal place. Zero and negative counts render as "0 B"; values beyond
    /// the largest unit stay expressed in TB.
    static func human(_ bytes: Int) -> String {
        guard bytes > 0 else { return "0 B" }
        let units = ["B", "KB", "MB", "GB", "TB"]
        var value = Double(bytes)
        var i = 0
        while value >= 1024 && i < units.count - 1 {
            value /= 1024
            i += 1
        }
        return String(format: "%.1f %@", value, units[i])
    }
}

/// Result of a `gpu-check` run, with text + JSON renderings.
private struct Report {
    /// Whether the device architecture string identified an Apple GPU.
    let appleGPU: Bool
    /// Architecture string as reported by the device.
    let architecture: String
    /// Device memory size in bytes.
    let totalMemory: Int
    /// Maximum recommended working-set size in bytes.
    let maxWorkingSet: Int
    /// Path at which `mlx.metallib` was looked for.
    let metallibPath: String
    /// Whether a file exists at `metallibPath`.
    let metallibFound: Bool
    /// Edge length of the square matmul probe.
    let probe: Int
    /// Duration of the first (cold) evaluation, in milliseconds.
    let coldMs: Double
    /// Duration of the second (warm) evaluation, in milliseconds.
    let warmMs: Double
    /// Sum of the probe result.
    let checksum: Float
    /// Change in MLX active memory across the probe, in bytes (may be negative).
    let memoryDelta: Int
    /// Overall verdict.
    let pass: Bool

    /// Human-readable, multi-line rendering of the report.
    var text: String {
        var lines: [String] = []
        lines.append("madcat-say gpu-check")
        lines.append("--------------------")
        lines.append("Metal GPU : \(architecture) (Apple Silicon: \(appleGPU ? "YES" : "NO"))")
        lines.append("Total memory : \(GpuCheck.human(totalMemory))")
        lines.append("Max working set : \(GpuCheck.human(maxWorkingSet))")
        lines.append("MLX metallib : \(metallibFound ? "found" : "MISSING") (\(metallibPath))")
        if !metallibFound {
            lines.append(" WARNING: shaders will JIT-compile (~5x slower) or fail to load.")
            lines.append(" Fix: run `make` (copies speech-swift's mlx.metallib next to the binary).")
        }
        lines.append("Compute probe : \(probe)x\(probe) matmul on GPU")
        lines.append(String(format: " cold eval : %.2f ms (Metal pipeline / metallib load)", coldMs))
        lines.append(String(format: " warm eval : %.2f ms", warmMs))
        // A negative delta is shown as zero in the text rendering.
        lines.append(" GPU mem delta : \(GpuCheck.human(max(0, memoryDelta))) active")
        lines.append(String(format: " checksum : %.0f", checksum))
        lines.append("VERDICT: " + (pass
            ? "PASS - MLX inference will run on the Metal GPU."
            : "FAIL - no Apple Metal GPU detected; inference would fall back to CPU."))
        return lines.joined(separator: "\n")
    }

    /// Machine-readable rendering: pretty-printed JSON with sorted keys.
    ///
    /// Non-finite numbers (NaN, ±infinity) are emitted as `null`.
    /// `JSONSerialization` rejects them by raising an Objective-C exception,
    /// which `try?` cannot intercept, so they must never reach it. If the
    /// payload still cannot be serialized, a minimal verdict-only object is
    /// returned.
    var json: String {
        let verdict = pass ? "PASS" : "FAIL"
        let payload: [String: Any] = [
            "apple_gpu": appleGPU,
            "architecture": architecture,
            "total_memory_bytes": totalMemory,
            "max_recommended_working_set_bytes": maxWorkingSet,
            "metallib_path": metallibPath,
            "metallib_found": metallibFound,
            "probe_dim": probe,
            "cold_eval_ms": Self.jsonNumber(coldMs),
            "warm_eval_ms": Self.jsonNumber(warmMs),
            "active_memory_delta_bytes": memoryDelta,
            "checksum": Self.jsonNumber(Double(checksum)),
            "verdict": verdict
        ]
        // Validate first: an invalid object makes data(withJSONObject:) raise
        // rather than throw.
        guard JSONSerialization.isValidJSONObject(payload),
              let data = try? JSONSerialization.data(
                  withJSONObject: payload,
                  options: [.prettyPrinted, .sortedKeys]
              )
        else {
            return "{\"verdict\":\"\(verdict)\"}"
        }
        return String(decoding: data, as: UTF8.self)
    }

    /// Returns `value` when it is representable in JSON, otherwise `NSNull`.
    private static func jsonNumber(_ value: Double) -> Any {
        guard value.isFinite else { return NSNull() }
        return value
    }
}