add AWQ quantization script (llm-compressor)

This commit is contained in:
marauder-actual
2026-06-01 04:15:15 +02:00
parent 26e776db71
commit 0fa46c9fed
+49
View File
@@ -0,0 +1,49 @@
"""AWQ quantization via llm-compressor.
Run in .venv-quant (llmcompressor + transformers 4.57.6).
Uses domain calibration data from substrate_v5.jsonl.
"""
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor import oneshot
MODEL = "/workspace/substrate-qwen36-27b-merged"
OUTPUT = "/workspace/substrate-qwen36-27b-awq"
CALIB_DATA = "/workspace/substrate_v5.jsonl"

# Upper bound on the number of calibration records read from CALIB_DATA.
NUM_CALIB_SAMPLES = 128
# Token limit passed to oneshot() as max_seq_length.
MAX_SEQ_LENGTH = 2048


def load_calibration_texts(path: str, limit: int = NUM_CALIB_SAMPLES) -> list[str]:
    """Return the "text" field of the first *limit* records of a JSONL file.

    Blank lines are skipped. Reading stops as soon as *limit* records have
    been collected, so a large file is never parsed in full.

    Args:
        path: JSONL file holding one JSON object per line.
        limit: Maximum number of records to return.

    Returns:
        The collected texts, in file order.

    Raises:
        ValueError: A non-blank line is not valid JSON or has no "text" key.
        OSError: The file cannot be opened.
    """
    texts: list[str] = []
    with open(path, encoding="utf-8") as f:
        for lineno, line in enumerate(f, start=1):
            if len(texts) >= limit:
                break
            stripped = line.strip()
            if not stripped:
                continue
            try:
                texts.append(json.loads(stripped)["text"])
            except (json.JSONDecodeError, KeyError, TypeError) as err:
                raise ValueError(
                    f"{path}:{lineno}: expected a JSON object with a 'text' field"
                ) from err
    return texts


def main() -> None:
    """Quantize MODEL to 4-bit weights with AWQ and write the result to OUTPUT."""
    # Function-scope import: keeps the new dependency next to its only use and
    # leaves the file's top-level import block untouched.
    from llmcompressor.modifiers.awq import AWQModifier

    # Domain calibration from training data.
    # Read it before the checkpoint so a malformed or empty file fails fast,
    # not after the model has been loaded.
    print("Loading calibration data...")
    calib = load_calibration_texts(CALIB_DATA)
    print(f"Calibration samples: {len(calib)}")
    if not calib:
        raise ValueError(f"No calibration samples found in {CALIB_DATA}")

    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL)

    print("Loading model...")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL, torch_dtype=torch.float16, device_map="auto"
    )

    # QuantizationModifier with W4A16 only does round-to-nearest weight
    # quantization; AWQModifier is llm-compressor's AWQ implementation and is
    # what this script is meant to run. Targets, scheme and ignore list are
    # carried over unchanged.
    recipe = AWQModifier(
        targets=["Linear"],
        scheme="W4A16",
        ignore=["lm_head"],
    )

    print("Running AWQ quantization...")
    # NOTE(review): oneshot() documents `dataset` as a registered dataset name
    # or a `datasets.Dataset`. Confirm the installed llm-compressor accepts a
    # plain list of strings; if not, wrap it first, e.g.
    # `datasets.Dataset.from_dict({"text": calib})`.
    oneshot(
        model=model,
        tokenizer=tokenizer,
        dataset=calib,
        recipe=recipe,
        output_dir=OUTPUT,
        max_seq_length=MAX_SEQ_LENGTH,
        num_calibration_samples=len(calib),
    )
    print(f"Done! Output: {OUTPUT}")


if __name__ == "__main__":
    main()