add AWQ quantization script (llm-compressor)
This commit is contained in:
@@ -0,0 +1,49 @@
|
|||||||
|
"""AWQ quantization via llm-compressor.
|
||||||
|
|
||||||
|
Run in .venv-quant (llmcompressor + transformers 4.57.6).
|
||||||
|
Uses domain calibration data from substrate_v5.jsonl.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
from itertools import islice

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.modifiers.awq import AWQModifier
# Retained from the original script; the recipe below uses AWQModifier.
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor import oneshot

MODEL = "/workspace/substrate-qwen36-27b-merged"
OUTPUT = "/workspace/substrate-qwen36-27b-awq"
CALIB_DATA = "/workspace/substrate_v5.jsonl"

# Upper bound on calibration records read from CALIB_DATA.
MAX_CALIB_SAMPLES = 128
# Sequence length passed to oneshot() as max_seq_length.
MAX_SEQ_LENGTH = 2048


def _load_calibration_texts(path, limit):
    """Return up to *limit* "text" fields from the JSONL file at *path*.

    Blank lines are skipped, and reading stops once *limit* records have been
    collected, so the remainder of the file is never parsed.

    Args:
        path: Path to a JSON-lines file whose records contain a "text" key.
        limit: Maximum number of records to return.

    Returns:
        A list of at most *limit* values taken from each record's "text" key.

    Raises:
        ValueError: If the file contains no records.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no "text" key.
    """
    with open(path, encoding="utf-8") as f:
        records = (json.loads(line) for line in f if line.strip())
        texts = [record["text"] for record in islice(records, limit)]
    if not texts:
        raise ValueError(f"No calibration samples found in {path}")
    return texts


print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL)

# Domain calibration from training data. Read before the model is loaded so
# that a missing or malformed calibration file fails before the expensive
# checkpoint load.
print("Loading calibration data...")
calib = _load_calibration_texts(CALIB_DATA, MAX_CALIB_SAMPLES)
print(f"Calibration samples: {len(calib)}")

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL, torch_dtype=torch.float16, device_map="auto"
)

# AWQModifier is what actually applies AWQ; a bare QuantizationModifier with
# the same arguments quantizes the weights without the AWQ algorithm.
# NOTE(review): AWQModifier relies on layer mappings for the model
# architecture — confirm the defaults cover this checkpoint.
recipe = AWQModifier(
    targets=["Linear"],
    scheme="W4A16",
    ignore=["lm_head"],
)

print("Running AWQ quantization...")
# NOTE(review): oneshot() documents `dataset` as a registered dataset name or
# a Hugging Face `datasets` object; confirm that this llmcompressor version
# accepts a plain list of strings, otherwise wrap `calib` in a
# `datasets.Dataset` with a "text" column first.
oneshot(
    model=model,
    tokenizer=tokenizer,
    dataset=calib,
    recipe=recipe,
    output_dir=OUTPUT,
    max_seq_length=MAX_SEQ_LENGTH,
    num_calibration_samples=len(calib),
)

print(f"Done! Output: {OUTPUT}")
|
||||||
Reference in New Issue
Block a user