2200120133
Training examples are ~500-1500 tokens. seqlen=2048 causes 'no data has been cached' error. Also remove deprecated format param.
61 lines
1.7 KiB
Python
61 lines
1.7 KiB
Python
"""INT4 quantization via AutoRound (Intel).
|
|
|
|
Run in .venv-quant with transformers 5.x.
|
|
llm-compressor is pinned to transformers <=4.57.6 and can't load Qwen3.6.
|
|
AutoRound works with transformers 5.x and produces vLLM-compatible output.
|
|
Uses domain calibration data from substrate_v5.jsonl.
|
|
"""
|
|
|
|
import json
|
|
import torch
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
# AutoRound wrongly classifies Qwen3.6-27B as a multimodal model because the
# checkpoint uses the Qwen3_5ForConditionalGeneration architecture. Override
# that check before pulling in the AutoRound class.
import auto_round.autoround as _ar


def _not_multimodal(*args, **kwargs):
    """Stand-in for AutoRound's multimodal check: always answers False."""
    return False


_ar.is_mllm_model = _not_multimodal

from auto_round import AutoRound  # noqa: E402  (must follow the patch above)
|
|
|
|
# Source checkpoint, destination directory for the quantized model, and the
# JSONL file that supplies calibration conversations.
MODEL = "/workspace/substrate-qwen36-27b-merged"
OUTPUT = "/workspace/substrate-qwen36-27b-int4"
CALIB_DATA = "/workspace/substrate_v5.jsonl"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL)

print("Loading model...")
# `dtype` is the current keyword; `torch_dtype` is the deprecated spelling in
# the transformers 5.x line this script is meant to run under.
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    dtype=torch.float16,
    device_map="auto",
)
|
|
|
|
# Domain calibration from training data (chat format → raw text).
# Each non-empty line of CALIB_DATA is a JSON object with a "messages" list;
# the tokenizer's chat template renders it to a single untokenized string.
print("Loading calibration data...")
calib = []
# Explicit UTF-8 so decoding does not depend on the machine's locale.
with open(CALIB_DATA, encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        if not line.strip():
            continue
        msgs = json.loads(line)["messages"]
        calib.append(tokenizer.apply_chat_template(msgs, tokenize=False))

# Fail here with a clear message instead of handing the quantizer an empty
# dataset (which would also make the sample count zero).
if not calib:
    raise ValueError(f"No calibration samples found in {CALIB_DATA}")

print(f"Calibration samples: {len(calib)}")
|
|
|
|
# Settings handed to AutoRound. seqlen is 512 because the calibration
# examples are roughly 500-1500 tokens long; seqlen=2048 led to a
# 'no data has been cached' error. nsamples is capped at 128 but never
# exceeds the number of calibration texts that were loaded.
quant_settings = {
    "model": model,
    "tokenizer": tokenizer,
    "dataset": calib,
    "bits": 4,
    "group_size": 128,
    "seqlen": 512,
    "nsamples": min(128, len(calib)),
    "iters": 200,
}
rounder = AutoRound(**quant_settings)

print("Running AutoRound INT4 quantization...")
rounder.quantize()

# Write the quantized model to OUTPUT in the "auto_round" format.
print(f"Saving to {OUTPUT}...")
rounder.save_quantized(OUTPUT, format="auto_round")

print(f"Done! Output: {OUTPUT}")
|