fix: convert chat messages to text for AutoRound calibration

This commit is contained in:
marauder-actual
2026-06-01 04:23:28 +02:00
parent 4edaeeb21b
commit 367ed705ab
+6 -2
View File
@@ -23,10 +23,14 @@ model = AutoModelForCausalLM.from_pretrained(
MODEL, torch_dtype=torch.float16, device_map="auto" MODEL, torch_dtype=torch.float16, device_map="auto"
) )
# Domain calibration from training data # Domain calibration from training data (chat format → raw text)
print("Loading calibration data...") print("Loading calibration data...")
with open(CALIB_DATA) as f: with open(CALIB_DATA) as f:
calib = [json.loads(line)["text"] for line in f] calib = []
for line in f:
msgs = json.loads(line)["messages"]
text = tokenizer.apply_chat_template(msgs, tokenize=False)
calib.append(text)
print(f"Calibration samples: {len(calib)}") print(f"Calibration samples: {len(calib)}")