fix: convert chat messages to text for AutoRound calibration
This commit is contained in:
+6
-2
@@ -23,10 +23,14 @@ model = AutoModelForCausalLM.from_pretrained(
|
|||||||
MODEL, torch_dtype=torch.float16, device_map="auto"
|
MODEL, torch_dtype=torch.float16, device_map="auto"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Domain calibration from training data
|
# Domain calibration from training data (chat format → raw text)
|
||||||
print("Loading calibration data...")
|
print("Loading calibration data...")
|
||||||
with open(CALIB_DATA) as f:
|
with open(CALIB_DATA) as f:
|
||||||
calib = [json.loads(line)["text"] for line in f]
|
calib = []
|
||||||
|
for line in f:
|
||||||
|
msgs = json.loads(line)["messages"]
|
||||||
|
text = tokenizer.apply_chat_template(msgs, tokenize=False)
|
||||||
|
calib.append(text)
|
||||||
|
|
||||||
print(f"Calibration samples: {len(calib)}")
|
print(f"Calibration samples: {len(calib)}")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user