fix: parse tool_call arguments from JSON string to dict for Qwen3.5 template
This commit is contained in:
+25
-1
@@ -54,9 +54,33 @@ model = FastLanguageModel.get_peft_model(
|
|||||||
# --- Dataset ---
|
# --- Dataset ---
|
||||||
ds = load_dataset("json", data_files=DATA, split="train")
|
# Load the JSON file(s) referenced by DATA as the "train" split.
ds = load_dataset("json", data_files=DATA, split="train")
|
||||||
|
|
||||||
|
def fix_tool_calls(messages):
    """Return a copy of *messages* whose tool-call arguments are dicts.

    Each message that carries a non-empty ``tool_calls`` list gets every
    call's ``function.arguments`` normalized:

    * a JSON string that decodes to an object is replaced by that dict;
    * a string that is not valid JSON, or that decodes to something other
      than an object (list, number, ``null`` ...), is wrapped as
      ``{"raw": <original string>}`` so the result is always a mapping;
    * non-string arguments (already a dict, missing, ``None``) are kept.

    Messages, tool calls and function entries are shallow-copied before
    being changed, so the input structures are not mutated. Tool calls
    without a ``function`` entry (or with ``function`` set to ``None``)
    are passed through unchanged.

    Args:
        messages: Iterable of mapping-like chat messages.

    Returns:
        A new list of plain ``dict`` messages.
    """
    import json as _json

    fixed = []
    for msg in messages:
        msg = dict(msg)  # shallow copy: never mutate the caller's message
        if msg.get("tool_calls"):
            new_tcs = []
            for tc in msg["tool_calls"]:
                tc = dict(tc)
                # A missing or None "function" has nothing to normalize.
                if tc.get("function") is not None:
                    fn = dict(tc["function"])
                    raw = fn.get("arguments")
                    if isinstance(raw, str):
                        try:
                            parsed = _json.loads(raw)
                        except ValueError:  # includes json.JSONDecodeError
                            parsed = None
                        # Only a JSON object is usable as an arguments
                        # mapping; anything else keeps the raw text.
                        if isinstance(parsed, dict):
                            fn["arguments"] = parsed
                        else:
                            fn["arguments"] = {"raw": raw}
                    tc["function"] = fn
                new_tcs.append(tc)
            msg["tool_calls"] = new_tcs
        fixed.append(msg)
    return fixed
|
||||||
|
|
||||||
def to_chatml(ex):
    """Render one dataset example as chat-template text.

    The example's ``messages`` are first passed through ``fix_tool_calls``
    and the result is given to ``tokenizer.apply_chat_template`` with
    ``tokenize=False`` and ``add_generation_prompt=False``.

    Args:
        ex: Mapping with a ``"messages"`` entry.

    Returns:
        ``{"text": <value returned by apply_chat_template>}``.
    """
    normalized = fix_tool_calls(ex["messages"])
    rendered = tokenizer.apply_chat_template(
        normalized,
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user