"""LoRA training smoke test — Qwen3-0.6B on RTX 2000 Ada.

Minimal training script to verify:
1. GPU access works
2. unsloth LoRA training pipeline works
3. Model saves correctly

Usage:
    # Inside madcat-ml container on junkpile:
    python smoke_test.py

Expected runtime: <5 min
Expected VRAM: ~3-4 GB
"""
import json
import os

# NOTE: unsloth stays ahead of trl on purpose — unsloth asks to be imported
# before trl/transformers so that its patches are applied.
from unsloth import FastLanguageModel
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset
import torch
# ── Config ──────────────────────────────────────────────────────────────
# All knobs for the smoke test live here; the sections below only read them.
MODEL = "Qwen/Qwen3-0.6B"  # Tiny model for smoke testing
MAX_SEQ = 2048  # Short sequences; also the cut-off for filtering long examples
RANK = 8  # Small LoRA rank
ALPHA = 8  # Passed as lora_alpha
DATA = "./bt7274_v4.jsonl"  # Chat-format JSONL, one {"messages": [...]} per line
OUT = "./smoke-test-lora"  # Trainer output dir and final adapter location
EPOCHS = 1  # Single epoch
BATCH = 1  # Per-device train batch size
GRAD_ACCUM = 2  # Minimal effective batch
LR = 1e-4  # Learning rate
MAX_EXAMPLES = 20  # Only use first 20 examples
# ── Load model (bf16, NOT 4-bit) ───────────────────────────────────────
print("Loading model...")
# Quantisation is explicitly disabled: the base weights are loaded in bf16 and
# only the LoRA adapter added below is trained (full_finetuning=False).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL,
    max_seq_length=MAX_SEQ,
    load_in_4bit=False,
    load_in_16bit=True,
    full_finetuning=False,
    dtype=torch.bfloat16,
)

# Report what hardware the run actually landed on (check 1 of the smoke test).
cuda_ok = torch.cuda.is_available()
print(f"✓ Model loaded: {MODEL}")
print(f" CUDA available: {cuda_ok}")
if cuda_ok:
    print(f" GPU: {torch.cuda.get_device_name(0)}")
    print(f" VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
# ── LoRA adapter ───────────────────────────────────────────────────────
print("\nConfiguring LoRA...")
# Wrap the base model with a LoRA adapter on the attention projections
# (q/k/v/o) and the MLP projections (gate/up/down).
model = FastLanguageModel.get_peft_model(
    model,
    r=RANK,
    lora_alpha=ALPHA,
    lora_dropout=0,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
    max_seq_length=MAX_SEQ,
)

print(f"✓ LoRA configured: r={RANK}, alpha={ALPHA}")
# ── Dataset ────────────────────────────────────────────────────────────
# Announce the input file before the helpers that parse it are defined.
print(f"\nLoading dataset: {DATA}")
def _fix_tool_call(tool_call):
    """Return a copy of one tool call with string ``arguments`` decoded."""
    tool_call = dict(tool_call)
    function = tool_call.get("function")
    # Only a dict-shaped "function" can be normalised; a missing or null entry
    # is passed through untouched instead of crashing in dict(None).
    if not isinstance(function, dict):
        return tool_call

    function = dict(function)
    arguments = function.get("arguments")
    if isinstance(arguments, str):
        try:
            function["arguments"] = json.loads(arguments)
        except (ValueError, TypeError):
            # Not valid JSON: keep the original text under a "raw" key so the
            # information is preserved rather than dropped.
            function["arguments"] = {"raw": arguments}
    tool_call["function"] = function
    return tool_call


def fix_tool_calls(messages):
    """Parse tool_call arguments from JSON strings to dicts.

    Args:
        messages: Iterable of chat message dicts. Messages may carry a
            ``"tool_calls"`` list whose items hold a ``"function"`` dict with
            an ``"arguments"`` field.

    Returns:
        A new list of message dicts. Every message, tool call and function
        dict that is touched is shallow-copied first, so the input is never
        mutated. String ``arguments`` are replaced by their ``json.loads``
        result, or by ``{"raw": <original string>}`` when they are not valid
        JSON. Non-string arguments and messages without tool calls are left
        as they are.
    """
    fixed = []
    for message in messages:
        message = dict(message)
        if message.get("tool_calls"):
            message["tool_calls"] = [
                _fix_tool_call(tool_call) for tool_call in message["tool_calls"]
            ]
        fixed.append(message)
    return fixed
def load_and_format(path, max_examples=None):
    """Load JSONL and format with chat template.

    Reads the module-level ``tokenizer`` and ``MAX_SEQ``.

    Args:
        path: Path to a JSONL file; each non-blank line must be a JSON object
            with a ``"messages"`` list.
        max_examples: Maximum number of non-blank rows to read. ``None`` or
            ``0`` means no limit. Blank lines do not count toward the limit;
            rows later dropped for length do.

    Returns:
        A ``datasets.Dataset`` with a single ``"text"`` column holding the
        rendered chat-template string of every row whose encoded length is at
        most ``MAX_SEQ`` tokens.

    Raises:
        ValueError: If a row is not valid JSON (message names file and line).
        KeyError: If a row has no ``"messages"`` key.
    """
    from datasets import Dataset

    # Some tokenizer objects wrap the real encoder in a ``.tokenizer``
    # attribute; use the inner one for token counting when it exists.
    encoder = getattr(tokenizer, "tokenizer", tokenizer)
    texts = []
    skipped = 0
    seen = 0

    # JSONL is UTF-8 by convention; do not depend on the container's locale.
    with open(path, encoding="utf-8") as handle:
        for lineno, line in enumerate(handle, start=1):
            line = line.strip()
            if not line:
                continue
            if max_examples and seen >= max_examples:
                break
            seen += 1

            try:
                row = json.loads(line)
            except json.JSONDecodeError as err:
                raise ValueError(f"{path}:{lineno}: invalid JSON: {err}") from err

            messages = fix_tool_calls(row["messages"])
            text = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=False,
            )
            # Drop over-long examples up front instead of letting the trainer
            # deal with them.
            if len(encoder.encode(text)) <= MAX_SEQ:
                texts.append(text)
            else:
                skipped += 1

    if skipped:
        print(f" ⚠ Filtered {skipped} examples exceeding {MAX_SEQ} tokens")

    return Dataset.from_dict({"text": texts})
ds = load_and_format(DATA, max_examples=MAX_EXAMPLES)

# Fail fast with a clear message rather than handing the trainer an empty
# dataset.
if len(ds) == 0:
    raise SystemExit(f"✗ ERROR: no usable examples loaded from {DATA}")

# Rough estimate only (the trainer's own progress bar is authoritative):
# optimizer steps per epoch, never reported as less than one.
steps = max(1, len(ds) // (BATCH * GRAD_ACCUM)) * EPOCHS
print(f"✓ Dataset: {len(ds)} examples")
print(f" Epochs: {EPOCHS}")
print(f" Effective batch size: {BATCH * GRAD_ACCUM}")
print(f" Estimated steps: {steps}")
# ── Train ──────────────────────────────────────────────────────────────
print("\nStarting training...")
print("=" * 60)

# All hyperparameters come from the Config section; nothing is logged to an
# external tracker (report_to="none").
training_args = SFTConfig(
    output_dir=OUT,
    per_device_train_batch_size=BATCH,
    gradient_accumulation_steps=GRAD_ACCUM,
    num_train_epochs=EPOCHS,
    learning_rate=LR,
    bf16=True,
    logging_steps=2,
    save_steps=999999,  # Don't save checkpoints during training
    warmup_ratio=0.1,
    optim="adamw_torch",
    seed=42,
    report_to="none",
    max_seq_length=MAX_SEQ,
    dataset_num_proc=1,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=ds,
    args=training_args,
)

trainer.train()

print("=" * 60)
print("✓ Training complete")
# ── Save adapter ───────────────────────────────────────────────────────
print(f"\nSaving adapter to {OUT}/")
model.save_pretrained(OUT)
tokenizer.save_pretrained(OUT)

# Verify saved files
adapter_path = os.path.join(OUT, "adapter_model.safetensors")
if not os.path.exists(adapter_path):
    # A missing adapter means check 3 of the smoke test failed: stop with a
    # non-zero exit status instead of falling through to the PASSED banner.
    print("✗ ERROR: adapter_model.safetensors not found")
    raise SystemExit(1)

size_mb = os.path.getsize(adapter_path) / 1e6
print(f"✓ Adapter saved: {size_mb:.2f} MB")

print("\n" + "=" * 60)
print("SMOKE TEST PASSED")
print("=" * 60)
print(f"\nAdapter location: {OUT}/")
print(f"Model: {MODEL}")
print(f"Examples: {len(ds)}")
print(f"LoRA rank: {RANK}")