"""LoRA training smoke test — Qwen3-0.6B on RTX 2000 Ada. Minimal training script to verify: 1. GPU access works 2. unsloth LoRA training pipeline works 3. Model saves correctly Usage: # Inside madcat-ml container on junkpile: python smoke_test.py Expected runtime: <5 min Expected VRAM: ~3-4 GB """ from unsloth import FastLanguageModel from trl import SFTTrainer, SFTConfig from datasets import load_dataset import torch import json import os # ── Config ────────────────────────────────────────────────────────────── MODEL = "Qwen/Qwen3-0.6B" # Tiny model for smoke testing MAX_SEQ = 2048 # Short sequences RANK = 8 # Small LoRA rank ALPHA = 8 DATA = "./bt7274_v4.jsonl" OUT = "./smoke-test-lora" EPOCHS = 1 # Single epoch BATCH = 1 GRAD_ACCUM = 2 # Minimal effective batch LR = 1e-4 MAX_EXAMPLES = 20 # Only use first 20 examples # ── Load model (bf16, NOT 4-bit) ─────────────────────────────────────── print("Loading model...") model, tokenizer = FastLanguageModel.from_pretrained( model_name=MODEL, max_seq_length=MAX_SEQ, load_in_4bit=False, load_in_16bit=True, full_finetuning=False, dtype=torch.bfloat16, ) print(f"✓ Model loaded: {MODEL}") print(f" CUDA available: {torch.cuda.is_available()}") if torch.cuda.is_available(): print(f" GPU: {torch.cuda.get_device_name(0)}") print(f" VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB") # ── LoRA adapter ─────────────────────────────────────────────────────── print("\nConfiguring LoRA...") model = FastLanguageModel.get_peft_model( model, r=RANK, lora_alpha=ALPHA, lora_dropout=0, target_modules=[ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", ], bias="none", use_gradient_checkpointing="unsloth", random_state=42, max_seq_length=MAX_SEQ, ) print(f"✓ LoRA configured: r={RANK}, alpha={ALPHA}") # ── Dataset ──────────────────────────────────────────────────────────── print(f"\nLoading dataset: {DATA}") def fix_tool_calls(messages): """Parse tool_call arguments from JSON strings to 
dicts.""" fixed = [] for msg in messages: msg = dict(msg) if msg.get("tool_calls"): new_tcs = [] for tc in msg["tool_calls"]: tc = dict(tc) if "function" in tc: fn = dict(tc["function"]) if isinstance(fn.get("arguments"), str): try: fn["arguments"] = json.loads(fn["arguments"]) except (ValueError, TypeError): fn["arguments"] = {"raw": fn["arguments"]} tc["function"] = fn new_tcs.append(tc) msg["tool_calls"] = new_tcs fixed.append(msg) return fixed def load_and_format(path, max_examples=None): """Load JSONL and format with chat template.""" from datasets import Dataset _enc = tokenizer.tokenizer if hasattr(tokenizer, 'tokenizer') else tokenizer texts = [] skipped = 0 with open(path) as f: for i, line in enumerate(f): if max_examples and i >= max_examples: break line = line.strip() if not line: continue row = json.loads(line) messages = fix_tool_calls(row["messages"]) text = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=False, ) if len(_enc.encode(text)) <= MAX_SEQ: texts.append(text) else: skipped += 1 if skipped: print(f" ⚠ Filtered {skipped} examples exceeding {MAX_SEQ} tokens") return Dataset.from_dict({"text": texts}) ds = load_and_format(DATA, max_examples=MAX_EXAMPLES) steps = (len(ds) * EPOCHS) // (BATCH * GRAD_ACCUM) print(f"✓ Dataset: {len(ds)} examples") print(f" Epochs: {EPOCHS}") print(f" Effective batch size: {BATCH * GRAD_ACCUM}") print(f" Estimated steps: {steps}") # ── Train ────────────────────────────────────────────────────────────── print("\nStarting training...") print("=" * 60) trainer = SFTTrainer( model=model, tokenizer=tokenizer, train_dataset=ds, args=SFTConfig( output_dir=OUT, per_device_train_batch_size=BATCH, gradient_accumulation_steps=GRAD_ACCUM, num_train_epochs=EPOCHS, learning_rate=LR, bf16=True, logging_steps=2, save_steps=999999, # Don't save checkpoints during training warmup_ratio=0.1, optim="adamw_torch", seed=42, report_to="none", max_seq_length=MAX_SEQ, dataset_num_proc=1, ), ) trainer.train() 
print("=" * 60)
print("✓ Training complete")

# ── Save adapter ───────────────────────────────────────────────────────
print(f"\nSaving adapter to {OUT}/")
model.save_pretrained(OUT)
tokenizer.save_pretrained(OUT)

# Verify saved files: the smoke test only passes if the adapter weights
# file is actually present under OUT.
adapter_path = os.path.join(OUT, "adapter_model.safetensors")
if not os.path.exists(adapter_path):
    print("✗ ERROR: adapter_model.safetensors not found")
    # Exit non-zero so a missing adapter is never reported as a pass.
    raise SystemExit(1)

size_mb = os.path.getsize(adapter_path) / 1e6
print(f"✓ Adapter saved: {size_mb:.2f} MB")

print("\n" + "=" * 60)
print("SMOKE TEST PASSED")
print("=" * 60)
print(f"\nAdapter location: {OUT}/")
print(f"Model: {MODEL}")
print(f"Examples: {len(ds)}")
print(f"LoRA rank: {RANK}")