add training scripts: v3, qwen3-8b, qlora-7b
This commit is contained in:
@@ -0,0 +1,202 @@
|
||||
"""QLoRA smoke test — Qwen2.5-7B-Instruct on RTX 2000 Ada (16 GB).
|
||||
|
||||
Adapted from train_v4.py for junkpile's 16 GB VRAM budget.
|
||||
Uses bitsandbytes 4-bit quantization (QLoRA) instead of bf16 LoRA.
|
||||
|
||||
Key differences from train_v4.py:
|
||||
- Model: Qwen2.5-7B-Instruct (not Qwen3.5-27B)
|
||||
- QLoRA: load_in_4bit=True (bnb), not bf16
|
||||
- Shorter sequences: 2048 (16 GB ceiling)
|
||||
- Smaller batching: grad_accum 4 (fits memory)
|
||||
- 100 examples only (pipeline test, not quality)
|
||||
- 1 epoch (smoke test speed)
|
||||
|
||||
Memory estimate:
|
||||
Model (4-bit bnb) ~5 GB
|
||||
LoRA adapters (r=16) ~100 MB
|
||||
Optimizer (adamw_8bit) ~200 MB
|
||||
Activations (grad ckpt, seq 2048) ~4-6 GB
|
||||
Total ~10-12 GB
|
||||
|
||||
Usage (inside madcat-ml container on junkpile):
|
||||
cd /workspace/lora
|
||||
python3 train_qlora_7b.py
|
||||
|
||||
Expected runtime: <10 min
|
||||
Expected VRAM peak: ~10-12 GB
|
||||
"""
|
||||
|
||||
from unsloth import FastLanguageModel
|
||||
from trl import SFTTrainer, SFTConfig
|
||||
import torch
|
||||
import json
|
||||
import os
|
||||
|
||||
# ── Config ───────────────────────────────────────────────────────────
MODEL = "unsloth/Qwen2.5-7B-Instruct-bnb-4bit"
MAX_SEQ = 2048        # token ceiling per example; also passed to the model loader
RANK = 16             # LoRA rank (r)
ALPHA = 16            # LoRA alpha
DATA = "./bt7274_v4.jsonl"
OUT = "./qlora-qwen25-7b-smoke"
EPOCHS = 1
BATCH = 1             # per-device batch size
GRAD_ACCUM = 4        # effective batch = BATCH * GRAD_ACCUM
LR = 1e-4
WARMUP_RATIO = 0.1
MAX_EXAMPLES = 100    # cap on how many examples are read from DATA

# ── Load model (4-bit QLoRA) ────────────────────────────────────────
print(f"Loading {MODEL}...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL,
    max_seq_length=MAX_SEQ,
    load_in_4bit=True,
    dtype=torch.bfloat16,
)

print(f"Model loaded: {MODEL}")
print(f" CUDA: {torch.cuda.get_device_name(0)}")
# The device-properties field is `total_memory` (bytes). The previous
# `total_mem` attribute does not exist and raised AttributeError here,
# aborting the smoke test right after the model had been loaded.
print(f" VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
print(f" Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
|
||||
|
||||
# ── LoRA adapter ────────────────────────────────────────────────────
# Attention projections plus the MLP projections receive adapters.
lora_targets = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r=RANK,
    lora_alpha=ALPHA,
    lora_dropout=0,
    target_modules=lora_targets,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
    max_seq_length=MAX_SEQ,
)

# Tally trainable vs. total parameter counts in a single pass.
trainable = 0
total = 0
for param in model.parameters():
    n_elems = param.numel()
    total += n_elems
    if param.requires_grad:
        trainable += n_elems

print(f"LoRA: r={RANK}, alpha={ALPHA}")
print(f" Trainable: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")
|
||||
|
||||
|
||||
# ── Dataset ─────────────────────────────────────────────────────────
|
||||
def fix_tool_calls(messages):
    """Return copies of ``messages`` with string tool-call arguments decoded.

    For every message that carries a non-empty ``tool_calls`` list, each
    call's ``function.arguments`` is parsed with ``json.loads`` when it is a
    string. A string that is not valid JSON is wrapped as ``{"raw": <string>}``.
    Messages, calls, and function dicts are shallow-copied, so the input
    structures are left unmodified.

    Args:
        messages: Iterable of chat message mappings.

    Returns:
        A new list of message dicts.
    """

    def _decode_call(call):
        # Copy first so the caller's tool-call dict is never mutated.
        call = dict(call)
        if "function" not in call:
            return call
        spec = dict(call["function"])
        raw_args = spec.get("arguments")
        if isinstance(raw_args, str):
            try:
                spec["arguments"] = json.loads(raw_args)
            except (ValueError, TypeError):
                # Keep the unparseable payload instead of dropping it.
                spec["arguments"] = {"raw": raw_args}
        call["function"] = spec
        return call

    result = []
    for original in messages:
        entry = dict(original)
        calls = entry.get("tool_calls")
        if calls:
            entry["tool_calls"] = [_decode_call(call) for call in calls]
        result.append(entry)
    return result
|
||||
|
||||
|
||||
def load_and_format(path, max_examples=None):
    """Load a JSONL chat dataset and render each row with the chat template.

    Each non-blank line of ``path`` is parsed as JSON, its ``"messages"`` list
    is normalized with ``fix_tool_calls`` and rendered to text through the
    module-level ``tokenizer``. Rendered examples longer than the module-level
    ``MAX_SEQ`` (in tokens) are dropped, and the number dropped is printed.

    Args:
        path: Path to the JSONL file (read as UTF-8).
        max_examples: Maximum number of examples to read. A falsy value
            (``None`` or ``0``) means no limit. Blank lines do not count
            toward the limit; over-length examples that get filtered do.

    Returns:
        A ``datasets.Dataset`` with a single ``"text"`` column.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a row has no ``"messages"`` key.
    """
    from datasets import Dataset

    # Some tokenizer objects expose the text encoder as `.tokenizer`
    # (presumably processor-style wrappers); use it for counting when present.
    encoder = tokenizer.tokenizer if hasattr(tokenizer, "tokenizer") else tokenizer

    texts = []
    skipped = 0
    consumed = 0
    # Explicit UTF-8 so decoding does not depend on the container's locale.
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines are not examples and must not eat the budget.
                continue
            if max_examples and consumed >= max_examples:
                break
            consumed += 1

            row = json.loads(line)
            messages = fix_tool_calls(row["messages"])
            text = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=False,
            )
            if len(encoder.encode(text)) <= MAX_SEQ:
                texts.append(text)
            else:
                skipped += 1

    if skipped:
        print(f" Filtered {skipped} examples exceeding {MAX_SEQ} tokens")
    return Dataset.from_dict({"text": texts})
|
||||
|
||||
|
||||
print(f"\nLoading dataset: {DATA} (first {MAX_EXAMPLES} examples)")
ds = load_and_format(DATA, max_examples=MAX_EXAMPLES)

# Rough optimizer-step count: examples seen across all epochs divided by
# the effective batch (floor division, so this is only an estimate).
n_examples = len(ds)
effective_batch = BATCH * GRAD_ACCUM
steps = (n_examples * EPOCHS) // effective_batch
print(f" Loaded: {n_examples} examples")
print(f" Epochs: {EPOCHS}, effective batch: {effective_batch}")
print(f" Estimated steps: {steps}")
|
||||
|
||||
# ── Train ───────────────────────────────────────────────────────────
print(f"\nTraining...")
print("=" * 60)

# Build the training configuration first so the trainer call stays short.
training_args = SFTConfig(
    # Output / bookkeeping
    output_dir=OUT,
    logging_steps=5,
    save_steps=999999,  # no mid-training checkpoints (smoke test)
    report_to="none",
    seed=42,
    # Batching and sequence length
    per_device_train_batch_size=BATCH,
    gradient_accumulation_steps=GRAD_ACCUM,
    max_seq_length=MAX_SEQ,
    dataset_num_proc=1,
    num_train_epochs=EPOCHS,
    # Optimization
    learning_rate=LR,
    lr_scheduler_type="cosine",
    warmup_ratio=WARMUP_RATIO,
    weight_decay=0.01,
    optim="adamw_8bit",  # 8-bit adam saves ~1 GB vs adamw_torch
    bf16=True,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=ds,
    args=training_args,
)

result = trainer.train()

print("=" * 60)
print(f"Training complete — loss: {result.training_loss:.4f}")
print(f" Steps: {result.global_step}")
print(f" Runtime: {result.metrics['train_runtime']:.0f}s")
print(f" Peak VRAM: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")
|
||||
|
||||
# ── Save LoRA adapter ──────────────────────────────────────────────
print(f"\nSaving adapter to {OUT}/")
model.save_pretrained(OUT)
tokenizer.save_pretrained(OUT)

# The smoke test only counts as passed if the adapter weights actually
# landed on disk. Previously a missing file printed an error and then the
# script still reported "SMOKE TEST PASSED" with exit status 0.
adapter_path = os.path.join(OUT, "adapter_model.safetensors")
if not os.path.exists(adapter_path):
    print(" ERROR: adapter_model.safetensors not found")
    raise SystemExit(1)

size_mb = os.path.getsize(adapter_path) / 1e6
print(f" Adapter: {size_mb:.1f} MB")

print(f"\n{'=' * 60}")
print("SMOKE TEST PASSED")
print(f"{'=' * 60}")
print(f"\n Model: {MODEL}")
print(f" Examples: {len(ds)}")
print(f" LoRA: r={RANK}, alpha={ALPHA}")
print(f" Peak VRAM: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")
print(f" Adapter: {OUT}/")
|
||||
@@ -0,0 +1,95 @@
|
||||
"""Qwen3-8B coding agent LoRA — trained on opencode tool-use + coding patterns.
|
||||
|
||||
Usage (on RunPod A100/H100):
|
||||
python train_qwen3_8b.py
|
||||
"""
|
||||
|
||||
from unsloth import FastLanguageModel
|
||||
from trl import SFTTrainer
|
||||
from transformers import TrainingArguments
|
||||
from datasets import load_dataset
|
||||
|
||||
# --- Config ---
MODEL = "unsloth/Qwen3-8B-unsloth-bnb-4bit"
MAX_SEQ = 4096
RANK = 16
ALPHA = 16
DATA = "./bt7274_v3.jsonl"
OUT = "./qwen3-8b-lora-v1"
EPOCHS = 3
BATCH = 4  # A100/H100 can handle bigger batches
GRAD_ACCUM = 4  # effective batch = 16
LR = 2e-4

# --- Load model (4-bit quantized) ---
# Loader options are gathered in one mapping and unpacked into the call.
load_options = {
    "model_name": MODEL,
    "max_seq_length": MAX_SEQ,
    "load_in_4bit": True,
    "dtype": None,  # NOTE(review): presumably lets the loader pick the dtype; confirm
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)
|
||||
|
||||
# --- LoRA adapter ---
# Attention projections plus the MLP projections receive adapters.
lora_targets = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r=RANK,
    lora_alpha=ALPHA,
    lora_dropout=0,
    target_modules=lora_targets,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)

# --- Dataset ---
# Read the JSONL file as a single "train" split.
ds = load_dataset("json", data_files=DATA, split="train")
|
||||
|
||||
def to_chatml(ex):
    """Render one row's ``messages`` into a single training string.

    Uses the module-level ``tokenizer``'s chat template without tokenizing
    and without appending a generation prompt.

    Args:
        ex: A dataset row containing a ``"messages"`` entry.

    Returns:
        A dict with the rendered conversation under ``"text"``.
    """
    return {
        "text": tokenizer.apply_chat_template(
            ex["messages"],
            tokenize=False,
            add_generation_prompt=False,
        )
    }
|
||||
|
||||
# Add the rendered "text" column to every row.
ds = ds.map(to_chatml)

# Rough optimizer-step count (floor division, so only an estimate).
n_examples = len(ds)
effective_batch = BATCH * GRAD_ACCUM
steps = (n_examples * EPOCHS) // effective_batch
print(f"Dataset: {n_examples} examples")
print(f"Epochs: {EPOCHS}, effective batch: {effective_batch}")
print(f"Estimated steps: {steps}")
print(f"LoRA: r={RANK}, alpha={ALPHA}")
print(f"Max seq: {MAX_SEQ}")
|
||||
|
||||
# --- Train ---
# Build the training arguments first so the trainer call stays short.
training_args = TrainingArguments(
    # Output / bookkeeping
    output_dir=OUT,
    logging_steps=5,
    save_steps=50,
    save_total_limit=2,
    report_to="none",
    seed=42,
    # Batching
    per_device_train_batch_size=BATCH,
    gradient_accumulation_steps=GRAD_ACCUM,
    num_train_epochs=EPOCHS,
    # Optimization
    learning_rate=LR,
    warmup_steps=10,
    optim="adamw_8bit",
    bf16=True,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=ds,
    args=training_args,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ,
    packing=False,
)
trainer.train()

# --- Save LoRA adapter ---
# Model and tokenizer are both written to the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(OUT)
print(f"\nSaved LoRA adapter to {OUT}/")
|
||||
+97
@@ -0,0 +1,97 @@
|
||||
"""BT-7274 LoRA v3 — balanced tool-call + direct-response.
|
||||
|
||||
Usage:
|
||||
source ~/lora-train/bin/activate
|
||||
cd ~/Projects/lora
|
||||
python train_v3.py
|
||||
"""
|
||||
|
||||
from unsloth import FastLanguageModel
|
||||
from trl import SFTTrainer
|
||||
from transformers import TrainingArguments
|
||||
from datasets import load_dataset
|
||||
|
||||
# --- Config ---
MODEL = "unsloth/Qwen2.5-7B-Instruct-bnb-4bit"
MAX_SEQ = 4096
RANK = 16
ALPHA = 16
DATA = "./bt7274_v3.jsonl"
OUT = "./bt7274-lora-v3"
EPOCHS = 3
BATCH = 1
GRAD_ACCUM = 8  # effective batch = BATCH * GRAD_ACCUM
LR = 2e-4

# --- Load model (4-bit quantized) ---
# Loader options are gathered in one mapping and unpacked into the call.
load_options = {
    "model_name": MODEL,
    "max_seq_length": MAX_SEQ,
    "load_in_4bit": True,
    "dtype": None,  # NOTE(review): presumably lets the loader pick the dtype; confirm
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)
|
||||
|
||||
# --- LoRA adapter ---
# Attention projections plus the MLP projections receive adapters.
lora_targets = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r=RANK,
    lora_alpha=ALPHA,
    lora_dropout=0,
    target_modules=lora_targets,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)

# --- Dataset ---
# Read the JSONL file as a single "train" split.
ds = load_dataset("json", data_files=DATA, split="train")
|
||||
|
||||
def to_chatml(ex):
    """Render one row's ``messages`` into a single training string.

    Uses the module-level ``tokenizer``'s chat template without tokenizing
    and without appending a generation prompt.

    Args:
        ex: A dataset row containing a ``"messages"`` entry.

    Returns:
        A dict with the rendered conversation under ``"text"``.
    """
    return {
        "text": tokenizer.apply_chat_template(
            ex["messages"],
            tokenize=False,
            add_generation_prompt=False,
        )
    }
|
||||
|
||||
# Add the rendered "text" column to every row.
ds = ds.map(to_chatml)

# Rough optimizer-step count (floor division, so only an estimate).
n_examples = len(ds)
effective_batch = BATCH * GRAD_ACCUM
steps = (n_examples * EPOCHS) // effective_batch
print(f"Dataset: {n_examples} examples")
print(f"Epochs: {EPOCHS}, effective batch: {effective_batch}")
print(f"Estimated steps: {steps}")
print(f"LoRA: r={RANK}, alpha={ALPHA}")
print(f"Max seq: {MAX_SEQ}")
|
||||
|
||||
# --- Train ---
# Build the training arguments first so the trainer call stays short.
training_args = TrainingArguments(
    # Output / bookkeeping
    output_dir=OUT,
    logging_steps=5,
    save_steps=50,
    save_total_limit=2,
    report_to="none",
    seed=42,
    # Batching
    per_device_train_batch_size=BATCH,
    gradient_accumulation_steps=GRAD_ACCUM,
    num_train_epochs=EPOCHS,
    # Optimization
    learning_rate=LR,
    warmup_steps=10,
    optim="adamw_8bit",
    bf16=True,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=ds,
    args=training_args,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ,
    packing=False,
)
trainer.train()

# --- Save LoRA adapter ---
# Model and tokenizer are both written to the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(OUT)
print(f"\nSaved LoRA adapter to {OUT}/")
|
||||
Reference in New Issue
Block a user