# ── Specialist LoRA Training Pipeline ──
# Base model: Qwen/Qwen3.5-27B
# Run on: RunPod H100 or sin GB10
#
# This is a `just` justfile, not a Makefile: recipe parameters are interpolated
# with {{param}}, and `$` is handed to the shell unchanged (no `$$` escaping).

# Hand-crafted persona dataset that lives outside data/ (listed by `stats`).
default_data := "bt7274_v3.jsonl"

# SSH destination used by the transfer recipes.
# Override per invocation: `just sin_host=user@host transfer <name>`.
sin_host := "madcat@192.168.88.108"

# ── Data Extraction ──────────────────────────────────────────────────

# Extract specialist training data from opencode session DB, then mine git repos
extract:
    @echo "── Extracting specialist data from opencode DB ──"
    python extract_specialists.py --outdir data/
    @echo ""
    @echo "── Mining git repos ──"
    python mine_repos.py --repos repos.json --outdir data/
    @echo ""
    @echo "Done. Review data/*.jsonl before training."

# Extract session data only (no git mining)
extract-sessions:
    python extract_specialists.py --outdir data/

# Mine git repos only (no session extraction)
extract-git:
    python mine_repos.py --repos repos.json --outdir data/

# Mine a single repo
mine repo lang:
    python mine_repos.py --repo {{repo}} --lang {{lang}} --outdir data/

# ── Dataset Stats ────────────────────────────────────────────────────

# Show line counts (one example per line) for all datasets
stats:
    @echo "── Dataset Statistics ──"
    @for f in data/*.jsonl "{{default_data}}"; do \
        if [ -f "$f" ]; then \
            count=$(wc -l < "$f" | tr -d ' '); \
            echo "  $f: $count examples"; \
        fi; \
    done

# Detailed stats for a specific dataset (roles, sizes, tool-call share)
check file:
    python -c "\
    from datasets import load_dataset; \
    ds = load_dataset('json', data_files='{{file}}', split='train'); \
    print(f'Examples: {len(ds)}'); \
    roles = {}; \
    [roles.update({r: roles.get(r,0)+1}) for ex in ds for m in ex['messages'] for r in [m['role']]]; \
    print(f'Roles: {roles}'); \
    lens = [sum(len(m.get('content','') or '') for m in ex['messages']) for ex in ds]; \
    print(f'Avg chars/example: {sum(lens)//len(lens)}'); \
    print(f'Max chars/example: {max(lens)}'); \
    tc = sum(1 for ex in ds if any(m.get('tool_calls') for m in ex['messages'])); \
    print(f'Tool-call examples: {tc} ({100*tc//len(ds)}%)'); \
    "

# ── Training ─────────────────────────────────────────────────────────

# Train bt7274 persona adapter v4 (Hermes format, blocks, 802 examples)
train-bt7274:
    python train_v4.py

# Train bt7274 v3 (legacy)
train-bt7274-v3:
    python train_qwen35_27b.py

# Train a specialist adapter
train name:
    python train_specialist.py --name {{name}}

# Train all specialists in sequence (stops at the first failing run)
train-all:
    @echo "── Training all specialist adapters ──"
    @echo "Order: oxidizer → prism → serpent → forge → swiftblade → trace"
    @echo ""
    python train_specialist.py --name oxidizer
    python train_specialist.py --name prism
    python train_specialist.py --name serpent
    python train_specialist.py --name forge
    python train_specialist.py --name swiftblade
    python train_specialist.py --name trace

# Train with custom data path
train-custom name data:
    python train_specialist.py --name {{name}} --data {{data}}

# ── Serving ──────────────────────────────────────────────────────────

# List trained adapters and the size of each adapter_model.safetensors
adapters:
    @echo "── Trained Adapters ──"
    @for d in adapters/*/; do \
        if [ -f "$d/adapter_model.safetensors" ]; then \
            size=$(du -sh "$d/adapter_model.safetensors" | cut -f1); \
            echo "  ✓ $(basename "$d") ($size)"; \
        else \
            echo "  ✗ $(basename "$d") (no adapter_model.safetensors)"; \
        fi; \
    done

# Transfer adapter to sin
transfer name:
    @echo "── Transferring {{name}} to sin ──"
    @test -d "adapters/{{name}}" || (echo "ERROR: adapters/{{name}} not found" && exit 1)
    ssh {{sin_host}} "mkdir -p ~/models/loras/{{name}}"
    rsync -avP "adapters/{{name}}/" "{{sin_host}}:~/models/loras/{{name}}/"
    @echo "✓ Transferred to sin:~/models/loras/{{name}}/"

# Transfer all adapters to sin (skips directories without adapter_model.safetensors)
transfer-all:
    @for d in adapters/*/; do \
        name=$(basename "$d"); \
        if [ -f "$d/adapter_model.safetensors" ]; then \
            echo "── Transferring $name ──"; \
            ssh {{sin_host}} "mkdir -p ~/models/loras/$name"; \
            rsync -avP "$d" "{{sin_host}}:~/models/loras/$name/"; \
        fi; \
    done

# ── Utilities ────────────────────────────────────────────────────────

# Clean generated data (keeps hand-crafted datasets)
clean-data:
    rm -rf data/*.jsonl
    @echo "Cleaned data/*.jsonl"

# Clean trained adapters
clean-adapters:
    rm -rf adapters/
    @echo "Cleaned adapters/"

# Full clean
clean: clean-data clean-adapters