init: bt7274 lora training — v1 adapter, v2 dataset (500 examples), justfile

2026-05-25 16:37:30 +02:00
commit 9bcf1c2d9a
12 changed files with 758718 additions and 0 deletions
@@ -0,0 +1,52 @@
+# Root of the Python virtualenv; the train and check recipes source
+# <venv>/bin/activate before running Python.
+venv := "/home/madcat/lora-train"
+# Dataset file (relative to the directory the recipes run in): train tests for
+# its existence and counts its lines, check loads it with datasets.load_dataset.
+data := "bt7274_v2.jsonl"
+
+# Verifies that the dataset file exists (aborting with "MISSING: ..." if not),
+# prints its line count as the number of examples, then runs train.py inside
+# the training virtualenv. Interpolated paths are quoted so that an overridden
+# `data` or `venv` value containing spaces still works.
+# NOTE(review): train.py is invoked without arguments, so whether it reads the
+# same `data` file cannot be confirmed from this justfile.
+# Train LoRA adapter (stop vLLM first if running)
+train:
+    @echo "── BT-7274 LoRA training ──"
+    @test -f "{{data}}" || (echo "MISSING: {{data}}" && exit 1)
+    @echo "Data: {{data}} ($(wc -l < "{{data}}") examples)"
+    @echo "Activating venv: {{venv}}"
+    bash -c 'source "{{venv}}/bin/activate" && python train.py'
+
+# Loads the dataset with datasets.load_dataset inside the training virtualenv
+# and prints: number of examples, message count per role, and average / maximum
+# total content characters per example. Nothing is trained or written.
+# The Python program is joined into one line by the trailing backslashes, so
+# statements are separated with ';' and loops are written as comprehensions.
+# Roles are tallied with collections.Counter (printed as a plain dict), and the
+# average / maximum fall back to 0 when there are no rows instead of raising.
+# Dry run — load data, print stats, exit
+check:
+    bash -c 'source "{{venv}}/bin/activate" && python -c "\
+    from collections import Counter; \
+    from datasets import load_dataset; \
+    ds = load_dataset(\"json\", data_files=\"{{data}}\", split=\"train\"); \
+    print(f\"Examples: {len(ds)}\"); \
+    roles = Counter(m[\"role\"] for ex in ds for m in ex[\"messages\"]); \
+    print(f\"Roles: {dict(roles)}\"); \
+    lens = [sum(len(m.get(\"content\",\"\") or \"\") for m in ex[\"messages\"]) for ex in ds]; \
+    print(f\"Avg chars/example: {sum(lens)//max(len(lens),1)}\"); \
+    print(f\"Max chars/example: {max(lens, default=0)}\"); \
+    "'
+
+# Signals every process whose full command line matches the vLLM entrypoint
+# module and reports whether anything matched, instead of always printing
+# "vLLM stopped". The recipe still exits 0 in every case.
+# The pattern is written "[v]llm\.entrypoints" so that the shell running this
+# line (whose own command line contains the pattern text) does not match
+# itself; the escaped dot makes the '.' literal.
+# pkill returns as soon as the signal is sent, so the server may still be
+# shutting down when the message is printed. The second message also appears
+# if pkill itself fails.
+# Stop vLLM server (frees GPU for training)
+stop-vllm:
+    @pkill -f "[v]llm\.entrypoints" && echo "vLLM stopped" || echo "vLLM not running (no matching process)"
+
+# Runs ~/start-vllm.sh with bash; that script is not part of this justfile.
+# NOTE(review): the summary below says "with v2 adapter", but serve-v2 is the
+# recipe that passes the v2 adapter path explicitly — confirm what the script
+# actually loads.
+# Start vLLM server with v2 adapter
+serve:
+    bash ~/start-vllm.sh
+
+# Activates the virtualenv at /home/madcat/vllm-serve and launches
+# `python -m vllm.entrypoints.openai.api_server` for Qwen/Qwen2.5-7B-Instruct
+# on port 8000, registering the LoRA module `bt7274` from
+# /home/madcat/Projects/lora/bt7274-lora-v2.
+# The python command's stdout and stderr are redirected to
+# /home/madcat/vllm-bt7274.log and the whole `source && python` list is put in
+# the background with `&`, so the recipe returns immediately: the final
+# "vLLM started" message is printed without checking that the server came up.
+# The stop-vllm recipe's pkill pattern targets this module name.
+# All paths are absolute and specific to the user `madcat`; the log path in
+# the final message is spelled with `~`, which is the same file only when
+# HOME is /home/madcat.
+# Start vLLM with v2 adapter override
+serve-v2:
+    @echo "Starting vLLM with v2 adapter..."
+    bash -c 'source /home/madcat/vllm-serve/bin/activate && \
+    python -m vllm.entrypoints.openai.api_server \
+      --model Qwen/Qwen2.5-7B-Instruct \
+      --quantization bitsandbytes \
+      --load-format bitsandbytes \
+      --enable-lora \
+      --lora-modules bt7274=/home/madcat/Projects/lora/bt7274-lora-v2 \
+      --max-lora-rank 16 \
+      --max-model-len 32768 \
+      --enforce-eager \
+      --enable-auto-tool-choice \
+      --tool-call-parser hermes \
+      --port 8000 \
+      > /home/madcat/vllm-bt7274.log 2>&1 &'
+    @echo "vLLM started (log: ~/vllm-bt7274.log)"