# Justfile for training and serving the BT-7274 LoRA adapter.
#
# Every variable below can be overridden on the command line, e.g.
#   just data=other.jsonl check

# Python virtualenv activated by the `train` and `check` recipes.
venv := "/home/madcat/lora-train"

# Training dataset (JSON Lines; `train` counts one example per line).
data := "bt7274_v2.jsonl"

# Python virtualenv activated by `serve-v2` to run the vLLM API server.
serve_venv := "/home/madcat/vllm-serve"

# LoRA adapter directory that `serve-v2` registers under the name `bt7274`.
lora_adapter := "/home/madcat/Projects/lora/bt7274-lora-v2"

# File that receives stdout and stderr of the server started by `serve-v2`.
vllm_log := "/home/madcat/vllm-bt7274.log"

# Fails early with "MISSING: <file>" when the dataset does not exist, then
# runs train.py inside the training venv. Interpolated paths are quoted so
# that values containing spaces survive shell word splitting.
# Train LoRA adapter (stop vLLM first if running)
train:
    @echo "── BT-7274 LoRA training ──"
    @test -f "{{data}}" || (echo "MISSING: {{data}}" && exit 1)
    @echo "Data: {{data}} ($(wc -l < "{{data}}") examples)"
    @echo "Activating venv: {{venv}}"
    bash -c 'source "{{venv}}/bin/activate" && python train.py'

# Prints the example count, a per-role message count, and the average and
# maximum number of content characters per example. The average and maximum
# are guarded so that an empty dataset reports 0 instead of raising.
# The Python program is joined into one line by the trailing backslashes,
# which is why every statement ends with `;`.
# Dry run — load data, print stats, exit
check:
    bash -c 'source "{{venv}}/bin/activate" && python -c "\
        from datasets import load_dataset; \
        ds = load_dataset(\"json\", data_files=\"{{data}}\", split=\"train\"); \
        print(f\"Examples: {len(ds)}\"); \
        roles = {}; \
        [roles.update({r: roles.get(r,0)+1}) for ex in ds for m in ex[\"messages\"] for r in [m[\"role\"]]]; \
        print(f\"Roles: {roles}\"); \
        lens = [sum(len(m.get(\"content\",\"\") or \"\") for m in ex[\"messages\"]) for ex in ds]; \
        print(f\"Avg chars/example: {sum(lens)//max(len(lens),1)}\"); \
        print(f\"Max chars/example: {max(lens, default=0)}\"); \
        "'

# The leading `-` makes just ignore pkill's exit status: pkill exits non-zero
# when no process matched, which is not an error here. As a consequence the
# "vLLM stopped" message is printed whether or not a server was running.
# Stop vLLM server (frees GPU for training)
stop-vllm:
    -pkill -f "vllm.entrypoints"
    @echo "vLLM stopped"

# NOTE(review): what the launcher script configures is not visible from this
# file — confirm which adapter it serves.
# Start vLLM via the ~/start-vllm.sh launcher script
serve:
    bash ~/start-vllm.sh

# Launches the OpenAI-compatible API server on port 8000 in the background,
# with all output redirected to {{vllm_log}}. Because the launch is
# backgrounded, the recipe returns immediately and the final message does not
# confirm that the server came up — check the log for startup errors.
# Start vLLM with v2 adapter override
serve-v2:
    @echo "Starting vLLM with v2 adapter..."
    bash -c 'source "{{serve_venv}}/bin/activate" && \
        python -m vllm.entrypoints.openai.api_server \
        --model Qwen/Qwen2.5-7B-Instruct \
        --quantization bitsandbytes \
        --load-format bitsandbytes \
        --enable-lora \
        --lora-modules bt7274="{{lora_adapter}}" \
        --max-lora-rank 16 \
        --max-model-len 32768 \
        --enforce-eager \
        --enable-auto-tool-choice \
        --tool-call-parser hermes \
        --port 8000 \
        > "{{vllm_log}}" 2>&1 &'
    @echo "vLLM started (log: {{vllm_log}})"