53 lines
1.8 KiB
Makefile
53 lines
1.8 KiB
Makefile
# Python virtualenv whose bin/activate is sourced by the `train` and `check` recipes.
venv := "/home/madcat/lora-train"
|
|
# Dataset file checked and line-counted by `train`, and loaded as JSON by `check`.
data := "bt7274_v2.jsonl"
|
|
|
|
# Steps: verify that the dataset file exists (abort with a "MISSING" message on
# stderr otherwise), report its line count, then run train.py inside the
# virtualenv. Every interpolated path is quoted so that a value containing
# whitespace is passed to the shell as a single word.
#
# Train LoRA adapter (stop vLLM first if running)
train:
    @echo "── BT-7274 LoRA training ──"
    @test -f "{{data}}" || (echo "MISSING: {{data}}" >&2 && exit 1)
    @echo "Data: {{data}} ($(wc -l < "{{data}}") examples)"
    @echo "Activating venv: {{venv}}"
    bash -c 'source "{{venv}}/bin/activate" && python train.py'
|
|
|
|
# Loads the dataset with the `datasets` JSON loader inside the virtualenv and
# prints: the number of examples, how many messages carry each role, and the
# average and maximum total content length (in characters) per example.
# A missing or null "content" counts as an empty string. The average and
# maximum fall back to 0 for a dataset with no examples instead of raising.
# Continuation lines stay at recipe indentation: they are joined into one
# `python -c` string, so no stray leading whitespace may reach Python.
#
# Dry run — load data, print stats, exit
check:
    bash -c 'source "{{venv}}/bin/activate" && python -c "\
    from collections import Counter; \
    from datasets import load_dataset; \
    ds = load_dataset(\"json\", data_files=\"{{data}}\", split=\"train\"); \
    print(f\"Examples: {len(ds)}\"); \
    roles = dict(Counter(m[\"role\"] for ex in ds for m in ex[\"messages\"])); \
    print(f\"Roles: {roles}\"); \
    lens = [sum(len(m.get(\"content\",\"\") or \"\") for m in ex[\"messages\"]) for ex in ds]; \
    print(f\"Avg chars/example: {sum(lens)//max(len(lens),1)}\"); \
    print(f\"Max chars/example: {max(lens, default=0)}\"); \
    "'
|
|
|
|
# The pattern is written as "[v]llm.entrypoints": as a regex it still matches
# "vllm.entrypoints", but it no longer matches the literal text of this very
# command line, so the wrapping shell can never be selected by `pkill -f`.
# The recipe never fails: it reports whether a process was signalled or
# whether nothing matched.
#
# Stop vLLM server (frees GPU for training)
stop-vllm:
    @pkill -f "[v]llm.entrypoints" && echo "vLLM stopped" || echo "vLLM not running"
|
|
|
|
# NOTE(review): the earlier comment said this serves the v2 adapter; that
# depends on the contents of ~/start-vllm.sh, which is not part of this file —
# confirm against the script.
#
# Start the vLLM server by running ~/start-vllm.sh
serve:
    bash ~/start-vllm.sh
|
|
|
|
# Launches the vLLM OpenAI-compatible API server in the background from the
# /home/madcat/vllm-serve virtualenv, with the bt7274 LoRA module loaded from
# /home/madcat/Projects/lora/bt7274-lora-v2.
#
# Parameters (both optional; the defaults reproduce the previously fixed values):
#   port — value passed to --port
#   log  — file receiving the server's stdout and stderr; the closing message
#          prints this same path, so the two can no longer disagree
#
# The final message is printed as soon as the process has been backgrounded;
# it does not wait for the server to finish loading.
#
# Start vLLM with v2 adapter override
serve-v2 port="8000" log="/home/madcat/vllm-bt7274.log":
    @echo "Starting vLLM with v2 adapter..."
    bash -c 'source /home/madcat/vllm-serve/bin/activate && \
        python -m vllm.entrypoints.openai.api_server \
        --model Qwen/Qwen2.5-7B-Instruct \
        --quantization bitsandbytes \
        --load-format bitsandbytes \
        --enable-lora \
        --lora-modules bt7274=/home/madcat/Projects/lora/bt7274-lora-v2 \
        --max-lora-rank 16 \
        --max-model-len 32768 \
        --enforce-eager \
        --enable-auto-tool-choice \
        --tool-call-parser hermes \
        --port {{port}} \
        > "{{log}}" 2>&1 &'
    @echo "vLLM started (log: {{log}})"
|