"""Merge LoRA adapter into base model for vLLM serving.""" from unsloth import FastLanguageModel MODEL = "unsloth/Qwen2.5-7B-Instruct-bnb-4bit" ADAPTER = "./bt7274-lora" MERGED = "./bt7274-merged" MAX_SEQ = 2048 print("Loading base model + adapter...") model, tokenizer = FastLanguageModel.from_pretrained( model_name=MODEL, max_seq_length=MAX_SEQ, load_in_4bit=True, dtype=None, ) # Load the LoRA adapter from peft import PeftModel model = PeftModel.from_pretrained(model, ADAPTER) print(f"Merging and saving to {MERGED}/ (16-bit, ~14GB)...") model.save_pretrained_merged(MERGED, tokenizer, save_method="merged_16bit") print("Done.")