Commit 90b81c5 · Parent: 52db102
add offload
app.py (changed)
@@ -112,15 +112,16 @@ def download_and_merge_model(base_model_name, lora_model_name, output_dir, devic
     5. Take the union of the base and adapter tokenizer vocabularies and extend the tokenizer
     6. Resize the merged model's embedding layer and save
     """
+    os.makedirs("temp", exist_ok=True)
     log.info("Loading base model...")
-    model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True)
+    model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, trust_remote_code=True, device=device, offload_folder="temp")
     log.info("Loading adapter tokenizer...")
-    adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name)
+    adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, device=device, offload_folder="temp")
     log.info("Resizing token embeddings...")
     added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
     model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
     log.info("Loading LoRA adapter...")
-    peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True)
+    peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, trust_remote_code=True, device=device, offload_folder="temp")
     log.info("Merging and unloading model...")
     model = peft_model.merge_and_unload()
     log.info("Saving model...")
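For context, the merge flow this hunk modifies looks roughly like the sketch below. This is a minimal reconstruction, not the app's exact code: the helper name merge_lora_with_offload is made up, device_map="auto" stands in for the app's device argument (transformers' from_pretrained normally takes device_map rather than device for placement, and tokenizers are not device-placed, so the extra kwargs on the tokenizer call are presumably ignored), and the offload kwargs on PeftModel.from_pretrained are left out.

# Minimal sketch of a LoRA merge with disk offload, assuming transformers + peft.
# merge_lora_with_offload is a hypothetical helper; only the overall flow mirrors app.py.
import os

from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer


def merge_lora_with_offload(base_model_name: str, lora_model_name: str, output_dir: str) -> str:
    os.makedirs("temp", exist_ok=True)  # scratch directory for offloaded weights

    # Let accelerate spread layers across GPU/CPU and spill the rest to disk.
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        low_cpu_mem_usage=True,
        trust_remote_code=True,
        device_map="auto",      # assumption: the app passes an explicit device instead
        offload_folder="temp",
    )
    adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True)

    # Grow the embedding matrix to cover tokens the adapter added on top of the base vocab.
    added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
    model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))

    # Attach the LoRA adapter, then fold its deltas into the base weights.
    peft_model = PeftModel.from_pretrained(model, lora_model_name)
    merged = peft_model.merge_and_unload()

    merged.save_pretrained(output_dir)
    adapter_tokenizer.save_pretrained(output_dir)
    return output_dir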
@@ -347,7 +348,7 @@ def create_ui():
             )
             convert_btn = gr.Button("Start Conversion", variant="primary")
         with gr.Column():
-            Log("convert.log", dark=True, xterm_font_size=12
+            Log("convert.log", dark=True, xterm_font_size=12)
     convert_btn.click(
         fn=process_model,
         inputs=[base_model, lora_model, repo_name, quant_method, hf_token],
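The second hunk only touches the log panel: as rendered in the diff, the Log(...) call gains its closing parenthesis. For orientation, a stripped-down version of how such a panel is typically wired into a Gradio Blocks layout is sketched below; the input widgets and the process_model stub are placeholders, and Log is assumed to be the gradio_log custom component that tails a file in the browser.

# Minimal sketch of a live log panel in a Gradio UI, assuming the gradio_log component.
# The widgets and process_model body are placeholders, not the app's real ones.
import gradio as gr
from gradio_log import Log


def process_model(base_model, lora_model, repo_name):
    # Placeholder for the real pipeline; the app writes progress to convert.log,
    # which the Log component below tails.
    with open("convert.log", "a") as fh:
        fh.write(f"processing {base_model} + {lora_model} -> {repo_name}\n")


def create_ui():
    open("convert.log", "a").close()  # make sure the file exists before tailing it
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                base_model = gr.Textbox(label="Base model")
                lora_model = gr.Textbox(label="LoRA adapter")
                repo_name = gr.Textbox(label="Target repo")
                convert_btn = gr.Button("Start Conversion", variant="primary")
            with gr.Column():
                # The line touched by this commit: tail convert.log in the right column.
                Log("convert.log", dark=True, xterm_font_size=12)
        convert_btn.click(fn=process_model, inputs=[base_model, lora_model, repo_name])
    return demo


if __name__ == "__main__":
    create_ui().launch()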