Steven10429 committed on
Commit
90b81c5
·
1 Parent(s): 52db102

add offload

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -112,15 +112,16 @@ def download_and_merge_model(base_model_name, lora_model_name, output_dir, devic
112
  5. 求 base 与 adapter tokenizer 的词表并取并集,扩展 tokenizer
113
  6. 调整合并模型嵌入层尺寸并保存
114
  """
 
115
  log.info("Loading base model...")
116
- model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True)
117
  log.info("Loading adapter tokenizer...")
118
- adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name)
119
  log.info("Resizing token embeddings...")
120
  added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
121
  model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
122
  log.info("Loading LoRA adapter...")
123
- peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True)
124
  log.info("Merging and unloading model...")
125
  model = peft_model.merge_and_unload()
126
  log.info("Saving model...")
@@ -347,7 +348,7 @@ def create_ui():
347
  )
348
  convert_btn = gr.Button("Start Conversion", variant="primary")
349
  with gr.Column():
350
- Log("convert.log", dark=True, xterm_font_size=12, height=480)
351
  convert_btn.click(
352
  fn=process_model,
353
  inputs=[base_model, lora_model, repo_name, quant_method, hf_token],
 
112
  5. 求 base 与 adapter tokenizer 的词表并取并集,扩展 tokenizer
113
  6. 调整合并模型嵌入层尺寸并保存
114
  """
115
+ os.makedirs("temp", exist_ok=True)
116
  log.info("Loading base model...")
117
+ model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, trust_remote_code=True, device=device, offload_folder="temp")
118
  log.info("Loading adapter tokenizer...")
119
+ adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, device=device, offload_folder="temp")
120
  log.info("Resizing token embeddings...")
121
  added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
122
  model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
123
  log.info("Loading LoRA adapter...")
124
+ peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, trust_remote_code=True, device=device, offload_folder="temp")
125
  log.info("Merging and unloading model...")
126
  model = peft_model.merge_and_unload()
127
  log.info("Saving model...")
 
348
  )
349
  convert_btn = gr.Button("Start Conversion", variant="primary")
350
  with gr.Column():
351
+ Log("convert.log", dark=True, xterm_font_size=12)
352
  convert_btn.click(
353
  fn=process_model,
354
  inputs=[base_model, lora_model, repo_name, quant_method, hf_token],