Steven10429 committed
Commit 999165e · 1 Parent(s): ebbc7fa
Files changed (1)
  1. app.py +0 -15
app.py CHANGED
@@ -74,17 +74,12 @@ def check_system_resources(model_name):
 
     if MEMORY >= required_memory_gb:
         log.info("✅ Sufficient CPU memory available; using CPU.")
-<<<<<<< HEAD
         return "cpu", MEMORY
     else:
         log.warning(f"⚠️ Insufficient CPU memory (requires {required_memory_gb:.1f}GB, found {MEMORY}GB).")
         log.error("❌ No CPU detected.")
         log.error("Will try low memory mode, but it may fail.")
         return "cpu", MEMORY
-=======
-        return "cpu", total_memory_gb
-
->>>>>>> 1d6ffe4bce1a741111b16de1ba110e1ee56b92df
 
 @timeit
 def setup_environment(model_name):
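Both sides of this conflict return "cpu"; the resolution keeps HEAD's branch, which reports the MEMORY figure, and drops the other branch's stale total_memory_gb name. For reference, a minimal runnable sketch of the resolved logic. The psutil lookup and the explicit required_memory_gb parameter are illustrative assumptions: in the commit the signature is check_system_resources(model_name) and both values are computed elsewhere in app.py.

    import logging
    import psutil

    log = logging.getLogger(__name__)

    # Total system RAM in GB; the psutil source is an assumption,
    # app.py computes MEMORY elsewhere.
    MEMORY = psutil.virtual_memory().total / (1024 ** 3)

    def check_system_resources(model_name, required_memory_gb):
        # Illustrative parameter: the original derives required_memory_gb
        # from model_name inside the function.
        if MEMORY >= required_memory_gb:
            log.info("✅ Sufficient CPU memory available; using CPU.")
            return "cpu", MEMORY
        # Not enough RAM: warn, then fall back to low-memory CPU mode anyway.
        log.warning(f"⚠️ Insufficient CPU memory (requires {required_memory_gb:.1f}GB, found {MEMORY}GB).")
        log.error("Will try low memory mode, but it may fail.")
        return "cpu", MEMORY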
@@ -129,24 +124,14 @@ def download_and_merge_model(base_model_name, lora_model_name, output_dir, devic
     """
     os.makedirs("temp", exist_ok=True)
     log.info("Loading base model...")
-<<<<<<< HEAD
     model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, device_map="auto", force_download=True, trust_remote_code=True, torch_dtype=torch.float16)
     log.info("Loading adapter tokenizer...")
     adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, device_map="auto", force_download=True, torch_dtype=torch.float16)
-=======
-    model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, device_map="auto")
-    log.info("Loading adapter tokenizer...")
-    adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, device_map="auto")
->>>>>>> 1d6ffe4bce1a741111b16de1ba110e1ee56b92df
     log.info("Resizing token embeddings...")
     added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
     model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
     log.info("Loading LoRA adapter...")
-<<<<<<< HEAD
     peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, device_map="auto", force_download=True, trust_remote_code=True, torch_dtype=torch.float16)
-=======
-    peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, device_map="auto")
->>>>>>> 1d6ffe4bce1a741111b16de1ba110e1ee56b92df
     log.info("Merging and unloading model...")
     model = peft_model.merge_and_unload()
     log.info("Saving model...")
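The second hunk resolves the same conflict pattern inside download_and_merge_model, again in favor of HEAD: the fp16, force_download, and trust_remote_code variants are kept and the plainer duplicate calls are dropped. (Note the kept tokenizer line as committed passed trust_remote_code=True twice, which is a SyntaxError; it is shown above with the duplicate removed.) For reference, a self-contained sketch of the merge flow this leaves behind. The function shape mirrors the diff, but the parameter list and the save step at the end are assumptions, since the surrounding code is not shown in this commit.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel

    def download_and_merge_model(base_model_name, lora_model_name, output_dir):
        # Load the fp16 base model, streaming weights to keep CPU RAM low.
        model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            low_cpu_mem_usage=True,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16,
        )
        # The adapter repo may ship a tokenizer with extra special tokens.
        adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True)
        # Grow the embedding matrix to base vocab + added tokens,
        # as the diff does (usually the same as len(adapter_tokenizer)).
        added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
        model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
        # Attach the LoRA weights, then fold them into the base weights
        # so the result is a plain checkpoint with no peft dependency.
        peft_model = PeftModel.from_pretrained(model, lora_model_name)
        model = peft_model.merge_and_unload()
        # Assumed save step; the diff cuts off before this point.
        model.save_pretrained(output_dir)
        adapter_tokenizer.save_pretrained(output_dir)
        return output_dir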
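Merging up front trades a one-time resize-and-merge cost for cheaper inference: the merged checkpoint loads like any ordinary model, with no LoRA matmuls at forward time. A hypothetical call to the sketch above; both repo IDs and the output path are placeholders.

    # Placeholder repo IDs; substitute the real base model and adapter.
    download_and_merge_model(
        base_model_name="Qwen/Qwen2-0.5B",
        lora_model_name="user/my-lora-adapter",
        output_dir="temp/merged",
    )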