Steven10429 committed
Commit 6b0e51f · 1 Parent(s): ee65134
Files changed (1):
  1. app.py +35 -24

app.py CHANGED
@@ -1,18 +1,22 @@
 import os
 import torch
-import psutil
-from pathlib import Path
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel, PeftConfig
 from huggingface_hub import login, create_repo, HfApi
 import gradio as gr
-import queue
 import time
 import shutil
 from gradio_log import Log
 import logging
 
 
+MEMORY = int(os.getenv("MEMORY", "16Gi")[:-2])  # e.g. "64Gi" -> 64
+CPU_CORES = int(os.getenv("CPU_CORES", 4))  # 4
+SPACE_AUTHOR_NAME = os.getenv("SPACE_AUTHOR_NAME", "Steven10429")  # str
+SPACE_REPO_NAME = os.getenv("SPACE_REPO_NAME", "apply_lora_and_quantize")  # str
+SPACE_ID = os.getenv("SPACE_ID", "apply_lora_and_quantize")  # str
+
+
 # Global logger
 log = logging.getLogger("space_convert")
 log.setLevel(logging.INFO)
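
The new MEMORY constant assumes the Space reports its RAM limit as a Kubernetes-style quantity such as "64Gi", with the [:-2] slice stripping the two-character unit suffix; note the default therefore has to be a string, not a bare int. A minimal, more defensive sketch of the same parsing (the parse_memory_gb helper is hypothetical, not part of this commit):

    import os

    def parse_memory_gb(raw: str) -> int:
        """Parse a quantity like '64Gi' or '512Mi' into whole GiB."""
        raw = raw.strip()
        if raw.endswith("Gi"):
            return int(raw[:-2])
        if raw.endswith("Mi"):
            return int(raw[:-2]) // 1024
        return int(raw)  # assume a bare number already means GiB

    MEMORY = parse_memory_gb(os.getenv("MEMORY", "16Gi"))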
@@ -45,31 +49,37 @@ def get_model_size_in_gb(model_name):
 def check_system_resources(model_name):
     """Check system resources and decide whether to use CPU or GPU."""
     log.info("Checking system resources...")
-    system_memory = psutil.virtual_memory()
-    total_memory_gb = system_memory.total / (1024 ** 3)
-    log.info(f"Total system memory: {total_memory_gb:.1f}GB")
+    log.info(f"Total CPU cores: {CPU_CORES}")
+    log.info(f"Total system memory: {MEMORY}GB")
 
     model_size_gb = get_model_size_in_gb(model_name)
-    required_memory_gb = model_size_gb * 2.5  # reserve extra memory
+    required_memory_gb_16bit = model_size_gb * 1.5
+    required_memory_gb = required_memory_gb_16bit
+
+
     log.info(f"Estimated required memory for model: {required_memory_gb:.1f}GB")
 
-    if torch.cuda.is_available():
-        gpu_name = torch.cuda.get_device_name(0)
-        gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
-        log.info(f"Detected GPU: {gpu_name} with {gpu_memory_gb:.1f}GB memory")
-        if gpu_memory_gb >= required_memory_gb:
-            log.info("✅ Sufficient GPU memory available; using GPU.")
-            return "cuda", gpu_memory_gb
-        else:
-            log.warning(f"⚠️ Insufficient GPU memory (requires {required_memory_gb:.1f}GB, found {gpu_memory_gb:.1f}GB).")
-    else:
-        log.error("❌ No GPU detected.")
+    # if torch.cuda.is_available():  # fails when torch is compiled without GPU support
+    #     gpu_name = torch.cuda.get_device_name(0)
+    #     gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
+    #     log.info(f"Detected GPU: {gpu_name} with {gpu_memory_gb:.1f}GB memory")
+    #     if gpu_memory_gb >= required_memory_gb:
+    #         log.info("✅ Sufficient GPU memory available; using GPU.")
+    #         return "cuda", gpu_memory_gb
+    #     else:
+    #         log.warning(f"⚠️ Insufficient GPU memory (requires {required_memory_gb:.1f}GB, found {gpu_memory_gb:.1f}GB).")
+    # else:
+    #     log.error("❌ No GPU detected.")
+    # Just use the CPU; it's enough for merging and quantizing.
 
-    if total_memory_gb >= required_memory_gb:
+    if MEMORY >= required_memory_gb:
         log.info("✅ Sufficient CPU memory available; using CPU.")
-        return "cpu", total_memory_gb
+        return "cpu", MEMORY
     else:
-        raise MemoryError(f"Insufficient system memory (requires {required_memory_gb:.1f}GB, available {available_memory_gb:.1f}GB).")
+        log.warning(f"⚠️ Insufficient CPU memory (requires {required_memory_gb:.1f}GB, found {MEMORY}GB).")
+        log.error("❌ Insufficient CPU memory.")
+        log.error("Will try low-memory mode, but it may fail.")
+        return "cpu", MEMORY
 
 @timeit
 def setup_environment(model_name):
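
For context on the new estimate: fp16/bf16 weights take roughly 2 bytes per parameter, so requiring 1.5x the checkpoint size budgets about 3 bytes of RAM per parameter for the merge, replacing the old 2.5x headroom. A back-of-the-envelope sketch with illustrative numbers:

    # Illustrative only: a hypothetical 7B-parameter model in fp16.
    params_billion = 7
    model_size_gb = params_billion * 2        # ~2 bytes per parameter in fp16
    required_memory_gb = model_size_gb * 1.5  # the new headroom factor
    print(required_memory_gb)                 # 21.0 -> needs a 32Gi Space, not 16Gi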
@@ -114,19 +124,20 @@ def download_and_merge_model(base_model_name, lora_model_name, output_dir, devic
     """
    os.makedirs("temp", exist_ok=True)
     log.info("Loading base model...")
-    model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, device_map="auto", trust_remote_code=True, offload_folder="temp")
+    model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, device_map="auto", force_download=True, trust_remote_code=True, torch_dtype=torch.float16)
     log.info("Loading adapter tokenizer...")
-    adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, device_map="auto", offload_folder="temp")
+    adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, force_download=True)
     log.info("Resizing token embeddings...")
     added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
     model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
     log.info("Loading LoRA adapter...")
-    peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, device_map="auto", trust_remote_code=True, offload_folder="temp")
+    peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, device_map="auto", force_download=True, trust_remote_code=True, torch_dtype=torch.float16)
     log.info("Merging and unloading model...")
     model = peft_model.merge_and_unload()
     log.info("Saving model...")
     model.save_pretrained(output_dir)
     adapter_tokenizer.save_pretrained(output_dir)
+    del model, peft_model
     return output_dir
 
 @timeit
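
The loading changes above drop disk offload (offload_folder="temp") in favor of loading everything in fp16, and merge_and_unload() folds the LoRA deltas into the base weights so the saved checkpoint is a plain transformers model; the trailing del releases both references before the function returns. A minimal sketch of that flow, using hypothetical repo ids in place of base_model_name and lora_model_name:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel

    # Hypothetical repo ids stand in for base_model_name / lora_model_name.
    base = AutoModelForCausalLM.from_pretrained("org/base-model", torch_dtype=torch.float16)
    tokenizer = AutoTokenizer.from_pretrained("user/lora-adapter")
    merged = PeftModel.from_pretrained(base, "user/lora-adapter").merge_and_unload()
    merged.save_pretrained("./merged")     # merged full weights; no adapter needed at load time
    tokenizer.save_pretrained("./merged")  # keep the adapter's tokenizer alongside the weights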
 