erwold committed on
Commit
3307da6
·
1 Parent(s): bc9137b

Initial Commit

Browse files
Files changed (1) hide show
  1. app.py +4 -0
app.py CHANGED
@@ -15,6 +15,7 @@ import os
15
  # 设置环境变量,强制禁用 accelerate 的显存管理
16
  os.environ["ACCELERATE_USE_MEMORY_EFFICIENT_ATTENTION"] = "false"
17
  os.environ["ACCELERATE_DISABLE_MEMORY_EFFICIENT_ATTENTION"] = "1"
 
18
 
19
  from qwen2_vl.modeling_qwen2_vl import Qwen2VLSimplifiedModel
20
 
@@ -60,6 +61,9 @@ class FluxInterface:
60
  return
61
 
62
  logger.info("Starting model loading...")
 
 
 
63
 
64
  # Load FLUX components
65
  tokenizer = CLIPTokenizer.from_pretrained(self.MODEL_ID, subfolder="flux/tokenizer")
 
15
  # 设置环境变量,强制禁用 accelerate 的显存管理
16
  os.environ["ACCELERATE_USE_MEMORY_EFFICIENT_ATTENTION"] = "false"
17
  os.environ["ACCELERATE_DISABLE_MEMORY_EFFICIENT_ATTENTION"] = "1"
18
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,garbage_collection_threshold:0.6,max_split_size_mb:512"
19
 
20
  from qwen2_vl.modeling_qwen2_vl import Qwen2VLSimplifiedModel
21
 
 
61
  return
62
 
63
  logger.info("Starting model loading...")
64
+ # 3. 显式设置 PyTorch 缓存分配器的行为
65
+ torch.cuda.set_per_process_memory_fraction(0.95) # 允许使用95%的显存
66
+ torch.cuda.max_memory_allocated = lambda *args, **kwargs: 0 # 忽略已分配内存的限制
67
 
68
  # Load FLUX components
69
  tokenizer = CLIPTokenizer.from_pretrained(self.MODEL_ID, subfolder="flux/tokenizer")