1inkusFace committed on
Commit
a0a145a
·
verified ·
1 Parent(s): 61e9845

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -4,18 +4,21 @@ import torch
4
  import gradio as gr
5
  import os
6
 
 
 
 
 
 
7
  torch.backends.cuda.matmul.allow_tf32 = False
8
  torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
9
  torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
10
  torch.backends.cudnn.allow_tf32 = False
11
  torch.backends.cudnn.deterministic = False
12
  torch.backends.cudnn.benchmark = False
13
- #torch.backends.cuda.preferred_blas_library="cublas"
14
- # torch.backends.cuda.preferred_linalg_library="cusolver"
15
 
16
  torch.set_float32_matmul_precision("highest")
17
- os.putenv("HF_HUB_ENABLE_HF_TRANSFER","1")
18
- os.environ["SAFETENSORS_FAST_GPU"] = "1"
19
 
20
  model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"
21
 
 
4
  import gradio as gr
5
  import os
6
 
7
+ os.putenv('TORCH_LINALG_PREFER_CUSOLVER','1')
8
+ os.putenv('PYTORCH_CUDA_ALLOC_CONF','max_split_size_mb:128')
9
+ os.environ["SAFETENSORS_FAST_GPU"] = "1"
10
+ os.putenv('HF_HUB_ENABLE_HF_TRANSFER','1')
11
+
12
  torch.backends.cuda.matmul.allow_tf32 = False
13
  torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
14
  torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
15
  torch.backends.cudnn.allow_tf32 = False
16
  torch.backends.cudnn.deterministic = False
17
  torch.backends.cudnn.benchmark = False
18
+ torch.backends.cuda.preferred_blas_library="cublas"
19
+ torch.backends.cuda.preferred_linalg_library="cusolver"
20
 
21
  torch.set_float32_matmul_precision("highest")
 
 
22
 
23
  model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"
24