unausagi committed on
Commit
b82cda0
·
verified ·
1 Parent(s): 3eecdbe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -13,13 +13,19 @@ HF_TOKEN = os.getenv("HF_TOKEN")
13
 
14
  def load_model(model_path):
15
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, token=HF_TOKEN)
 
 
 
 
 
 
16
  model = AutoModelForCausalLM.from_pretrained(
17
  model_path,
 
18
  trust_remote_code=True,
19
  token=HF_TOKEN,
20
- torch_dtype=torch.float16, # 強制 FP16,避免 FP8 問題
21
- device_map="auto", # 讓 transformers 自動決定使用 CPU/GPU
22
- revision="main"
23
  )
24
  return model, tokenizer
25
 
 
13
 
14
  def load_model(model_path):
15
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, token=HF_TOKEN)
16
+
17
+ # 先載入 config,手動刪除量化設定,防止 FP8 問題
18
+ config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, token=HF_TOKEN)
19
+ if hasattr(config, "quantization_config"):
20
+ del config.quantization_config # 刪除量化配置,避免使用 FP8
21
+
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_path,
24
+ config=config, # 使用已移除量化的 config
25
  trust_remote_code=True,
26
  token=HF_TOKEN,
27
+ torch_dtype=torch.float16, # 強制 FP16,避免 FP8
28
+ device_map="auto",
 
29
  )
30
  return model, tokenizer
31