fnlp
/

moss-moon-003-sft-int4

Text Generation

Model card · Files and versions

Hzfinfdu committed on Apr 26, 2023

Commit

e3f0d7e

·

1 Parent(s): 45eceaf

Update quantization.py

Files changed (1) hide show

quantization.py +3 -7

quantization.py CHANGED Viewed

@@ -3,6 +3,9 @@ import torch
 import torch.nn as nn
 from torch.cuda.amp import custom_bwd, custom_fwd
 import math
 def find_layers(module, layers=[nn.Conv2d, nn.Linear], name=''):
@@ -16,13 +19,6 @@ def find_layers(module, layers=[nn.Conv2d, nn.Linear], name=''):
     return res
-try:
-    import triton
-    import triton.language as tl
-    from .custom_autotune import *
-except:
-    print('triton not installed. Run `pip install triton` to load quantized version of MOSS.')
 # code based https://github.com/fpgaminer/GPTQ-triton
 @autotune(
     configs=[

 import torch.nn as nn
 from torch.cuda.amp import custom_bwd, custom_fwd
 import math
+import triton
+import triton.language as tl
+from .custom_autotune import *
 def find_layers(module, layers=[nn.Conv2d, nn.Linear], name=''):
     return res
 # code based https://github.com/fpgaminer/GPTQ-triton
 @autotune(
     configs=[