neuralmagic
/

granite-3.1-2b-instruct-quantized.w4a16

Text Generation

Inference Endpoints

compressed-tensors

Model card · Files and versions · Community

Shubhra Pandit committed on Jan 13

Commit

7c8cecf

·

1 Parent(s): 1fd4e36

Update model files

Files changed (4) hide show

config.json +4 -4
model.safetensors +2 -2
recipe.yaml +2 -2
tokenizer.json +1 -6

config.json CHANGED Viewed

@@ -30,10 +30,10 @@
           "Linear"
         ],
         "weights": {
-          "actorder": "group",
           "block_structure": null,
           "dynamic": false,
-          "group_size": 128,
           "num_bits": 4,
           "observer": "mse",
           "observer_kwargs": {},
@@ -44,7 +44,7 @@
       }
     },
     "format": "pack-quantized",
-    "global_compression_ratio": 2.0800281480274516,
     "ignore": [
       "lm_head"
     ],
@@ -53,7 +53,7 @@
     "quantization_status": "compressed",
     "sparsity_config": {
       "format": "dense",
-      "global_sparsity": 0.13778556247646417,
       "ignore": [],
       "registry_requires_subclass": false,
       "sparsity_structure": "unstructured",

           "Linear"
         ],
         "weights": {
+          "actorder": null,
           "block_structure": null,
           "dynamic": false,
+          "group_size": 64,
           "num_bits": 4,
           "observer": "mse",
           "observer_kwargs": {},
       }
     },
     "format": "pack-quantized",
+    "global_compression_ratio": 2.0771812517233883,
     "ignore": [
       "lm_head"
     ],
     "quantization_status": "compressed",
     "sparsity_config": {
       "format": "dense",
+      "global_sparsity": 0.13633800160480383,
       "ignore": [],
       "registry_requires_subclass": false,
       "sparsity_structure": "unstructured",

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c42b5cdd5beb78f6424aeb76d144e000fb2effa6276fce51f387bfad9e4549e
-size 1660786656

 version https://git-lfs.github.com/spec/v1
+oid sha256:80eacfccfdee065bac8252fc7e806717b506678e7e35090eb0b3246380e9b4fb
+size 1695488752

recipe.yaml CHANGED Viewed

@@ -6,6 +6,6 @@ quant_stage:
       ignore: [lm_head]
       config_groups:
         group_0:
-          weights: {num_bits: 4, type: int, symmetric: true, strategy: group, group_size: 128,
-            actorder: group, observer: mse}
           targets: [Linear]

       ignore: [lm_head]
       config_groups:
         group_0:
+          weights: {num_bits: 4, type: int, symmetric: true, strategy: group, group_size: 64,
+            observer: mse}
           targets: [Linear]

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 8196,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {