Upload of AutoGPTQ quantized model

Files changed (8)

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3.1-405B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -25,14 +25,14 @@
   "quantization_config": {
     "bits": 4,
     "checkpoint_format": "gptq",
-    "damp_percent": 0.01,
     "desc_act": true,
     "exponent_hinv": 4.0,
-    "group_size": -1,
     "model_file_base_name": null,
     "model_name_or_path": null,
     "quant_method": "gptq",
-    "shrink": 0.1,
     "static_groups": false,
     "sym": false,
     "true_sequential": true

 {
+  "_name_or_path": "meta-llama/Llama-3.1-405B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "quantization_config": {
     "bits": 4,
     "checkpoint_format": "gptq",
+    "damp_percent": 0.025,
     "desc_act": true,
     "exponent_hinv": 4.0,
+    "group_size": 128,
     "model_file_base_name": null,
     "model_name_or_path": null,
     "quant_method": "gptq",
+    "shrink": 0.0625,
     "static_groups": false,
     "sym": false,
     "true_sequential": true

model.safetensors.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "weight_map": {
-        "lm_head.weight": "model_000.safetensors",
         "model.embed_tokens.weight": "model_000.safetensors",
         "model.layers.0.input_layernorm.weight": "model_000.safetensors",
         "model.layers.0.mlp.down_proj.g_idx": "model_000.safetensors",

 {
     "weight_map": {
+        "lm_head.weight": "model_001.safetensors",
         "model.embed_tokens.weight": "model_000.safetensors",
         "model.layers.0.input_layernorm.weight": "model_000.safetensors",
         "model.layers.0.mlp.down_proj.g_idx": "model_000.safetensors",

model_000.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b2c0207cc9b3c5ee37655efe94b64822fecc359a6294b60426a3f6fdd628e4f
-size 48714504984

 version https://git-lfs.github.com/spec/v1
+oid sha256:8423bb31b093ec0f90a8c173c40dda3ca0fbb48b62bfa6c5e56633f604bfb4ce
+size 46075435392

model_001.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a9fc3a3f483c0ba6adcec77c5fc97aaa99fe23a6f6e547edf2cad356d9feb31
-size 40745331064

 version https://git-lfs.github.com/spec/v1
+oid sha256:61c72c3359917c910414718b9c9af81956e2303a513850fab3f411d657181fed
+size 46514935960

model_002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d43728978c9d83ecb5f04bfcc2acc7e5f625389ae38fa459dca4cb1f6cafe41
-size 39881286320

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4e434399cfeb463e1aa5edcdecc47c3ed81299d854c1102e032ad4da613bee5
+size 41441761136

model_003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5d816f7b7760b92be06ea378eaf525202df12cf7a8195a7082986d535458829
-size 40141207768

 version https://git-lfs.github.com/spec/v1
+oid sha256:c37fb8f1f302056aaabd8f84d6f7bee18ebd4c50c6642355b600a17b8d169f37
+size 41698236320

model_004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ced7c1739f523f6ad4e8fb39666cc5014cebd7b1015a57a38430e857d6ed4b7b
-size 39881114056

 version https://git-lfs.github.com/spec/v1
+oid sha256:f616abec4fcabe795c5d26af181b3d7254fc3a2e174a902f803e7babffdc05c9
+size 41428063888

quantize_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "bits": 4,
-  "group_size": -1,
-  "damp_percent": 0.01,
   "desc_act": true,
   "static_groups": false,
   "sym": false,
@@ -9,7 +9,7 @@
   "model_name_or_path": null,
   "model_file_base_name": null,
   "exponent_hinv": 4.0,
-  "shrink": 0.1,
-  "quant_method": "xmad",
   "checkpoint_format": "gptq"
 }

 {
   "bits": 4,
+  "group_size": 128,
+  "damp_percent": 0.025,
   "desc_act": true,
   "static_groups": false,
   "sym": false,
   "model_name_or_path": null,
   "model_file_base_name": null,
   "exponent_hinv": 4.0,
+  "shrink": 0.0625,
+  "quant_method": "gptq",
   "checkpoint_format": "gptq"
 }