Yi30 committed on
Commit
69b9ad1
·
verified ·
1 Parent(s): 4cf5f9d

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "architectures": [
3
  "LlamaForCausalLM"
4
  ],
@@ -29,8 +30,9 @@
29
  "actorder": null,
30
  "block_structure": null,
31
  "dynamic": "local",
32
- "group_size": 16,
33
- "num_bits": 4,
 
34
  "observer": "minmax",
35
  "observer_kwargs": {},
36
  "strategy": "tensor_group",
@@ -45,8 +47,9 @@
45
  "actorder": null,
46
  "block_structure": null,
47
  "dynamic": false,
48
- "group_size": 16,
49
- "num_bits": 4,
 
50
  "observer": "minmax",
51
  "observer_kwargs": {},
52
  "strategy": "tensor_group",
@@ -55,7 +58,7 @@
55
  }
56
  }
57
  },
58
- "format": "nvfp4-pack-quantized",
59
  "global_compression_ratio": null,
60
  "ignore": [
61
  "lm_head"
@@ -75,7 +78,7 @@
75
  "rope_theta": 500000.0,
76
  "tie_word_embeddings": false,
77
  "torch_dtype": "bfloat16",
78
- "transformers_version": "4.51.3",
79
  "use_cache": true,
80
  "vocab_size": 128256
81
  }
 
1
  {
2
+ "_name_or_path": "meta-llama/Llama-3.3-70B-Instruct",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
30
  "actorder": null,
31
  "block_structure": null,
32
  "dynamic": "local",
33
+ "group_size": 32,
34
+ "is_mx": true,
35
+ "num_bits": 8,
36
  "observer": "minmax",
37
  "observer_kwargs": {},
38
  "strategy": "tensor_group",
 
47
  "actorder": null,
48
  "block_structure": null,
49
  "dynamic": false,
50
+ "group_size": 32,
51
+ "is_mx": true,
52
+ "num_bits": 8,
53
  "observer": "minmax",
54
  "observer_kwargs": {},
55
  "strategy": "tensor_group",
 
58
  }
59
  }
60
  },
61
+ "format": "float-quantized",
62
  "global_compression_ratio": null,
63
  "ignore": [
64
  "lm_head"
 
78
  "rope_theta": 500000.0,
79
  "tie_word_embeddings": false,
80
  "torch_dtype": "bfloat16",
81
+ "transformers_version": "4.47.0",
82
  "use_cache": true,
83
  "vocab_size": 128256
84
  }
generation_config.json CHANGED
@@ -8,5 +8,5 @@
8
  ],
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
- "transformers_version": "4.51.3"
12
  }
 
8
  ],
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
+ "transformers_version": "4.47.0"
12
  }
model-00001-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:722948ea9e8a88f08776bbfc3071cc09fd69f9bc48bc766d30f0ed4788633697
3
+ size 4904295040
model-00002-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81d98efb24d5f10e797647d55fa525990d2cc7ab367cc08846014eb340236dcc
3
+ size 4896499344
model-00003-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16f9f7315ee6213342231265cecd0974a5427032180afef7f2eddfc9e50e0036
3
+ size 4810025600
model-00004-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:606a7df04f36ef8892d6536bede20ee27c741b57663ae7abeaa58e96c2e57036
3
+ size 4896499432
model-00005-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d06ab1527b00f0c586884d1f2185870e3d1a1f4d5c2d52ad06a71e2c835897b4
3
+ size 4810025624
model-00006-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:218e5f8b88c7244c5699b1e8bea026f05c5abf52050d9ff78a132876769669c7
3
+ size 4896499432
model-00007-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:078b787fc329db684b704f518da01056157ab6b49a42b5f90adb0e44e690be5c
3
+ size 4810025624
model-00008-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f87188ca957c20c044347369f38dc9c526dc204be6ea62b4a79a974ddbf7a94b
3
+ size 4896499432
model-00009-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ab12e5941d789d5689eee77ebeaf7db2c8f99d0d13133ecacc6ba7e5f1d5754
3
+ size 4810025624
model-00010-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79e1ec3b3fc217327102d0a58b9bf338d90f72b9eda43069486570105706e952
3
+ size 4896499432
model-00011-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f01a3aab1f1a75c19b1546a1d472ff0a3e1886764a5e28305b1124d1303d937
3
+ size 4810025624
model-00012-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b7b3916e3ce3dbcf8af8d492a37fece0b51d6383216fa0321092ce17f7b812e
3
+ size 4896499432
model-00013-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05f4e116b0fc0b1be4ff84243ef5b00ada77a08efb0748bf160d0971b5f487f0
3
+ size 4810025624
model-00014-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd50d7bf866fe74c5d6287cff0557aecdd277e5e4fe370c5d74a65a0620f726
3
+ size 4896499432
model-00015-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4622d9300ef1d594899cecb63478ce56a8baed45a7fe0ef1952a02ef2569d09
3
+ size 4654327584
model-00016-of-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0436c98c9b4f78e61f6769e588e0853179b970e2a084ae2886e2daebf1ce13d
3
+ size 2101346432
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
recipe.yaml CHANGED
@@ -3,4 +3,4 @@ default_stage:
3
  QuantizationModifier:
4
  targets: [Linear]
5
  ignore: [lm_head]
6
- scheme: NVFP4
 
3
  QuantizationModifier:
4
  targets: [Linear]
5
  ignore: [lm_head]
6
+ scheme: MXFP8
tokenizer_config.json CHANGED
@@ -2060,5 +2060,5 @@
2060
  ],
2061
  "model_max_length": 131072,
2062
  "pad_token": "<|finetune_right_pad_id|>",
2063
- "tokenizer_class": "PreTrainedTokenizer"
2064
  }
 
2060
  ],
2061
  "model_max_length": 131072,
2062
  "pad_token": "<|finetune_right_pad_id|>",
2063
+ "tokenizer_class": "PreTrainedTokenizerFast"
2064
  }