weiweiz1 committed on
Commit
6c8f465
·
verified ·
1 Parent(s): b73d250

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +46 -25
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "architectures": [
3
  "DeepseekV3ForCausalLM"
4
  ],
@@ -14,7 +15,6 @@
14
  "eos_token_id": 1,
15
  "ep_size": 1,
16
  "first_k_dense_replace": 3,
17
- "head_dim": 64,
18
  "hidden_act": "silu",
19
  "hidden_size": 7168,
20
  "initializer_range": 0.02,
@@ -33,40 +33,62 @@
33
  "num_hidden_layers": 61,
34
  "num_key_value_heads": 128,
35
  "num_nextn_predict_layers": 1,
36
- "pad_token_id": 128815,
37
  "pretraining_tp": 1,
38
  "q_lora_rank": 1536,
39
- "qk_head_dim": 192,
40
  "qk_nope_head_dim": 128,
41
  "qk_rope_head_dim": 64,
42
  "quantization_config": {
43
- "autoround_version": "0.6.0",
44
- "batch_size": 4,
45
- "bits": 4,
46
- "data_type": "mx_fp4e2m1",
47
- "group_size": 32,
48
- "low_gpu_mem_usage": true,
49
- "packing_format": "mx_fp",
50
- "quant_method": "auto-round",
51
- "scale_calculation_mode": [
52
- "even"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  ],
54
- "scale_format": [
55
- "e8m0"
56
- ],
57
- "seqlen": 512,
58
- "sym": true
59
  },
60
  "rms_norm_eps": 1e-06,
61
- "rope_interleave": true,
62
  "rope_scaling": {
63
- "beta_fast": 32.0,
64
- "beta_slow": 1.0,
65
- "factor": 40.0,
66
  "mscale": 1.0,
67
  "mscale_all_dim": 1.0,
68
  "original_max_position_embeddings": 4096,
69
- "rope_type": "yarn",
70
  "type": "yarn"
71
  },
72
  "rope_theta": 10000,
@@ -77,8 +99,7 @@
77
  "topk_group": 4,
78
  "topk_method": "noaux_tc",
79
  "torch_dtype": "bfloat16",
80
- "transformers_version": "4.53.1",
81
- "unsloth_fixed": true,
82
  "use_cache": true,
83
  "v_head_dim": 128,
84
  "vocab_size": 129280
 
1
  {
2
+ "_name_or_path": "/data5/yliu7/HF_HOME/DeepSeek-R1-bf16/DeepSeek-R1-bf16",
3
  "architectures": [
4
  "DeepseekV3ForCausalLM"
5
  ],
 
15
  "eos_token_id": 1,
16
  "ep_size": 1,
17
  "first_k_dense_replace": 3,
 
18
  "hidden_act": "silu",
19
  "hidden_size": 7168,
20
  "initializer_range": 0.02,
 
33
  "num_hidden_layers": 61,
34
  "num_key_value_heads": 128,
35
  "num_nextn_predict_layers": 1,
 
36
  "pretraining_tp": 1,
37
  "q_lora_rank": 1536,
 
38
  "qk_nope_head_dim": 128,
39
  "qk_rope_head_dim": 64,
40
  "quantization_config": {
41
+ "config_groups": {
42
+ "group_0": {
43
+ "input_activations": {
44
+ "actorder": null,
45
+ "block_structure": null,
46
+ "dynamic": "local",
47
+ "group_size": 32,
48
+ "is_mx": true,
49
+ "num_bits": 4,
50
+ "observer": "minmax",
51
+ "observer_kwargs": {},
52
+ "strategy": "tensor_group",
53
+ "symmetric": true,
54
+ "type": "float"
55
+ },
56
+ "output_activations": null,
57
+ "targets": [
58
+ "Linear"
59
+ ],
60
+ "weights": {
61
+ "actorder": null,
62
+ "block_structure": null,
63
+ "dynamic": false,
64
+ "group_size": 32,
65
+ "is_mx": true,
66
+ "num_bits": 4,
67
+ "observer": "minmax",
68
+ "observer_kwargs": {},
69
+ "strategy": "tensor_group",
70
+ "symmetric": true,
71
+ "type": "float"
72
+ }
73
+ }
74
+ },
75
+ "format": "float-quantized",
76
+ "global_compression_ratio": null,
77
+ "ignore": [
78
+ "lm_head"
79
  ],
80
+ "kv_cache_scheme": null,
81
+ "quant_method": "compressed-tensors",
82
+ "quantization_status": "compressed"
 
 
83
  },
84
  "rms_norm_eps": 1e-06,
 
85
  "rope_scaling": {
86
+ "beta_fast": 32,
87
+ "beta_slow": 1,
88
+ "factor": 40,
89
  "mscale": 1.0,
90
  "mscale_all_dim": 1.0,
91
  "original_max_position_embeddings": 4096,
 
92
  "type": "yarn"
93
  },
94
  "rope_theta": 10000,
 
99
  "topk_group": 4,
100
  "topk_method": "noaux_tc",
101
  "torch_dtype": "bfloat16",
102
+ "transformers_version": "4.47.0",
 
103
  "use_cache": true,
104
  "v_head_dim": 128,
105
  "vocab_size": 129280