michaelfeil commited on
Commit
02a291d
·
verified ·
1 Parent(s): 9dff143

Add files using upload-large-folder tool

Browse files
tllm-checkpoint/config.json CHANGED
@@ -66,105 +66,105 @@
66
  "has_zero_point": false,
67
  "pre_quant_scale": false,
68
  "exclude_modules": [
69
- "transformer.layers.2.post_layernorm",
70
- "transformer.layers.22.input_layernorm",
71
- "transformer.layers.11.mlp.shared_expert_gate",
72
- "transformer.layers.22.post_layernorm",
73
  "transformer.layers.20.mlp.router",
74
- "transformer.layers.10.mlp.shared_expert_gate",
 
 
 
75
  "transformer.layers.14.mlp.router",
76
- "transformer.layers.16.mlp.shared_expert_gate",
77
- "transformer.layers.7.post_layernorm",
78
- "transformer.layers.0.mlp.shared_expert_gate",
79
- "transformer.layers.1.post_layernorm",
80
- "transformer.layers.4.mlp.shared_expert_gate",
81
- "transformer.layers.8.post_layernorm",
82
- "transformer.layers.5.mlp.router",
83
- "transformer.layers.8.mlp.router",
84
- "transformer.layers.21.post_layernorm",
85
  "transformer.layers.23.post_layernorm",
86
- "transformer.layers.4.post_layernorm",
87
- "transformer.layers.1.mlp.router",
88
- "transformer.layers.6.post_layernorm",
89
- "transformer.layers.23.mlp.router",
90
- "transformer.layers.8.input_layernorm",
91
- "transformer.layers.17.mlp.shared_expert_gate",
92
- "transformer.layers.8.mlp.shared_expert_gate",
93
  "transformer.layers.9.mlp.shared_expert_gate",
94
- "transformer.layers.15.mlp.router",
95
- "transformer.layers.6.mlp.router",
96
- "transformer.layers.10.post_layernorm",
97
- "transformer.layers.0.post_layernorm",
98
- "transformer.layers.14.input_layernorm",
99
- "transformer.layers.13.mlp.router",
100
- "transformer.layers.15.mlp.shared_expert_gate",
101
- "transformer.layers.0.mlp.router",
102
- "transformer.layers.5.post_layernorm",
103
- "transformer.layers.9.post_layernorm",
104
- "transformer.layers.17.post_layernorm",
105
- "transformer.layers.2.input_layernorm",
106
  "transformer.layers.11.mlp.router",
107
- "transformer.layers.3.post_layernorm",
108
- "transformer.vocab_embedding",
109
- "transformer.layers.22.mlp.router",
110
- "transformer.layers.5.mlp.shared_expert_gate",
111
- "transformer.layers.6.mlp.shared_expert_gate",
112
- "transformer.layers.14.mlp.shared_expert_gate",
113
- "transformer.layers.22.mlp.shared_expert_gate",
114
- "transformer.layers.18.mlp.shared_expert_gate",
115
- "transformer.layers.5.input_layernorm",
116
- "transformer.layers.20.mlp.shared_expert_gate",
117
- "transformer.layers.9.mlp.router",
118
  "transformer.layers.3.mlp.shared_expert_gate",
119
- "transformer.layers.10.input_layernorm",
120
  "transformer.layers.14.post_layernorm",
121
- "transformer.layers.19.mlp.router",
122
- "transformer.layers.19.post_layernorm",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  "transformer.layers.2.mlp.router",
 
 
 
 
124
  "transformer.layers.23.input_layernorm",
125
- "transformer.layers.2.mlp.shared_expert_gate",
126
- "transformer.layers.4.mlp.router",
127
- "transformer.layers.12.mlp.shared_expert_gate",
128
- "transformer.layers.1.mlp.shared_expert_gate",
129
  "transformer.layers.20.input_layernorm",
130
- "transformer.layers.7.input_layernorm",
131
- "transformer.layers.20.post_layernorm",
132
- "transformer.layers.18.input_layernorm",
 
 
133
  "transformer.layers.17.mlp.router",
134
- "transformer.layers.23.mlp.shared_expert_gate",
135
- "transformer.layers.15.input_layernorm",
 
 
 
 
 
 
 
136
  "transformer.layers.16.post_layernorm",
137
- "transformer.layers.19.mlp.shared_expert_gate",
138
- "transformer.layers.3.mlp.router",
139
- "transformer.layers.4.input_layernorm",
140
  "transformer.layers.19.input_layernorm",
141
- "transformer.layers.17.input_layernorm",
142
- "transformer.layers.21.input_layernorm",
143
- "transformer.layers.13.input_layernorm",
144
- "transformer.layers.7.mlp.router",
145
- "transformer.layers.16.mlp.router",
146
- "transformer.layers.3.input_layernorm",
147
- "transformer.layers.15.post_layernorm",
 
 
 
 
148
  "transformer.layers.11.input_layernorm",
149
- "transformer.layers.9.input_layernorm",
150
- "transformer.layers.10.mlp.router",
151
- "lm_head",
152
- "transformer.layers.0.input_layernorm",
153
- "transformer.layers.16.input_layernorm",
154
- "transformer.layers.18.post_layernorm",
155
- "transformer.layers.21.mlp.shared_expert_gate",
156
- "transformer.layers.6.input_layernorm",
157
- "transformer.layers.7.mlp.shared_expert_gate",
158
  "transformer.layers.11.post_layernorm",
 
 
159
  "transformer.layers.1.input_layernorm",
160
- "transformer.ln_f",
161
- "transformer.layers.13.post_layernorm",
162
- "transformer.layers.12.input_layernorm",
163
- "transformer.layers.13.mlp.shared_expert_gate",
164
  "transformer.layers.12.mlp.router",
 
165
  "transformer.layers.12.post_layernorm",
166
- "transformer.layers.21.mlp.router",
167
- "transformer.layers.18.mlp.router"
 
 
 
 
 
 
 
 
 
 
 
168
  ]
169
  },
170
  "qk_layernorm": false,
 
66
  "has_zero_point": false,
67
  "pre_quant_scale": false,
68
  "exclude_modules": [
69
+ "transformer.layers.7.mlp.shared_expert_gate",
 
 
 
70
  "transformer.layers.20.mlp.router",
71
+ "lm_head",
72
+ "transformer.layers.18.post_layernorm",
73
+ "transformer.layers.17.mlp.shared_expert_gate",
74
+ "transformer.layers.6.post_layernorm",
75
  "transformer.layers.14.mlp.router",
 
 
 
 
 
 
 
 
 
76
  "transformer.layers.23.post_layernorm",
77
+ "transformer.layers.22.mlp.router",
78
+ "transformer.layers.7.input_layernorm",
 
 
 
 
 
79
  "transformer.layers.9.mlp.shared_expert_gate",
80
+ "transformer.layers.8.mlp.router",
 
 
 
 
 
 
 
 
 
 
 
81
  "transformer.layers.11.mlp.router",
82
+ "transformer.layers.19.mlp.router",
83
+ "transformer.layers.13.mlp.router",
 
 
 
 
 
 
 
 
 
84
  "transformer.layers.3.mlp.shared_expert_gate",
 
85
  "transformer.layers.14.post_layernorm",
86
+ "transformer.layers.10.mlp.shared_expert_gate",
87
+ "transformer.layers.6.input_layernorm",
88
+ "transformer.layers.1.mlp.router",
89
+ "transformer.layers.21.mlp.router",
90
+ "transformer.layers.20.mlp.shared_expert_gate",
91
+ "transformer.layers.13.input_layernorm",
92
+ "transformer.layers.1.mlp.shared_expert_gate",
93
+ "transformer.layers.9.input_layernorm",
94
+ "transformer.layers.18.mlp.router",
95
+ "transformer.layers.18.mlp.shared_expert_gate",
96
+ "transformer.layers.21.input_layernorm",
97
+ "transformer.layers.15.post_layernorm",
98
+ "transformer.layers.22.mlp.shared_expert_gate",
99
+ "transformer.layers.4.post_layernorm",
100
+ "transformer.layers.15.input_layernorm",
101
+ "transformer.layers.13.post_layernorm",
102
+ "transformer.layers.4.mlp.router",
103
+ "transformer.layers.23.mlp.router",
104
+ "transformer.layers.3.mlp.router",
105
  "transformer.layers.2.mlp.router",
106
+ "transformer.layers.21.post_layernorm",
107
+ "transformer.layers.9.post_layernorm",
108
+ "transformer.layers.5.mlp.shared_expert_gate",
109
+ "transformer.layers.1.post_layernorm",
110
  "transformer.layers.23.input_layernorm",
111
+ "transformer.layers.7.mlp.router",
 
 
 
112
  "transformer.layers.20.input_layernorm",
113
+ "transformer.layers.13.mlp.shared_expert_gate",
114
+ "transformer.layers.15.mlp.router",
115
+ "transformer.layers.7.post_layernorm",
116
+ "transformer.layers.22.input_layernorm",
117
+ "transformer.layers.2.mlp.shared_expert_gate",
118
  "transformer.layers.17.mlp.router",
119
+ "transformer.layers.19.post_layernorm",
120
+ "transformer.layers.4.mlp.shared_expert_gate",
121
+ "transformer.layers.6.mlp.shared_expert_gate",
122
+ "transformer.layers.9.mlp.router",
123
+ "transformer.layers.16.input_layernorm",
124
+ "transformer.layers.0.input_layernorm",
125
+ "transformer.layers.11.mlp.shared_expert_gate",
126
+ "transformer.layers.12.mlp.shared_expert_gate",
127
+ "transformer.layers.16.mlp.shared_expert_gate",
128
  "transformer.layers.16.post_layernorm",
 
 
 
129
  "transformer.layers.19.input_layernorm",
130
+ "transformer.layers.14.mlp.shared_expert_gate",
131
+ "transformer.layers.10.post_layernorm",
132
+ "transformer.vocab_embedding",
133
+ "transformer.layers.8.post_layernorm",
134
+ "transformer.layers.10.input_layernorm",
135
+ "transformer.layers.2.input_layernorm",
136
+ "transformer.layers.14.input_layernorm",
137
+ "transformer.layers.5.input_layernorm",
138
+ "transformer.layers.18.input_layernorm",
139
+ "transformer.layers.3.post_layernorm",
140
+ "transformer.layers.5.post_layernorm",
141
  "transformer.layers.11.input_layernorm",
142
+ "transformer.layers.12.input_layernorm",
143
+ "transformer.layers.23.mlp.shared_expert_gate",
144
+ "transformer.layers.16.mlp.router",
145
+ "transformer.layers.15.mlp.shared_expert_gate",
146
+ "transformer.layers.0.mlp.shared_expert_gate",
 
 
 
 
147
  "transformer.layers.11.post_layernorm",
148
+ "transformer.layers.22.post_layernorm",
149
+ "transformer.layers.10.mlp.router",
150
  "transformer.layers.1.input_layernorm",
151
+ "transformer.layers.17.post_layernorm",
 
 
 
152
  "transformer.layers.12.mlp.router",
153
+ "transformer.layers.19.mlp.shared_expert_gate",
154
  "transformer.layers.12.post_layernorm",
155
+ "transformer.ln_f",
156
+ "transformer.layers.3.input_layernorm",
157
+ "transformer.layers.17.input_layernorm",
158
+ "transformer.layers.8.mlp.shared_expert_gate",
159
+ "transformer.layers.0.post_layernorm",
160
+ "transformer.layers.0.mlp.router",
161
+ "transformer.layers.2.post_layernorm",
162
+ "transformer.layers.8.input_layernorm",
163
+ "transformer.layers.4.input_layernorm",
164
+ "transformer.layers.21.mlp.shared_expert_gate",
165
+ "transformer.layers.6.mlp.router",
166
+ "transformer.layers.5.mlp.router",
167
+ "transformer.layers.20.post_layernorm"
168
  ]
169
  },
170
  "qk_layernorm": false,
tllm-checkpoint/rank0.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11d5a9365de3dfa2e1ca78676690b297d084a702839d259f4dd41d8198a95648
3
- size 8725475264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f1209747dafd1fe1b4db5cf54b6e747c5234252e38bf30e1adf5503ed96ebed
3
+ size 8725465656
tllm-checkpoint/rank1.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a9e0df4a8fa16bf25856bd427233692f8d58dc51abbbd2f5cd75d9a52b7f55a
3
- size 8725475264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:680fbac67d297f021d2f4c1f5320b86d167e973aef2947228d4228f1da981aed
3
+ size 8725465656