|
{ |
|
"tensors": { |
|
"h.4.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"wpe.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1024, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1024, |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
192 |
|
], |
|
"shape": [ |
|
1024, |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
384 |
|
], |
|
"shape": [ |
|
1024, |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
1024, |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"ln_f.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"wte.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
50257, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
50257, |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
192 |
|
], |
|
"shape": [ |
|
50257, |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
384 |
|
], |
|
"shape": [ |
|
50257, |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
50257, |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"ln_f.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.ln_1.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.ln_1.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.10.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.mlp.c_fc.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304 |
|
], |
|
"shape": [ |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.11.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.5.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.mlp.c_fc.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1536 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
2304 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.2.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.attn.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576, |
|
0 |
|
], |
|
"shape": [ |
|
192, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.3.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.9.attn.c_attn.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768, |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
576 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1152 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
1728 |
|
], |
|
"shape": [ |
|
768, |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.6.mlp.c_proj.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
768, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1536, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
2304, |
|
0 |
|
], |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.attn.c_attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
1152 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
1728 |
|
], |
|
"shape": [ |
|
576 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.0.attn.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
1024 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
0 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
256 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
512 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
0, |
|
0, |
|
0, |
|
768 |
|
], |
|
"shape": [ |
|
1, |
|
1, |
|
1024, |
|
256 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.1.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.ln_2.weight": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.8.attn.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.7.ln_2.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
}, |
|
"h.4.mlp.c_proj.bias": { |
|
"type": "Distributed", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "F32", |
|
"chunks": [ |
|
{ |
|
"offsets": [ |
|
0 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 0 |
|
}, |
|
{ |
|
"offsets": [ |
|
192 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 1 |
|
}, |
|
{ |
|
"offsets": [ |
|
384 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 2 |
|
}, |
|
{ |
|
"offsets": [ |
|
576 |
|
], |
|
"shape": [ |
|
192 |
|
], |
|
"filename_index": 3 |
|
} |
|
] |
|
} |
|
}, |
|
"filenames": [ |
|
"rank0.safetensors", |
|
"rank1.safetensors", |
|
"rank2.safetensors", |
|
"rank3.safetensors" |
|
], |
|
"n_ranks": 4 |
|
} |