diff --git "a/topology.json" "b/topology.json" new file mode 100644--- /dev/null +++ "b/topology.json" @@ -0,0 +1,7985 @@ +{ + "tensors": { + "h.4.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.7.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.5.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.5.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.6.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.5.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.7.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.1.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.3.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.10.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.4.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.1.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.11.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.9.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.5.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.10.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.2.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.7.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.2.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.1.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.8.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.4.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.11.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.1.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.7.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.6.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.10.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.2.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.2.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.2.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.6.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.7.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.9.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.0.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.5.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.1.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.7.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.9.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.9.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.2.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.6.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.0.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.7.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.0.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.4.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.8.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.3.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.10.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.2.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.5.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.10.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.11.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.8.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.4.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.9.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.9.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "wpe.weight": { + "type": "Distributed", + "shape": [ + 1024, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 1024, + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 192 + ], + "shape": [ + 1024, + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 384 + ], + "shape": [ + 1024, + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 1024, + 192 + ], + "filename_index": 3 + } + ] + }, + "h.1.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.6.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "ln_f.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.11.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.9.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.5.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.4.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.10.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.9.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.6.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "wte.weight": { + "type": "Distributed", + "shape": [ + 50257, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 50257, + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 192 + ], + "shape": [ + 50257, + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 384 + ], + "shape": [ + 50257, + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 50257, + 192 + ], + "filename_index": 3 + } + ] + }, + "h.2.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.3.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.8.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.8.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.2.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.4.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.10.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.2.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.1.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.5.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.0.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.5.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.3.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.2.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.5.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.3.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.4.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.0.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.3.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.10.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.6.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.8.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.6.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.4.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.1.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.0.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.8.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.11.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.8.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.10.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.8.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.0.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.7.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.1.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.11.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.10.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.3.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.1.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.1.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.0.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.5.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.8.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.0.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.3.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.10.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "ln_f.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.7.ln_1.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.9.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.5.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.11.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.3.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.10.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.1.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.11.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.9.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.11.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.8.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.2.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.11.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.3.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.0.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.11.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.0.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.11.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.4.ln_1.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.7.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.10.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.7.mlp.c_fc.bias": { + "type": "Distributed", + "shape": [ + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768 + ], + "shape": [ + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536 + ], + "shape": [ + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304 + ], + "shape": [ + 768 + ], + "filename_index": 3 + } + ] + }, + "h.6.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.11.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.3.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.6.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.5.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.9.mlp.c_fc.weight": { + "type": "Distributed", + "shape": [ + 768, + 3072 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 768 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1536 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 2304 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.4.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.3.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.2.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.6.attn.c_proj.weight": { + "type": "Distributed", + "shape": [ + 768, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576, + 0 + ], + "shape": [ + 192, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.6.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.9.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.4.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.3.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.9.attn.c_attn.weight": { + "type": "Distributed", + "shape": [ + 768, + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 576 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 1152 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 1728 + ], + "shape": [ + 768, + 576 + ], + "filename_index": 3 + } + ] + }, + "h.6.mlp.c_proj.weight": { + "type": "Distributed", + "shape": [ + 3072, + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 0 + }, + { + "offsets": [ + 768, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1536, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 2 + }, + { + "offsets": [ + 2304, + 0 + ], + "shape": [ + 768, + 768 + ], + "filename_index": 3 + } + ] + }, + "h.0.attn.c_attn.bias": { + "type": "Distributed", + "shape": [ + 2304 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 576 + ], + "filename_index": 0 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 576 + ], + "filename_index": 1 + }, + { + "offsets": [ + 1152 + ], + "shape": [ + 576 + ], + "filename_index": 2 + }, + { + "offsets": [ + 1728 + ], + "shape": [ + 576 + ], + "filename_index": 3 + } + ] + }, + "h.0.attn.bias": { + "type": "Distributed", + "shape": [ + 1, + 1, + 1024, + 1024 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0, + 0, + 0, + 0 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 0 + }, + { + "offsets": [ + 0, + 0, + 0, + 256 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 1 + }, + { + "offsets": [ + 0, + 0, + 0, + 512 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 2 + }, + { + "offsets": [ + 0, + 0, + 0, + 768 + ], + "shape": [ + 1, + 1, + 1024, + 256 + ], + "filename_index": 3 + } + ] + }, + "h.8.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.1.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.7.ln_2.weight": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.8.attn.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.7.ln_2.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + }, + "h.4.mlp.c_proj.bias": { + "type": "Distributed", + "shape": [ + 768 + ], + "dtype": "F32", + "chunks": [ + { + "offsets": [ + 0 + ], + "shape": [ + 192 + ], + "filename_index": 0 + }, + { + "offsets": [ + 192 + ], + "shape": [ + 192 + ], + "filename_index": 1 + }, + { + "offsets": [ + 384 + ], + "shape": [ + 192 + ], + "filename_index": 2 + }, + { + "offsets": [ + 576 + ], + "shape": [ + 192 + ], + "filename_index": 3 + } + ] + } + }, + "filenames": [ + "rank0.safetensors", + "rank1.safetensors", + "rank2.safetensors", + "rank3.safetensors" + ], + "n_ranks": 4 +} \ No newline at end of file