dgpt2-4 / topology.json
Narsil's picture
Upload redistributed model with 5 files
11a4f8b verified
{
"tensors": {
"h.4.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.7.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.5.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.5.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.6.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.5.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.7.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.1.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.3.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.10.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.4.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.1.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.11.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.9.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.5.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.10.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.2.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.7.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.2.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.1.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.8.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.4.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.11.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.1.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.7.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.6.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.10.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.2.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.2.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.2.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.6.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.7.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.9.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.0.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.5.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.1.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.7.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.9.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.9.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.2.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.6.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.0.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.7.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.0.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.4.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.8.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.3.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.10.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.2.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.5.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.10.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.11.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.8.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.4.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.9.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.9.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"wpe.weight": {
"type": "Distributed",
"shape": [
1024,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
1024,
192
],
"filename_index": 0
},
{
"offsets": [
0,
192
],
"shape": [
1024,
192
],
"filename_index": 1
},
{
"offsets": [
0,
384
],
"shape": [
1024,
192
],
"filename_index": 2
},
{
"offsets": [
0,
576
],
"shape": [
1024,
192
],
"filename_index": 3
}
]
},
"h.1.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.6.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"ln_f.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.11.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.9.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.5.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.4.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.10.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.9.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.6.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"wte.weight": {
"type": "Distributed",
"shape": [
50257,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
50257,
192
],
"filename_index": 0
},
{
"offsets": [
0,
192
],
"shape": [
50257,
192
],
"filename_index": 1
},
{
"offsets": [
0,
384
],
"shape": [
50257,
192
],
"filename_index": 2
},
{
"offsets": [
0,
576
],
"shape": [
50257,
192
],
"filename_index": 3
}
]
},
"h.2.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.3.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.8.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.8.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.2.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.4.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.10.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.2.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.1.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.5.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.0.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.5.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.3.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.2.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.5.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.3.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.4.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.0.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.3.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.10.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.6.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.8.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.6.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.4.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.1.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.0.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.8.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.11.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.8.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.10.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.8.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.0.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.7.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.1.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.11.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.10.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.3.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.1.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.1.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.0.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.5.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.8.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.0.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.3.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.10.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"ln_f.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.7.ln_1.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.9.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.5.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.11.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.3.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.10.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.1.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.11.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.9.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.11.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.8.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.2.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.11.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.3.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.0.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.11.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.0.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.11.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.4.ln_1.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.7.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.10.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.7.mlp.c_fc.bias": {
"type": "Distributed",
"shape": [
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
768
],
"filename_index": 0
},
{
"offsets": [
768
],
"shape": [
768
],
"filename_index": 1
},
{
"offsets": [
1536
],
"shape": [
768
],
"filename_index": 2
},
{
"offsets": [
2304
],
"shape": [
768
],
"filename_index": 3
}
]
},
"h.6.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.11.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.3.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.6.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.5.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.9.mlp.c_fc.weight": {
"type": "Distributed",
"shape": [
768,
3072
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
0,
768
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
0,
1536
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
0,
2304
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.4.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.3.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.2.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.6.attn.c_proj.weight": {
"type": "Distributed",
"shape": [
768,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
192,
768
],
"filename_index": 0
},
{
"offsets": [
192,
0
],
"shape": [
192,
768
],
"filename_index": 1
},
{
"offsets": [
384,
0
],
"shape": [
192,
768
],
"filename_index": 2
},
{
"offsets": [
576,
0
],
"shape": [
192,
768
],
"filename_index": 3
}
]
},
"h.6.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.9.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.4.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.3.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.9.attn.c_attn.weight": {
"type": "Distributed",
"shape": [
768,
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
576
],
"filename_index": 0
},
{
"offsets": [
0,
576
],
"shape": [
768,
576
],
"filename_index": 1
},
{
"offsets": [
0,
1152
],
"shape": [
768,
576
],
"filename_index": 2
},
{
"offsets": [
0,
1728
],
"shape": [
768,
576
],
"filename_index": 3
}
]
},
"h.6.mlp.c_proj.weight": {
"type": "Distributed",
"shape": [
3072,
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0
],
"shape": [
768,
768
],
"filename_index": 0
},
{
"offsets": [
768,
0
],
"shape": [
768,
768
],
"filename_index": 1
},
{
"offsets": [
1536,
0
],
"shape": [
768,
768
],
"filename_index": 2
},
{
"offsets": [
2304,
0
],
"shape": [
768,
768
],
"filename_index": 3
}
]
},
"h.0.attn.c_attn.bias": {
"type": "Distributed",
"shape": [
2304
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
576
],
"filename_index": 0
},
{
"offsets": [
576
],
"shape": [
576
],
"filename_index": 1
},
{
"offsets": [
1152
],
"shape": [
576
],
"filename_index": 2
},
{
"offsets": [
1728
],
"shape": [
576
],
"filename_index": 3
}
]
},
"h.0.attn.bias": {
"type": "Distributed",
"shape": [
1,
1,
1024,
1024
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0,
0,
0,
0
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 0
},
{
"offsets": [
0,
0,
0,
256
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 1
},
{
"offsets": [
0,
0,
0,
512
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 2
},
{
"offsets": [
0,
0,
0,
768
],
"shape": [
1,
1,
1024,
256
],
"filename_index": 3
}
]
},
"h.8.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.1.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.7.ln_2.weight": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.8.attn.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.7.ln_2.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
},
"h.4.mlp.c_proj.bias": {
"type": "Distributed",
"shape": [
768
],
"dtype": "F32",
"chunks": [
{
"offsets": [
0
],
"shape": [
192
],
"filename_index": 0
},
{
"offsets": [
192
],
"shape": [
192
],
"filename_index": 1
},
{
"offsets": [
384
],
"shape": [
192
],
"filename_index": 2
},
{
"offsets": [
576
],
"shape": [
192
],
"filename_index": 3
}
]
}
},
"filenames": [
"rank0.safetensors",
"rank1.safetensors",
"rank2.safetensors",
"rank3.safetensors"
],
"n_ranks": 4
}