| { | |
| "metadata": { | |
| "ParamSize": 303, | |
| "ParamBytes": 75715200.0, | |
| "BitsPerParam": 4.503003858127117 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33364224, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 49152, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 49152, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 15926400 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 16368768 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 16424064 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 17308800 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 17419392 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 17420544 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 17697024 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 17731584 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 17897472 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 17918208 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 17919360 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 18361728 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 18417024 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 19301760 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 19412352 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 19413504 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 19689984 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 19724544 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 19890432 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 19911168 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 19912320 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 20354688 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 20409984 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 21294720 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 21405312 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 21406464 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 21682944 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 21717504 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 21883392 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 21904128 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 21905280 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 22347648 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 22402944 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 23287680 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 23398272 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 23399424 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 23675904 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 23710464 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 23876352 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 23897088 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 23898240 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 24340608 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 24395904 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 25280640 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 25391232 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 25392384 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 25668864 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 25703424 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 25869312 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 25890048 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 25891200 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 26333568 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 26388864 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 27273600 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 27384192 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 27385344 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 27661824 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 27696384 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 27862272 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 27883008 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 27884160 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 28326528 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 28381824 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 29266560 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 29377152 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 29378304 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 29654784 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 29689344 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 29855232 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 29875968 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 29877120 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 30319488 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 30374784 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 31259520 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 31370112 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 31371264 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 31647744 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 31682304 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 31848192 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 31868928 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 31870080 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 32312448 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 32367744 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 33252480 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 33363072 | |
| } | |
| ], | |
| "md5sum": "bc7cb38f807f510b59f78653cf7dee86" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32883840, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 276480 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 311040 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 476928 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 497664 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 498816 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 941184 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 996480 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 1881216 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 1991808 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 1992960 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 2269440 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 2304000 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 2469888 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 2490624 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 2491776 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 2934144 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 2989440 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 3874176 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 3984768 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 3985920 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 4262400 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 4296960 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 4462848 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 4483584 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 4484736 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 4927104 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 4982400 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 5867136 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 5977728 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 5978880 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 6255360 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 6289920 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 6455808 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 6476544 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 6477696 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 6920064 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 6975360 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 7860096 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 7970688 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 7971840 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 8248320 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 8282880 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 8448768 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 8469504 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 8470656 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 8913024 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 8968320 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 9853056 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 9963648 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 9964800 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 10241280 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 10275840 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 10441728 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 10462464 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 10463616 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 10905984 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 10961280 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 11846016 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 11956608 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 11957760 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 12234240 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 12268800 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 12434688 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 12455424 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 12456576 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 12898944 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 12954240 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 13838976 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 13949568 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 13950720 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 14227200 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 14261760 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 14427648 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 14448384 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 14449536 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 14891904 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 14947200 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 15831936 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 15942528 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 15943680 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 16220160 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 16254720 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 16420608 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 16442496 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 16884864 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 16940160 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 17824896 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 17935488 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 17936640 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 18213120 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 18247680 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 18413568 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 18434304 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 18435456 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 18877824 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 18933120 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 19817856 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 19928448 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 19929600 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 20206080 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 20240640 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 20406528 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 20427264 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 20428416 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 20870784 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 20926080 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 21810816 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 21921408 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 21922560 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 22199040 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 22233600 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 22399488 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 22420224 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 22421376 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 22863744 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 22919040 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 23803776 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 23914368 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 23915520 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 24192000 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 24226560 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 24392448 | |
| }, | |
| { | |
| "name": "model.layers.28.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 24413184 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 24414336 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 24856704 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 24912000 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 25796736 | |
| }, | |
| { | |
| "name": "model.layers.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 25907328 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 25908480 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 26184960 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 26219520 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 26385408 | |
| }, | |
| { | |
| "name": "model.layers.29.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 26406144 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 26407296 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 26849664 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 26904960 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 27789696 | |
| }, | |
| { | |
| "name": "model.layers.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 27900288 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 27901440 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 28177920 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 28212480 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 28378368 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 28399104 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 28400256 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 28842624 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 28897920 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 29782656 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 29893248 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 29894400 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 30170880 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 30205440 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 30371328 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 30392064 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 30393216 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 30835584 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 30890880 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 31775616 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 31886208 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 31887360 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 32163840 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 32198400 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 32364288 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 32385024 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 32386176 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 32828544 | |
| } | |
| ], | |
| "md5sum": "0c00bc46e72136475c280bce049d4e02" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 9467136, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 884736 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 995328 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 996480 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 1272960 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 1307520 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 1473408 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 1494144 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 1495296 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 1937664 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 1992960 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 2877696 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 2988288 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 2989440 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 3265920 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 3300480 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 3466368 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 3487104 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 3488256 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 3930624 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 3985920 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 4870656 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 4981248 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 4982400 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 5258880 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 5293440 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 5459328 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 5480064 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 5481216 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 5923584 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 5978880 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 6863616 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 6974208 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 6975360 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 7251840 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 7286400 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 7452288 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 7473024 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 192 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 442368, | |
| "byteOffset": 7474176 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 48 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 55296, | |
| "byteOffset": 7916544 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 3072, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 884736, | |
| "byteOffset": 7971840 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 3072, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 110592, | |
| "byteOffset": 8856576 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 8967168 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 960, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 276480, | |
| "byteOffset": 8968320 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 960, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 34560, | |
| "byteOffset": 9244800 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 576, | |
| 72 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 165888, | |
| "byteOffset": 9279360 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 576, | |
| 18 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20736, | |
| "byteOffset": 9445248 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1152, | |
| "byteOffset": 9465984 | |
| } | |
| ], | |
| "md5sum": "e8c8b687c79f292d2dfe83a67f26799e" | |
| } | |
| ] | |
| } | |
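
The manifest above is plain JSON once the table wrapping is stripped: a top-level `records` list of shards, each with a `dataPath`, a total `nbytes`, an `md5sum`, and a list of tensor records whose `byteOffset` values are packed back to back inside the shard file. As a minimal sketch of how one might sanity-check that layout, the snippet below walks every shard, confirms the records are contiguous and sum to the shard's declared size, and (if the shard binaries are present next to the manifest) compares the file checksum against `md5sum`. The file name `ndarray-cache.json` and the helper name `validate_manifest` are assumptions for illustration, not part of any particular library.

```python
import hashlib
import json
from pathlib import Path


def validate_manifest(manifest_path: str) -> None:
    """Sanity-check a shard manifest of the shape shown above (hypothetical helper).

    Checks, per shard:
      * each record's byteOffset equals the running sum of preceding nbytes,
      * the running total equals the shard's declared nbytes,
      * if the shard binary exists on disk, its md5 matches "md5sum".
    """
    base = Path(manifest_path).parent
    manifest = json.loads(Path(manifest_path).read_text())

    for shard in manifest["records"]:
        offset = 0
        for rec in shard["records"]:
            # Records are expected to be packed with no gaps or overlaps.
            assert rec["byteOffset"] == offset, (shard["dataPath"], rec["name"])
            offset += rec["nbytes"]

        # The last record should end exactly at the shard's declared size.
        assert offset == shard["nbytes"], shard["dataPath"]

        shard_file = base / shard["dataPath"]
        if shard_file.exists():
            digest = hashlib.md5(shard_file.read_bytes()).hexdigest()
            assert digest == shard["md5sum"], shard["dataPath"]


# Hypothetical usage; assumes the JSON above is saved alongside the
# params_shard_*.bin files referenced by its dataPath entries.
validate_manifest("ndarray-cache.json")
```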