diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,11424 @@ +{ + "metadata": { + "ParamSize": 1037, + "ParamBytes": 1795483744.0, + "BitsPerParam": 4.507976587172874 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "language_model.lm_head.linear.q_weight", + "shape": [ + 51200, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "463f798bd1c1bb9f4c47636813571bbd" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 32179040, + "records": [ + { + "name": "language_model.lm_head.linear.bias", + "shape": [ + 51200 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 102400, + "byteOffset": 0 + }, + { + "name": "language_model.lm_head.linear.q_scale", + "shape": [ + 51200, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 102400 + }, + { + "name": "language_model.lm_head.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8294400 + }, + { + "name": "language_model.lm_head.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8299520 + }, + { + "name": "multi_modal_projector.linear_1.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8304640 + }, + { + "name": "multi_modal_projector.linear_1.q_weight", + "shape": [ + 2560, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1474560, + "byteOffset": 8309760 + }, + { + "name": "multi_modal_projector.linear_1.q_scale", + "shape": [ + 2560, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 184320, + "byteOffset": 9784320 + }, + { + "name": "multi_modal_projector.linear_2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 9968640 + }, + { + "name": "multi_modal_projector.linear_2.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9973760 + }, + { + "name": "multi_modal_projector.linear_2.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13250560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 13660160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 13662464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 13664768 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 13667072 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 13669376 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 13677984 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 16157088 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 16466976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 16469280 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 18957600 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 19268640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 19270944 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 19934496 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20017440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 20019744 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 20683296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20766240 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20768544 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20770848 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20773152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 20775456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 20784064 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 23263168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 23573056 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 23575360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 26063680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 26374720 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 26377024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 27040576 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 27123520 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 27125824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 27789376 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 27872320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 27874624 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 28538176 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 28621120 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 28623424 + }, + { + "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 29286976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 29369920 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 29372224 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 29374528 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 29376832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 29379136 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 29387744 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 31866848 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 32176736 + } + ], + "md5sum": "6653206d4095afaf2ad3770d1bc90a01" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "language_model.transformer.embd.q_weight", + "shape": [ + 51200, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "a32829672f47c5848bd32124b284b6ee" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 28783360, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 2488320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 2799360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 2801664 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 3465216 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 3548160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 3550464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 4214016 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 4296960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 4299264 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 4962816 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 5045760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 5048064 + }, + { + "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 5711616 + }, + { + "name": "language_model.transformer.embd.q_scale", + "shape": [ + 51200, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 5794560 + }, + { + "name": "language_model.transformer.h.0.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13986560 + }, + { + "name": "language_model.transformer.h.0.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13991680 + }, + { + "name": "language_model.transformer.h.0.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13996800 + }, + { + "name": "language_model.transformer.h.0.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14012160 + }, + { + "name": "language_model.transformer.h.0.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 23842560 + }, + { + "name": "language_model.transformer.h.0.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25071360 + }, + { + "name": "language_model.transformer.h.0.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25076480 + }, + { + "name": "language_model.transformer.h.0.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 28353280 + }, + { + "name": "language_model.transformer.h.0.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 28762880 + } + ], + "md5sum": "514d90e8394b01d016343dfc1624e936" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.0.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.0.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.0.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.0.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.0.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.1.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.1.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.1.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "4443aa6b8c666c21a4bc98fc5cd724da" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.1.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.1.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.1.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.1.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.1.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.1.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.1.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.1.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.1.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "0435f6bbf7397937f36e69008581d438" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.1.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.1.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.2.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.2.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.2.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.2.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.2.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.2.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.2.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.2.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.2.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "444d6a81d36b0cc9f10ac60e8f768712" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.2.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.2.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.2.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.2.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.2.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.3.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.3.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.3.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "dbf93da436f3e51305b37122bfb9448f" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.3.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.3.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.3.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.3.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.3.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.3.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.3.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.3.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.3.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "dea8e2c5621bdcb6c9571021598c9494" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.3.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.3.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.4.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.4.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.4.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.4.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.4.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.4.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.4.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.4.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.4.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "b65e7a3bd896a66f44d0a610d55485d0" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.4.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.4.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.10.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.10.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.10.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.10.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.10.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.10.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.10.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.10.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.10.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "74e55cd70e981054f51f55db0100dab6" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29511680, + "records": [ + { + "name": "language_model.transformer.h.10.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.10.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.10.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.10.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.10.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.11.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.11.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.4.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29506560 + } + ], + "md5sum": "ec111926051d4ac9f004bcbc6af54f1d" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.4.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.4.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.5.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.5.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.5.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.5.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.5.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.5.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.5.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.5.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.5.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "599943cf4b3168eece30544134d0b28f" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.5.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.5.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.5.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.5.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.5.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.6.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.6.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.6.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "1b3589577f8280bcbb7eb6053513a5c0" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.6.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.6.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.6.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.6.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.6.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.6.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.6.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.6.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.6.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "506678fb762604e248c9c576be97955a" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.6.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.6.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.7.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.7.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.7.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.7.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.7.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.7.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.7.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.7.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.7.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "0bdf7d328729b5d57550f6db6b7cc0b0" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.7.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.7.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.7.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.7.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.7.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.8.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.8.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.8.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "fd2c1bfd2d786ccb331b888eb9f73e1c" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.8.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.8.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.8.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.8.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.8.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.8.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.8.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.8.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.8.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "2cf646d3acf339491c61102613b2e9b4" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.8.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.8.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.9.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.9.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.9.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.9.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.9.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.9.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.9.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.9.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.9.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "0ad54d18051230c2889d3c8291d3ab7c" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 29511680, + "records": [ + { + "name": "language_model.transformer.h.9.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.9.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.9.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.9.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.9.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.11.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29496320 + } + ], + "md5sum": "4698df138abdf2593fb68e0b373f5687" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.11.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.11.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.11.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.11.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.11.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.11.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.11.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.11.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.11.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "cefe0dcf582b5d6f96e8f3fa6281c166" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.11.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.11.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.12.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.12.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.12.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.12.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.12.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.12.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.12.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.12.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.12.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "032014ae960649835b50e6e292adb87f" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.12.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.12.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.12.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.12.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.12.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.13.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.13.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.13.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "7448a05fb7a28ed6530df8ca9ed33e64" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.13.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.13.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.13.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.13.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.13.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.13.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.13.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.13.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.13.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "b7dda0e92f11b96cb41e629b4d4002de" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.13.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.13.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.14.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.14.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.14.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.14.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.14.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.14.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.14.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.14.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.14.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "20a06f1825609b01d6f8f1952d3ee6ba" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.14.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.14.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.14.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.14.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.14.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.15.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.15.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.15.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "a5072021503bea164a4dcd6a264fca16" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.15.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.15.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.15.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.15.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.15.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.15.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.15.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.15.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.15.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "7fd195702f009ba03bce82bc8fa2a101" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.15.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.15.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.16.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.16.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.16.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.16.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.16.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.16.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.16.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.16.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.16.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "a6c1e4e14dbde02b6c92f011f3a8bf13" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.16.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.16.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.16.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.16.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.16.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.17.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.17.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.17.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "8d762b393bc9d1b8da2121a0a5459b48" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.17.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.17.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.17.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.17.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.17.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.17.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.17.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.17.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.17.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "5f677c36cd7802b7e9a737366a9b198f" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.17.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.17.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.18.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.18.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.18.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.18.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.18.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.18.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.18.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.18.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.18.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "7846adcbabf9e2a1cdaf84e9a52d0dd5" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.18.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.18.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.18.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.18.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.18.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.19.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.19.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.19.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "e1fd2aa19e097365ccfbde26c95114f6" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.19.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.19.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.19.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.19.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.19.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.19.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.19.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.19.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.19.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "d50ec00e50f3c7812565d12499b11888" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.19.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.19.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.20.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.20.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.20.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.20.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.20.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.20.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.20.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.20.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.20.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "21bd670e93c4f3af12cde025bcc5985f" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.20.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.20.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.20.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.20.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.20.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.21.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.21.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.21.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "4987bdd31d44c27df9450e6c8f9dd5c2" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.21.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.21.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.21.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.21.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.21.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.21.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.21.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.21.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.21.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "9374265354624d4eb77ce1eb2dd43fa0" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.21.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.21.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.22.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.22.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.22.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.22.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.22.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.22.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.22.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.22.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.22.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "8b75b3cc5bf1844a1e3977c68a76ece8" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.22.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.22.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.22.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.22.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.22.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.23.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.23.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.23.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "8b4a5eb088406f4ee59f108bc811e9a3" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.23.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.23.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.23.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.23.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.23.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.23.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.23.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.23.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.23.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "3dcd6ab0afaebdce7888eebde02d4904" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.23.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.23.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.24.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.24.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.24.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.24.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.24.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.24.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.24.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.24.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.24.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "3cb43ce6c143a67e963144c2fcad3fb1" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.24.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.24.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.24.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.24.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.24.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.25.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.25.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.25.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "3e608c2dbd5240e6087a46aafce625ac" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.25.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.25.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.25.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.25.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.25.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.25.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.25.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.25.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.25.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "3f85baa8dd75b0085a1621324f11ecec" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.25.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.25.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.26.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.26.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.26.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.26.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.26.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.26.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.26.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.26.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.26.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "f3945e6c2c3355430b610eaec1efb214" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.26.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.26.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.26.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.26.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.26.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.27.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.27.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.27.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "8b1f3f89070d3a04eab416a53c1875d3" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.27.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.27.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.27.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.27.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.27.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.27.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.27.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.27.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.27.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "68a647077df6bc97a7c22c3ad13c975f" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.27.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.27.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.28.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.28.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.28.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.28.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.28.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.28.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.28.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.28.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.28.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "122c4cefd0eb1812c8b4efece37f09a5" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.28.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.28.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.28.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.28.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.28.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.29.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.29.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.29.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "bece528f5ad344735d92ccbca25dd0c9" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.29.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.29.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.29.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.29.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.29.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.29.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.29.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.29.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.29.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "cc01e9d72515427008e9057caa495e0e" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29542400, + "records": [ + { + "name": "language_model.transformer.h.29.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.29.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.30.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.30.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.30.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 14755840 + }, + { + "name": "language_model.transformer.h.30.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.30.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "language_model.transformer.h.30.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25830400 + }, + { + "name": "language_model.transformer.h.30.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25835520 + }, + { + "name": "language_model.transformer.h.30.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29112320 + }, + { + "name": "language_model.transformer.h.30.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29521920 + } + ], + "md5sum": "e6257c8ef70eaee0610bfb8d574a2ee7" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.30.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.30.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.transformer.h.30.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.transformer.h.30.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.30.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27857920 + }, + { + "name": "language_model.transformer.h.31.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29496320 + }, + { + "name": "language_model.transformer.h.31.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29501440 + }, + { + "name": "language_model.transformer.h.31.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 29506560 + } + ], + "md5sum": "4f6f1b71dc2cd0e10f8d7ccbcb094ffe" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29521920, + "records": [ + { + "name": "language_model.transformer.h.31.mixer.Wqkv.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.31.mixer.Wqkv.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "language_model.transformer.h.31.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 11059200 + }, + { + "name": "language_model.transformer.h.31.mixer.out_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11064320 + }, + { + "name": "language_model.transformer.h.31.mixer.out_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14341120 + }, + { + "name": "language_model.transformer.h.31.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14750720 + }, + { + "name": "language_model.transformer.h.31.mlp.fc1.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14771200 + }, + { + "name": "language_model.transformer.h.31.mlp.fc1.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27878400 + }, + { + "name": "language_model.transformer.h.31.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + } + ], + "md5sum": "98e74dfa9c7ef0b482d250512351377f" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33454848, + "records": [ + { + "name": "language_model.transformer.h.31.mlp.fc2.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.31.mlp.fc2.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "vision_tower.vision_model.embeddings.patch_embedding.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 14745600 + }, + { + "name": "vision_tower.vision_model.embeddings.patch_embedding.weight", + "shape": [ + 1152, + 3, + 14, + 14 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1354752, + "byteOffset": 14747904 + }, + { + "name": "vision_tower.vision_model.embeddings.position_embedding.q_weight", + "shape": [ + 196, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 112896, + "byteOffset": 16102656 + }, + { + "name": "vision_tower.vision_model.embeddings.position_embedding.q_scale", + "shape": [ + 196, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14112, + "byteOffset": 16215552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 16229664 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 16231968 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 16234272 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 16236576 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 16238880 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 16247488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 18726592 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 19036480 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 19038784 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 21527104 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 21838144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 21840448 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 22504000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 22586944 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 22589248 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 23252800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 23335744 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 23338048 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 24001600 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24084544 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 24086848 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 24750400 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24833344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24835648 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24837952 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24840256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 24842560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 24851168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 27330272 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 27640160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 27642464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 30130784 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 30441824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 30444128 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 31107680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 31190624 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 31192928 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 31856480 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 31939424 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 31941728 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 32605280 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 32688224 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 32690528 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 33354080 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 33437024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 33439328 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 33441632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 33443936 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 33446240 + } + ], + "md5sum": "daa3db44fa9657e04c6c87ce5fe95281" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32901600, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 2479104 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 2788992 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 2791296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 5279616 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 5590656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 5592960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 6256512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 6339456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 6341760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 7005312 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 7088256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 7090560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 7754112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 7837056 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 7839360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 8502912 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8585856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8588160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8590464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8592768 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 8595072 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 8603680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 11082784 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 11392672 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 11394976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 13883296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 14194336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 14196640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 14860192 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 14943136 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 14945440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 15608992 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 15691936 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 15694240 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 16357792 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 16440736 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 16443040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 17106592 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17189536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17191840 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17194144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17196448 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 17198752 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 17207360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 19686464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 19996352 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 19998656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 22486976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 22798016 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 22800320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 23463872 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 23546816 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 23549120 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 24212672 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24295616 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 24297920 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 24961472 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25044416 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 25046720 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 25710272 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25793216 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25795520 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25797824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25800128 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 25802432 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 25811040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 28290144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 28600032 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 28602336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 31090656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 31401696 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 31404000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 32067552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 32150496 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 32152800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 32816352 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 32899296 + } + ], + "md5sum": "eb475175621c15ddfed9791a7943193c" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 32917120, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 663552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 746496 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 748800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 1412352 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 1495296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 1497600 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 1499904 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 1502208 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 1504512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 1513120 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 3992224 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 4302112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 4304416 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 6792736 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 7103776 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 7106080 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 7769632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 7852576 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 7854880 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 8518432 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8601376 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 8603680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 9267232 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 9350176 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 9352480 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 10016032 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 10098976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 10101280 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 10103584 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 10105888 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 10108192 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 10116800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 12595904 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 12905792 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 12908096 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 15396416 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 15707456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 15709760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 16373312 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 16456256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 16458560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 17122112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17205056 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 17207360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 17870912 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17953856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 17956160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 18619712 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 18702656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 18704960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 18707264 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 18709568 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 18711872 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 18720480 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 21199584 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 21509472 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 21511776 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 24000096 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24311136 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 24313440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 24976992 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25059936 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 25062240 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 25725792 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25808736 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 25811040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 26474592 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 26557536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 26559840 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 27223392 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 27306336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 27308640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 27310944 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 27313248 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 27315552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 27324160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 29803264 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 30113152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 30115456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 32603776 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 32914816 + } + ], + "md5sum": "e76348b6be227f0e9de574b53875980c" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 31613056, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 663552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 746496 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 748800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 1412352 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 1495296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 1497600 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 2161152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 2244096 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 2246400 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 2909952 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 2992896 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 2995200 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 2997504 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 2999808 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 3002112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 3010720 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 5489824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 5799712 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 5802016 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 8290336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8601376 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 8603680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 9267232 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 9350176 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 9352480 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 10016032 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 10098976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 10101280 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 10764832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 10847776 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 10850080 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 11513632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 11596576 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 11598880 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 11601184 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 11603488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 11605792 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 11614400 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 14093504 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 14403392 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 14405696 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 16894016 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17205056 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 17207360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 17870912 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17953856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 17956160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 18619712 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 18702656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 18704960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 19368512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 19451456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 19453760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 20117312 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20200256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20202560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20204864 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20207168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 20209472 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 20218080 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 22697184 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 23007072 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 23009376 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 25497696 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25808736 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 25811040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 26474592 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 26557536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 26559840 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 27223392 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 27306336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 27308640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 27972192 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 28055136 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 28057440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 28720992 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 28803936 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 28806240 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 28808544 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 28810848 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 28813152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 28821760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 31300864 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 31610752 + } + ], + "md5sum": "7366127355009a12b4d6ac66d9fdfc76" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 33121024, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 2488320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 2799360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 2801664 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 3465216 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 3548160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 3550464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 4214016 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 4296960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 4299264 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 4962816 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 5045760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 5048064 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 5711616 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 5794560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 5796864 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 5799168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 5801472 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 5803776 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 5812384 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 8291488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8601376 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 8603680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 11092000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 11403040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 11405344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 12068896 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 12151840 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 12154144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 12817696 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 12900640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 12902944 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 13566496 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 13649440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 13651744 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 14315296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 14398240 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 14400544 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 14402848 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 14405152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 14407456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 14416064 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 16895168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17205056 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 17207360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 19695680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20006720 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 20009024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 20672576 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 20755520 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 20757824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 21421376 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 21504320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 21506624 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 22170176 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 22253120 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 22255424 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 22918976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 23001920 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 23004224 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 23667776 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 23750720 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 23753024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 24416576 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24499520 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24501824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24504128 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24506432 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 24508736 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 24517344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 26996448 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 27306336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 27308640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 29796960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 30108000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 30110304 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 30773856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 30856800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 30859104 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 31522656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 31605600 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 31607904 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 32271456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 32354400 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 32356704 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 33020256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 33103200 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 33105504 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 33107808 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 33110112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 33112416 + } + ], + "md5sum": "82f0c3a3d85fc9c1ac8b7f293053a0ae" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 32901600, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 2479104 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 2788992 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 2791296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 5279616 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 5590656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 5592960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 6256512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 6339456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 6341760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 7005312 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 7088256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 7090560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 7754112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 7837056 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 7839360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 8502912 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8585856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8588160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8590464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8592768 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 8595072 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 8603680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 11082784 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 11392672 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 11394976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 13883296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 14194336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 14196640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 14860192 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 14943136 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 14945440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 15608992 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 15691936 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 15694240 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 16357792 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 16440736 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 16443040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 17106592 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17189536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17191840 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17194144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17196448 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 17198752 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 17207360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 19686464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 19996352 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 19998656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 22486976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 22798016 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 22800320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 23463872 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 23546816 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 23549120 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 24212672 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 24295616 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 24297920 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 24961472 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25044416 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 25046720 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 25710272 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25793216 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25795520 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25797824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 25800128 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 25802432 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 25811040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 28290144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 28600032 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 28602336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 31090656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 31401696 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 31404000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 32067552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 32150496 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 32152800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 32816352 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 32899296 + } + ], + "md5sum": "cdd2e2940efafd0c22942bbded3b9dc2" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 18702656, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 663552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 746496 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 748800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 1412352 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 1495296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 1497600 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 1499904 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 1502208 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 1504512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 1513120 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 3992224 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 4302112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 4304416 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 6792736 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 7103776 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 7106080 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 7769632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 7852576 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 7854880 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 8518432 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 8601376 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 8603680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 9267232 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 9350176 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 9352480 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 10016032 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 10098976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 10101280 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 10103584 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 10105888 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", + "shape": [ + 4304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8608, + "byteOffset": 10108192 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_weight", + "shape": [ + 4304, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2479104, + "byteOffset": 10116800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_scale", + "shape": [ + 4304, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 309888, + "byteOffset": 12595904 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 12905792 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_weight", + "shape": [ + 1152, + 540 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2488320, + "byteOffset": 12908096 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_scale", + "shape": [ + 1152, + 135 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 311040, + "byteOffset": 15396416 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 15707456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 15709760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 16373312 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 16456256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 16458560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 17122112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17205056 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 17207360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 17870912 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", + "shape": [ + 1152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2304, + "byteOffset": 17953856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_weight", + "shape": [ + 1152, + 144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 663552, + "byteOffset": 17956160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_scale", + "shape": [ + 1152, + 36 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 82944, + "byteOffset": 18619712 + } + ], + "md5sum": "babdc4bc9577f80abb8df17f61817dfc" + } + ] +} \ No newline at end of file