diff --git "a/qwen_FFN_PF_lut8_chunk_01of01.mlmodelc/model.mil" "b/qwen_FFN_PF_lut8_chunk_01of01.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/qwen_FFN_PF_lut8_chunk_01of01.mlmodelc/model.mil" @@ -0,0 +1,13803 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})] +{ + func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2228416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3277056))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3342656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4391296))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4456896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7602688))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7799360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10945152))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11141824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14287616))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14353216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16450432))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16581568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17630208))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17695808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18744448))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18810048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21955840))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22152512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25298304))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25494976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28640768))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28706368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30803584))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30934720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31983360))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32048960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33097600))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33163200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36308992))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36505664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39651456))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39848128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42993920))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43059520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45156736))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45287872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46336512))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46402112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47450752))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47516352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50662144))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50858816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54004608))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54201280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57347072))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57412672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59509888))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59641024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60689664))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60755264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61803904))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61869504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65015296))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65211968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68357760))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68554432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71700224))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71765824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73863040))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73994176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75042816))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75108416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76157056))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76222656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79368448))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79565120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82710912))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82907584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86053376))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86118976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88216192))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88347328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89395968))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89461568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90510208))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90575808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93721600))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93918272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97064064))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97260736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100406528))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100472128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102569344))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102700480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103749120))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103814720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104863360))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104928960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108074752))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108271424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111417216))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111613888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114759680))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114825280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116922496))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117053632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118102272))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118167872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119216512))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119282112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122427904))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122624576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125770368))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125967040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129112832))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129178432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131275648))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131406784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132455424))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132521024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133569664))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133635264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136781056))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136977728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140123520))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140320192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143465984))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143531584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145628800))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145759936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146808576))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146874176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147922816))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147988416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151134208))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151330880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154476672))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154673344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157819136))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157884736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159981952))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160113088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161161728))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161227328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162275968))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162341568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165487360))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165684032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168829824))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169026496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172172288))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172237888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174335104))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174466240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175514880))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175580480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176629120))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176694720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179840512))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180037184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183182976))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183379648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186525440))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186591040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188688256))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188819392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189868032))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189933632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190982272))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191047872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194193664))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194390336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197536128))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197732800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200878592))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200944192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203041408))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203172544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204221184))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204286784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205335424))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205401024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208546816))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208743488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211889280))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212085952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215231744))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215297344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217394560))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217525696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218574336))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218639936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219688576))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219754176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222899968))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223096640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226242432))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226439104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229584896))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229650496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231747712))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231878848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232927488))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232993088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234041728))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234107328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237253120))))[name = string("model_model_layers_16_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237449792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240595584))))[name = string("model_model_layers_16_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240792256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243938048))))[name = string("model_model_layers_16_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244003648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246100864))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246232000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247280640))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247346240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248394880))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248460480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251606272))))[name = string("model_model_layers_17_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251802944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254948736))))[name = string("model_model_layers_17_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255145408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258291200))))[name = string("model_model_layers_17_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258356800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260454016))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260585152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261633792))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261699392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262748032))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262813632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265959424))))[name = string("model_model_layers_18_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266156096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269301888))))[name = string("model_model_layers_18_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269498560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272644352))))[name = string("model_model_layers_18_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272709952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274807168))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274938304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275986944))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276052544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277101184))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277166784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280312576))))[name = string("model_model_layers_19_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280509248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283655040))))[name = string("model_model_layers_19_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283851712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286997504))))[name = string("model_model_layers_19_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287063104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289160320))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289291456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290340096))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290405696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291454336))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291519936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294665728))))[name = string("model_model_layers_20_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294862400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298008192))))[name = string("model_model_layers_20_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298204864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301350656))))[name = string("model_model_layers_20_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301416256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303513472))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303644608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304693248))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304758848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305807488))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305873088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309018880))))[name = string("model_model_layers_21_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309215552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312361344))))[name = string("model_model_layers_21_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312558016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315703808))))[name = string("model_model_layers_21_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315769408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317866624))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317997760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319046400))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319112000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320160640))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320226240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323372032))))[name = string("model_model_layers_22_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323568704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326714496))))[name = string("model_model_layers_22_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326911168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330056960))))[name = string("model_model_layers_22_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330122560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332219776))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332350912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333399552))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333465152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334513792))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334579392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337725184))))[name = string("model_model_layers_23_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337921856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341067648))))[name = string("model_model_layers_23_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341264320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344410112))))[name = string("model_model_layers_23_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344475712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346572928))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346704064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347752704))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347818304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348866944))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348932544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352078336))))[name = string("model_model_layers_24_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352275008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355420800))))[name = string("model_model_layers_24_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355617472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358763264))))[name = string("model_model_layers_24_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358828864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360926080))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361057216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362105856))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362171456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363220096))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363285696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366431488))))[name = string("model_model_layers_25_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366628160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369773952))))[name = string("model_model_layers_25_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369970624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373116416))))[name = string("model_model_layers_25_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373182016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375279232))))[name = string("model_model_layers_26_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375410368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376459008))))[name = string("model_model_layers_26_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376524608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377573248))))[name = string("model_model_layers_26_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377638848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380784640))))[name = string("model_model_layers_26_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380981312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384127104))))[name = string("model_model_layers_26_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384323776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387469568))))[name = string("model_model_layers_26_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_27_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387535168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389632384))))[name = string("model_model_layers_27_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_27_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389763520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390812160))))[name = string("model_model_layers_27_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_27_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390877760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391926400))))[name = string("model_model_layers_27_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_27_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391992000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395137792))))[name = string("model_model_layers_27_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_27_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395334464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398480256))))[name = string("model_model_layers_27_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_27_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398676928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401822720))))[name = string("model_model_layers_27_mlp_down_proj_weight_palettized")]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(8192)]; + tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_1503_axis_0 = const()[name = string("op_1503_axis_0"), val = int32(1)]; + int32 var_1503_batch_dims_0 = const()[name = string("op_1503_batch_dims_0"), val = int32(0)]; + bool var_1503_validate_indices_0 = const()[name = string("op_1503_validate_indices_0"), val = bool(false)]; + tensor var_1495_to_fp16 = const()[name = string("op_1495_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401888320)))]; + string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; + tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_1")]; + tensor var_1503_cast_fp16_cast_uint16 = gather(axis = var_1503_axis_0, batch_dims = var_1503_batch_dims_0, indices = select_0_to_int16, validate_indices = var_1503_validate_indices_0, x = var_1495_to_fp16)[name = string("op_1503_cast_fp16_cast_uint16")]; + tensor var_1508 = const()[name = string("op_1508"), val = tensor([1, 1, 1, -1])]; + tensor sin_1_cast_fp16 = reshape(shape = var_1508, x = var_1503_cast_fp16_cast_uint16)[name = string("sin_1_cast_fp16")]; + int32 var_1518_axis_0 = const()[name = string("op_1518_axis_0"), val = int32(1)]; + int32 var_1518_batch_dims_0 = const()[name = string("op_1518_batch_dims_0"), val = int32(0)]; + bool var_1518_validate_indices_0 = const()[name = string("op_1518_validate_indices_0"), val = bool(false)]; + tensor var_1510_to_fp16 = const()[name = string("op_1510_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403985536)))]; + string select_0_to_uint16_dtype_0 = const()[name = string("select_0_to_uint16_dtype_0"), val = string("uint16")]; + tensor select_0_to_uint16 = cast(dtype = select_0_to_uint16_dtype_0, x = select_0)[name = string("cast_0")]; + tensor var_1518_cast_fp16_cast_uint16 = gather(axis = var_1518_axis_0, batch_dims = var_1518_batch_dims_0, indices = select_0_to_uint16, validate_indices = var_1518_validate_indices_0, x = var_1510_to_fp16)[name = string("op_1518_cast_fp16_cast_uint16")]; + tensor var_1523 = const()[name = string("op_1523"), val = tensor([1, 1, 1, -1])]; + tensor cos_1_cast_fp16 = reshape(shape = var_1523, x = var_1518_cast_fp16_cast_uint16)[name = string("cos_1_cast_fp16")]; + int32 var_1544 = const()[name = string("op_1544"), val = int32(-1)]; + fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1546_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_1546_cast_fp16")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1_cast_fp16 = concat(axis = var_1544, interleave = input_1_interleave_0, values = (hidden_states, var_1546_cast_fp16))[name = string("input_1_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_1541_to_fp16 = const()[name = string("op_1541_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1541_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor normed_3_begin_0 = const()[name = string("normed_3_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_3_end_0 = const()[name = string("normed_3_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_3_end_mask_0 = const()[name = string("normed_3_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_3_cast_fp16 = slice_by_index(begin = normed_3_begin_0, end = normed_3_end_0, end_mask = normed_3_end_mask_0, x = normed_1_cast_fp16)[name = string("normed_3_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406082752)))]; + tensor hidden_states_3_cast_fp16 = mul(x = normed_3_cast_fp16, y = const_3_promoted_to_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor var_1563 = const()[name = string("op_1563"), val = tensor([0, 2, 1])]; + tensor var_1566_axes_0 = const()[name = string("op_1566_axes_0"), val = tensor([2])]; + tensor var_1564_cast_fp16 = transpose(perm = var_1563, x = hidden_states_3_cast_fp16)[name = string("transpose_167")]; + tensor var_1566_cast_fp16 = expand_dims(axes = var_1566_axes_0, x = var_1564_cast_fp16)[name = string("op_1566_cast_fp16")]; + string var_1582_pad_type_0 = const()[name = string("op_1582_pad_type_0"), val = string("valid")]; + tensor var_1582_strides_0 = const()[name = string("op_1582_strides_0"), val = tensor([1, 1])]; + tensor var_1582_pad_0 = const()[name = string("op_1582_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1582_dilations_0 = const()[name = string("op_1582_dilations_0"), val = tensor([1, 1])]; + int32 var_1582_groups_0 = const()[name = string("op_1582_groups_0"), val = int32(1)]; + tensor var_1582 = conv(dilations = var_1582_dilations_0, groups = var_1582_groups_0, pad = var_1582_pad_0, pad_type = var_1582_pad_type_0, strides = var_1582_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1566_cast_fp16)[name = string("op_1582")]; + tensor var_1587 = const()[name = string("op_1587"), val = tensor([1, 16, 1, 128])]; + tensor var_1588 = reshape(shape = var_1587, x = var_1582)[name = string("op_1588")]; + string var_1604_pad_type_0 = const()[name = string("op_1604_pad_type_0"), val = string("valid")]; + tensor var_1604_strides_0 = const()[name = string("op_1604_strides_0"), val = tensor([1, 1])]; + tensor var_1604_pad_0 = const()[name = string("op_1604_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1604_dilations_0 = const()[name = string("op_1604_dilations_0"), val = tensor([1, 1])]; + int32 var_1604_groups_0 = const()[name = string("op_1604_groups_0"), val = int32(1)]; + tensor var_1604 = conv(dilations = var_1604_dilations_0, groups = var_1604_groups_0, pad = var_1604_pad_0, pad_type = var_1604_pad_type_0, strides = var_1604_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1566_cast_fp16)[name = string("op_1604")]; + tensor var_1609 = const()[name = string("op_1609"), val = tensor([1, 8, 1, 128])]; + tensor var_1610 = reshape(shape = var_1609, x = var_1604)[name = string("op_1610")]; + string var_1626_pad_type_0 = const()[name = string("op_1626_pad_type_0"), val = string("valid")]; + tensor var_1626_strides_0 = const()[name = string("op_1626_strides_0"), val = tensor([1, 1])]; + tensor var_1626_pad_0 = const()[name = string("op_1626_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1626_dilations_0 = const()[name = string("op_1626_dilations_0"), val = tensor([1, 1])]; + int32 var_1626_groups_0 = const()[name = string("op_1626_groups_0"), val = int32(1)]; + tensor var_1626 = conv(dilations = var_1626_dilations_0, groups = var_1626_groups_0, pad = var_1626_pad_0, pad_type = var_1626_pad_type_0, strides = var_1626_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1566_cast_fp16)[name = string("op_1626")]; + tensor var_1631 = const()[name = string("op_1631"), val = tensor([1, 8, 1, 128])]; + tensor var_1632 = reshape(shape = var_1631, x = var_1626)[name = string("op_1632")]; + int32 var_1647 = const()[name = string("op_1647"), val = int32(-1)]; + fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; + tensor var_1649 = mul(x = var_1588, y = const_4_promoted)[name = string("op_1649")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_1647, interleave = input_5_interleave_0, values = (var_1588, var_1649))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1644_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor normed_7_begin_0 = const()[name = string("normed_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_7_end_0 = const()[name = string("normed_7_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_7_end_mask_0 = const()[name = string("normed_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_7 = slice_by_index(begin = normed_7_begin_0, end = normed_7_end_0, end_mask = normed_7_end_mask_0, x = normed_5_cast_fp16)[name = string("normed_7")]; + tensor const_7 = const()[name = string("const_7"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406084864)))]; + tensor q_1 = mul(x = normed_7, y = const_7)[name = string("q_1")]; + int32 var_1672 = const()[name = string("op_1672"), val = int32(-1)]; + fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)]; + tensor var_1674 = mul(x = var_1610, y = const_8_promoted)[name = string("op_1674")]; + bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; + tensor input_7 = concat(axis = var_1672, interleave = input_7_interleave_0, values = (var_1610, var_1674))[name = string("input_7")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_1669_to_fp16 = const()[name = string("op_1669_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1669_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; + tensor normed_11_begin_0 = const()[name = string("normed_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_11_end_0 = const()[name = string("normed_11_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_11_end_mask_0 = const()[name = string("normed_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_11 = slice_by_index(begin = normed_11_begin_0, end = normed_11_end_0, end_mask = normed_11_end_mask_0, x = normed_9_cast_fp16)[name = string("normed_11")]; + tensor const_11 = const()[name = string("const_11"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406085184)))]; + tensor k_1 = mul(x = normed_11, y = const_11)[name = string("k_1")]; + tensor var_1688 = mul(x = q_1, y = cos_1_cast_fp16)[name = string("op_1688")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1)[name = string("x2_1")]; + fp16 const_14_promoted = const()[name = string("const_14_promoted"), val = fp16(-0x1p+0)]; + tensor var_1709 = mul(x = x2_1, y = const_14_promoted)[name = string("op_1709")]; + int32 var_1711 = const()[name = string("op_1711"), val = int32(-1)]; + bool var_1712_interleave_0 = const()[name = string("op_1712_interleave_0"), val = bool(false)]; + tensor var_1712 = concat(axis = var_1711, interleave = var_1712_interleave_0, values = (var_1709, x1_1))[name = string("op_1712")]; + tensor var_1713 = mul(x = var_1712, y = sin_1_cast_fp16)[name = string("op_1713")]; + tensor query_states_1 = add(x = var_1688, y = var_1713)[name = string("query_states_1")]; + tensor var_1716 = mul(x = k_1, y = cos_1_cast_fp16)[name = string("op_1716")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1)[name = string("x2_3")]; + fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; + tensor var_1737 = mul(x = x2_3, y = const_17_promoted)[name = string("op_1737")]; + int32 var_1739 = const()[name = string("op_1739"), val = int32(-1)]; + bool var_1740_interleave_0 = const()[name = string("op_1740_interleave_0"), val = bool(false)]; + tensor var_1740 = concat(axis = var_1739, interleave = var_1740_interleave_0, values = (var_1737, x1_3))[name = string("op_1740")]; + tensor var_1741 = mul(x = var_1740, y = sin_1_cast_fp16)[name = string("op_1741")]; + tensor key_states_1 = add(x = var_1716, y = var_1741)[name = string("key_states_1")]; + int32 var_1745 = const()[name = string("op_1745"), val = int32(1)]; + tensor var_1746 = add(x = current_pos, y = var_1745)[name = string("op_1746")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_1746, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = key_states_1, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; + tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([28])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([29])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_1746, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_1632, x = coreml_update_state_56)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; + tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; + tensor var_1796_begin_0 = const()[name = string("op_1796_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1796_end_0 = const()[name = string("op_1796_end_0"), val = tensor([1, 8, 4096, 128])]; + tensor var_1796_end_mask_0 = const()[name = string("op_1796_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1796_cast_fp16 = slice_by_index(begin = var_1796_begin_0, end = var_1796_end_0, end_mask = var_1796_end_mask_0, x = coreml_update_state_57)[name = string("op_1796_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_1796_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_1803_begin_0 = const()[name = string("op_1803_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor var_1803_end_0 = const()[name = string("op_1803_end_0"), val = tensor([29, 8, 4096, 128])]; + tensor var_1803_end_mask_0 = const()[name = string("op_1803_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1803_cast_fp16 = slice_by_index(begin = var_1803_begin_0, end = var_1803_end_0, end_mask = var_1803_end_mask_0, x = coreml_update_state_57)[name = string("op_1803_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_1803_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_3_axes_0 = const()[name = string("x_3_axes_0"), val = tensor([1])]; + tensor x_3_cast_fp16 = expand_dims(axes = x_3_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor var_1840 = const()[name = string("op_1840"), val = tensor([1, 2, 1, 1])]; + tensor x_5_cast_fp16 = tile(reps = var_1840, x = x_3_cast_fp16)[name = string("x_5_cast_fp16")]; + tensor var_1852 = const()[name = string("op_1852"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_3_cast_fp16 = reshape(shape = var_1852, x = x_5_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor x_9_axes_0 = const()[name = string("x_9_axes_0"), val = tensor([1])]; + tensor x_9_cast_fp16 = expand_dims(axes = x_9_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_9_cast_fp16")]; + tensor var_1860 = const()[name = string("op_1860"), val = tensor([1, 2, 1, 1])]; + tensor x_11_cast_fp16 = tile(reps = var_1860, x = x_9_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_1872 = const()[name = string("op_1872"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_3_cast_fp16 = reshape(shape = var_1872, x = x_11_cast_fp16)[name = string("value_states_3_cast_fp16")]; + bool var_1887_transpose_x_1 = const()[name = string("op_1887_transpose_x_1"), val = bool(false)]; + bool var_1887_transpose_y_1 = const()[name = string("op_1887_transpose_y_1"), val = bool(true)]; + tensor var_1887 = matmul(transpose_x = var_1887_transpose_x_1, transpose_y = var_1887_transpose_y_1, x = query_states_1, y = key_states_3_cast_fp16)[name = string("op_1887")]; + fp16 var_1888_to_fp16 = const()[name = string("op_1888_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_1887, y = var_1888_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; + int32 var_1923 = const()[name = string("op_1923"), val = int32(-1)]; + tensor attn_weights_5_cast_fp16 = softmax(axis = var_1923, x = attn_weights_3_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_5_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_1934_perm_0 = const()[name = string("op_1934_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1938 = const()[name = string("op_1938"), val = tensor([1, 1, 2048])]; + tensor var_1934_cast_fp16 = transpose(perm = var_1934_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_166")]; + tensor attn_output_5_cast_fp16 = reshape(shape = var_1938, x = var_1934_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_1943 = const()[name = string("op_1943"), val = tensor([0, 2, 1])]; + string var_1959_pad_type_0 = const()[name = string("op_1959_pad_type_0"), val = string("valid")]; + int32 var_1959_groups_0 = const()[name = string("op_1959_groups_0"), val = int32(1)]; + tensor var_1959_strides_0 = const()[name = string("op_1959_strides_0"), val = tensor([1])]; + tensor var_1959_pad_0 = const()[name = string("op_1959_pad_0"), val = tensor([0, 0])]; + tensor var_1959_dilations_0 = const()[name = string("op_1959_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406085504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408182720))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1944_cast_fp16 = transpose(perm = var_1943, x = attn_output_5_cast_fp16)[name = string("transpose_165")]; + tensor var_1959_cast_fp16 = conv(dilations = var_1959_dilations_0, groups = var_1959_groups_0, pad = var_1959_pad_0, pad_type = var_1959_pad_type_0, strides = var_1959_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1944_cast_fp16)[name = string("op_1959_cast_fp16")]; + tensor var_1963 = const()[name = string("op_1963"), val = tensor([0, 2, 1])]; + tensor attn_output_9_cast_fp16 = transpose(perm = var_1963, x = var_1959_cast_fp16)[name = string("transpose_164")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + int32 var_1976 = const()[name = string("op_1976"), val = int32(-1)]; + fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1978_cast_fp16 = mul(x = hidden_states_9_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_1978_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_1976, interleave = input_11_interleave_0, values = (hidden_states_9_cast_fp16, var_1978_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_1973_to_fp16 = const()[name = string("op_1973_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_1973_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor normed_15_begin_0 = const()[name = string("normed_15_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_15_end_0 = const()[name = string("normed_15_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_15_end_mask_0 = const()[name = string("normed_15_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_15_cast_fp16 = slice_by_index(begin = normed_15_begin_0, end = normed_15_end_0, end_mask = normed_15_end_mask_0, x = normed_13_cast_fp16)[name = string("normed_15_cast_fp16")]; + tensor const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408248320)))]; + tensor x_13_cast_fp16 = mul(x = normed_15_cast_fp16, y = const_29_promoted_to_fp16)[name = string("x_13_cast_fp16")]; + tensor var_2003 = const()[name = string("op_2003"), val = tensor([0, 2, 1])]; + tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; + tensor var_2004 = transpose(perm = var_2003, x = x_13_cast_fp16)[name = string("transpose_163")]; + tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_2004)[name = string("input_13")]; + string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; + tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; + tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; + int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; + tensor input_15 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("input_15")]; + string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; + tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; + tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; + int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; + tensor b_1 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("b_1")]; + tensor c_1 = silu(x = input_15)[name = string("c_1")]; + tensor input_17 = mul(x = c_1, y = b_1)[name = string("input_17")]; + string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; + tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; + tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; + int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; + tensor e_1 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_17)[name = string("e_1")]; + tensor var_2026_axes_0 = const()[name = string("op_2026_axes_0"), val = tensor([2])]; + tensor var_2026 = squeeze(axes = var_2026_axes_0, x = e_1)[name = string("op_2026")]; + tensor var_2027 = const()[name = string("op_2027"), val = tensor([0, 2, 1])]; + tensor var_2028 = transpose(perm = var_2027, x = var_2026)[name = string("transpose_162")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = var_2028)[name = string("hidden_states_11_cast_fp16")]; + int32 var_2040 = const()[name = string("op_2040"), val = int32(-1)]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2042_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2042_cast_fp16")]; + bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; + tensor input_19_cast_fp16 = concat(axis = var_2040, interleave = input_19_interleave_0, values = (hidden_states_11_cast_fp16, var_2042_cast_fp16))[name = string("input_19_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_2037_to_fp16 = const()[name = string("op_2037_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2037_to_fp16, x = input_19_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor normed_19_begin_0 = const()[name = string("normed_19_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_19_end_0 = const()[name = string("normed_19_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_19_end_mask_0 = const()[name = string("normed_19_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_19_cast_fp16 = slice_by_index(begin = normed_19_begin_0, end = normed_19_end_0, end_mask = normed_19_end_mask_0, x = normed_17_cast_fp16)[name = string("normed_19_cast_fp16")]; + tensor const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408250432)))]; + tensor hidden_states_13_cast_fp16 = mul(x = normed_19_cast_fp16, y = const_33_promoted_to_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor var_2059 = const()[name = string("op_2059"), val = tensor([0, 2, 1])]; + tensor var_2062_axes_0 = const()[name = string("op_2062_axes_0"), val = tensor([2])]; + tensor var_2060_cast_fp16 = transpose(perm = var_2059, x = hidden_states_13_cast_fp16)[name = string("transpose_161")]; + tensor var_2062_cast_fp16 = expand_dims(axes = var_2062_axes_0, x = var_2060_cast_fp16)[name = string("op_2062_cast_fp16")]; + string var_2078_pad_type_0 = const()[name = string("op_2078_pad_type_0"), val = string("valid")]; + tensor var_2078_strides_0 = const()[name = string("op_2078_strides_0"), val = tensor([1, 1])]; + tensor var_2078_pad_0 = const()[name = string("op_2078_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2078_dilations_0 = const()[name = string("op_2078_dilations_0"), val = tensor([1, 1])]; + int32 var_2078_groups_0 = const()[name = string("op_2078_groups_0"), val = int32(1)]; + tensor var_2078 = conv(dilations = var_2078_dilations_0, groups = var_2078_groups_0, pad = var_2078_pad_0, pad_type = var_2078_pad_type_0, strides = var_2078_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2062_cast_fp16)[name = string("op_2078")]; + tensor var_2083 = const()[name = string("op_2083"), val = tensor([1, 16, 1, 128])]; + tensor var_2084 = reshape(shape = var_2083, x = var_2078)[name = string("op_2084")]; + string var_2100_pad_type_0 = const()[name = string("op_2100_pad_type_0"), val = string("valid")]; + tensor var_2100_strides_0 = const()[name = string("op_2100_strides_0"), val = tensor([1, 1])]; + tensor var_2100_pad_0 = const()[name = string("op_2100_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2100_dilations_0 = const()[name = string("op_2100_dilations_0"), val = tensor([1, 1])]; + int32 var_2100_groups_0 = const()[name = string("op_2100_groups_0"), val = int32(1)]; + tensor var_2100 = conv(dilations = var_2100_dilations_0, groups = var_2100_groups_0, pad = var_2100_pad_0, pad_type = var_2100_pad_type_0, strides = var_2100_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2062_cast_fp16)[name = string("op_2100")]; + tensor var_2105 = const()[name = string("op_2105"), val = tensor([1, 8, 1, 128])]; + tensor var_2106 = reshape(shape = var_2105, x = var_2100)[name = string("op_2106")]; + string var_2122_pad_type_0 = const()[name = string("op_2122_pad_type_0"), val = string("valid")]; + tensor var_2122_strides_0 = const()[name = string("op_2122_strides_0"), val = tensor([1, 1])]; + tensor var_2122_pad_0 = const()[name = string("op_2122_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2122_dilations_0 = const()[name = string("op_2122_dilations_0"), val = tensor([1, 1])]; + int32 var_2122_groups_0 = const()[name = string("op_2122_groups_0"), val = int32(1)]; + tensor var_2122 = conv(dilations = var_2122_dilations_0, groups = var_2122_groups_0, pad = var_2122_pad_0, pad_type = var_2122_pad_type_0, strides = var_2122_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2062_cast_fp16)[name = string("op_2122")]; + tensor var_2127 = const()[name = string("op_2127"), val = tensor([1, 8, 1, 128])]; + tensor var_2128 = reshape(shape = var_2127, x = var_2122)[name = string("op_2128")]; + int32 var_2143 = const()[name = string("op_2143"), val = int32(-1)]; + fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; + tensor var_2145 = mul(x = var_2084, y = const_34_promoted)[name = string("op_2145")]; + bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; + tensor input_23 = concat(axis = var_2143, interleave = input_23_interleave_0, values = (var_2084, var_2145))[name = string("input_23")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_2140_to_fp16 = const()[name = string("op_2140_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2140_to_fp16, x = input_23)[name = string("normed_21_cast_fp16")]; + tensor normed_23_begin_0 = const()[name = string("normed_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_23_end_0 = const()[name = string("normed_23_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_23_end_mask_0 = const()[name = string("normed_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_23 = slice_by_index(begin = normed_23_begin_0, end = normed_23_end_0, end_mask = normed_23_end_mask_0, x = normed_21_cast_fp16)[name = string("normed_23")]; + tensor const_37 = const()[name = string("const_37"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408252544)))]; + tensor q_3 = mul(x = normed_23, y = const_37)[name = string("q_3")]; + int32 var_2168 = const()[name = string("op_2168"), val = int32(-1)]; + fp16 const_38_promoted = const()[name = string("const_38_promoted"), val = fp16(-0x1p+0)]; + tensor var_2170 = mul(x = var_2106, y = const_38_promoted)[name = string("op_2170")]; + bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25 = concat(axis = var_2168, interleave = input_25_interleave_0, values = (var_2106, var_2170))[name = string("input_25")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_2165_to_fp16 = const()[name = string("op_2165_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2165_to_fp16, x = input_25)[name = string("normed_25_cast_fp16")]; + tensor normed_27_begin_0 = const()[name = string("normed_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_27_end_0 = const()[name = string("normed_27_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_27_end_mask_0 = const()[name = string("normed_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_27 = slice_by_index(begin = normed_27_begin_0, end = normed_27_end_0, end_mask = normed_27_end_mask_0, x = normed_25_cast_fp16)[name = string("normed_27")]; + tensor const_41 = const()[name = string("const_41"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408252864)))]; + tensor k_3 = mul(x = normed_27, y = const_41)[name = string("k_3")]; + tensor var_2184 = mul(x = q_3, y = cos_1_cast_fp16)[name = string("op_2184")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3)[name = string("x2_5")]; + fp16 const_44_promoted = const()[name = string("const_44_promoted"), val = fp16(-0x1p+0)]; + tensor var_2205 = mul(x = x2_5, y = const_44_promoted)[name = string("op_2205")]; + int32 var_2207 = const()[name = string("op_2207"), val = int32(-1)]; + bool var_2208_interleave_0 = const()[name = string("op_2208_interleave_0"), val = bool(false)]; + tensor var_2208 = concat(axis = var_2207, interleave = var_2208_interleave_0, values = (var_2205, x1_5))[name = string("op_2208")]; + tensor var_2209 = mul(x = var_2208, y = sin_1_cast_fp16)[name = string("op_2209")]; + tensor query_states_5 = add(x = var_2184, y = var_2209)[name = string("query_states_5")]; + tensor var_2212 = mul(x = k_3, y = cos_1_cast_fp16)[name = string("op_2212")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3)[name = string("x2_7")]; + fp16 const_47_promoted = const()[name = string("const_47_promoted"), val = fp16(-0x1p+0)]; + tensor var_2233 = mul(x = x2_7, y = const_47_promoted)[name = string("op_2233")]; + int32 var_2235 = const()[name = string("op_2235"), val = int32(-1)]; + bool var_2236_interleave_0 = const()[name = string("op_2236_interleave_0"), val = bool(false)]; + tensor var_2236 = concat(axis = var_2235, interleave = var_2236_interleave_0, values = (var_2233, x1_7))[name = string("op_2236")]; + tensor var_2237 = mul(x = var_2236, y = sin_1_cast_fp16)[name = string("op_2237")]; + tensor key_states_5 = add(x = var_2212, y = var_2237)[name = string("key_states_5")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; + tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_1746, concat_11_values3_0))[name = string("concat_11")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = key_states_5, x = coreml_update_state_57)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; + tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([29])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([30])]; + int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; + bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; + tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_1746, concat_15_values3_0))[name = string("concat_15")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_2128, x = coreml_update_state_58)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; + tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; + tensor var_2292_begin_0 = const()[name = string("op_2292_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_2292_end_0 = const()[name = string("op_2292_end_0"), val = tensor([2, 8, 4096, 128])]; + tensor var_2292_end_mask_0 = const()[name = string("op_2292_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2292_cast_fp16 = slice_by_index(begin = var_2292_begin_0, end = var_2292_end_0, end_mask = var_2292_end_mask_0, x = coreml_update_state_59)[name = string("op_2292_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_2292_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_2299_begin_0 = const()[name = string("op_2299_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor var_2299_end_0 = const()[name = string("op_2299_end_0"), val = tensor([30, 8, 4096, 128])]; + tensor var_2299_end_mask_0 = const()[name = string("op_2299_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2299_cast_fp16 = slice_by_index(begin = var_2299_begin_0, end = var_2299_end_0, end_mask = var_2299_end_mask_0, x = coreml_update_state_59)[name = string("op_2299_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_2299_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_19_axes_0 = const()[name = string("x_19_axes_0"), val = tensor([1])]; + tensor x_19_cast_fp16 = expand_dims(axes = x_19_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_2336 = const()[name = string("op_2336"), val = tensor([1, 2, 1, 1])]; + tensor x_21_cast_fp16 = tile(reps = var_2336, x = x_19_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor var_2348 = const()[name = string("op_2348"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_7_cast_fp16 = reshape(shape = var_2348, x = x_21_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor x_25_axes_0 = const()[name = string("x_25_axes_0"), val = tensor([1])]; + tensor x_25_cast_fp16 = expand_dims(axes = x_25_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_2356 = const()[name = string("op_2356"), val = tensor([1, 2, 1, 1])]; + tensor x_27_cast_fp16 = tile(reps = var_2356, x = x_25_cast_fp16)[name = string("x_27_cast_fp16")]; + tensor var_2368 = const()[name = string("op_2368"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_9_cast_fp16 = reshape(shape = var_2368, x = x_27_cast_fp16)[name = string("value_states_9_cast_fp16")]; + bool var_2383_transpose_x_1 = const()[name = string("op_2383_transpose_x_1"), val = bool(false)]; + bool var_2383_transpose_y_1 = const()[name = string("op_2383_transpose_y_1"), val = bool(true)]; + tensor var_2383 = matmul(transpose_x = var_2383_transpose_x_1, transpose_y = var_2383_transpose_y_1, x = query_states_5, y = key_states_7_cast_fp16)[name = string("op_2383")]; + fp16 var_2384_to_fp16 = const()[name = string("op_2384_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_7_cast_fp16 = mul(x = var_2383, y = var_2384_to_fp16)[name = string("attn_weights_7_cast_fp16")]; + tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("attn_weights_9_cast_fp16")]; + int32 var_2419 = const()[name = string("op_2419"), val = int32(-1)]; + tensor attn_weights_11_cast_fp16 = softmax(axis = var_2419, x = attn_weights_9_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; + bool attn_output_11_transpose_x_0 = const()[name = string("attn_output_11_transpose_x_0"), val = bool(false)]; + bool attn_output_11_transpose_y_0 = const()[name = string("attn_output_11_transpose_y_0"), val = bool(false)]; + tensor attn_output_11_cast_fp16 = matmul(transpose_x = attn_output_11_transpose_x_0, transpose_y = attn_output_11_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_9_cast_fp16)[name = string("attn_output_11_cast_fp16")]; + tensor var_2430_perm_0 = const()[name = string("op_2430_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2434 = const()[name = string("op_2434"), val = tensor([1, 1, 2048])]; + tensor var_2430_cast_fp16 = transpose(perm = var_2430_perm_0, x = attn_output_11_cast_fp16)[name = string("transpose_160")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_2434, x = var_2430_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2439 = const()[name = string("op_2439"), val = tensor([0, 2, 1])]; + string var_2455_pad_type_0 = const()[name = string("op_2455_pad_type_0"), val = string("valid")]; + int32 var_2455_groups_0 = const()[name = string("op_2455_groups_0"), val = int32(1)]; + tensor var_2455_strides_0 = const()[name = string("op_2455_strides_0"), val = tensor([1])]; + tensor var_2455_pad_0 = const()[name = string("op_2455_pad_0"), val = tensor([0, 0])]; + tensor var_2455_dilations_0 = const()[name = string("op_2455_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408253184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410350400))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2440_cast_fp16 = transpose(perm = var_2439, x = attn_output_15_cast_fp16)[name = string("transpose_159")]; + tensor var_2455_cast_fp16 = conv(dilations = var_2455_dilations_0, groups = var_2455_groups_0, pad = var_2455_pad_0, pad_type = var_2455_pad_type_0, strides = var_2455_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2440_cast_fp16)[name = string("op_2455_cast_fp16")]; + tensor var_2459 = const()[name = string("op_2459"), val = tensor([0, 2, 1])]; + tensor attn_output_19_cast_fp16 = transpose(perm = var_2459, x = var_2455_cast_fp16)[name = string("transpose_158")]; + tensor hidden_states_19_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + int32 var_2472 = const()[name = string("op_2472"), val = int32(-1)]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2474_cast_fp16 = mul(x = hidden_states_19_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_2474_cast_fp16")]; + bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; + tensor input_29_cast_fp16 = concat(axis = var_2472, interleave = input_29_interleave_0, values = (hidden_states_19_cast_fp16, var_2474_cast_fp16))[name = string("input_29_cast_fp16")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_2469_to_fp16 = const()[name = string("op_2469_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2469_to_fp16, x = input_29_cast_fp16)[name = string("normed_29_cast_fp16")]; + tensor normed_31_begin_0 = const()[name = string("normed_31_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_31_end_0 = const()[name = string("normed_31_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_31_end_mask_0 = const()[name = string("normed_31_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_31_cast_fp16 = slice_by_index(begin = normed_31_begin_0, end = normed_31_end_0, end_mask = normed_31_end_mask_0, x = normed_29_cast_fp16)[name = string("normed_31_cast_fp16")]; + tensor const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410416000)))]; + tensor x_29_cast_fp16 = mul(x = normed_31_cast_fp16, y = const_59_promoted_to_fp16)[name = string("x_29_cast_fp16")]; + tensor var_2499 = const()[name = string("op_2499"), val = tensor([0, 2, 1])]; + tensor input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor([2])]; + tensor var_2500 = transpose(perm = var_2499, x = x_29_cast_fp16)[name = string("transpose_157")]; + tensor input_31 = expand_dims(axes = input_31_axes_0, x = var_2500)[name = string("input_31")]; + string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; + tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; + tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; + int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; + tensor input_33 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_31)[name = string("input_33")]; + string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; + tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; + tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; + int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; + tensor b_3 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_31)[name = string("b_3")]; + tensor c_3 = silu(x = input_33)[name = string("c_3")]; + tensor input_35 = mul(x = c_3, y = b_3)[name = string("input_35")]; + string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; + tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; + tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; + int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; + tensor e_3 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_35)[name = string("e_3")]; + tensor var_2522_axes_0 = const()[name = string("op_2522_axes_0"), val = tensor([2])]; + tensor var_2522 = squeeze(axes = var_2522_axes_0, x = e_3)[name = string("op_2522")]; + tensor var_2523 = const()[name = string("op_2523"), val = tensor([0, 2, 1])]; + tensor var_2524 = transpose(perm = var_2523, x = var_2522)[name = string("transpose_156")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = var_2524)[name = string("hidden_states_21_cast_fp16")]; + int32 var_2536 = const()[name = string("op_2536"), val = int32(-1)]; + fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2538_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_2538_cast_fp16")]; + bool input_37_interleave_0 = const()[name = string("input_37_interleave_0"), val = bool(false)]; + tensor input_37_cast_fp16 = concat(axis = var_2536, interleave = input_37_interleave_0, values = (hidden_states_21_cast_fp16, var_2538_cast_fp16))[name = string("input_37_cast_fp16")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_2533_to_fp16 = const()[name = string("op_2533_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2533_to_fp16, x = input_37_cast_fp16)[name = string("normed_33_cast_fp16")]; + tensor normed_35_begin_0 = const()[name = string("normed_35_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_35_end_0 = const()[name = string("normed_35_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_35_end_mask_0 = const()[name = string("normed_35_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_35_cast_fp16 = slice_by_index(begin = normed_35_begin_0, end = normed_35_end_0, end_mask = normed_35_end_mask_0, x = normed_33_cast_fp16)[name = string("normed_35_cast_fp16")]; + tensor const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410418112)))]; + tensor hidden_states_23_cast_fp16 = mul(x = normed_35_cast_fp16, y = const_63_promoted_to_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor var_2555 = const()[name = string("op_2555"), val = tensor([0, 2, 1])]; + tensor var_2558_axes_0 = const()[name = string("op_2558_axes_0"), val = tensor([2])]; + tensor var_2556_cast_fp16 = transpose(perm = var_2555, x = hidden_states_23_cast_fp16)[name = string("transpose_155")]; + tensor var_2558_cast_fp16 = expand_dims(axes = var_2558_axes_0, x = var_2556_cast_fp16)[name = string("op_2558_cast_fp16")]; + string var_2574_pad_type_0 = const()[name = string("op_2574_pad_type_0"), val = string("valid")]; + tensor var_2574_strides_0 = const()[name = string("op_2574_strides_0"), val = tensor([1, 1])]; + tensor var_2574_pad_0 = const()[name = string("op_2574_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2574_dilations_0 = const()[name = string("op_2574_dilations_0"), val = tensor([1, 1])]; + int32 var_2574_groups_0 = const()[name = string("op_2574_groups_0"), val = int32(1)]; + tensor var_2574 = conv(dilations = var_2574_dilations_0, groups = var_2574_groups_0, pad = var_2574_pad_0, pad_type = var_2574_pad_type_0, strides = var_2574_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_2558_cast_fp16)[name = string("op_2574")]; + tensor var_2579 = const()[name = string("op_2579"), val = tensor([1, 16, 1, 128])]; + tensor var_2580 = reshape(shape = var_2579, x = var_2574)[name = string("op_2580")]; + string var_2596_pad_type_0 = const()[name = string("op_2596_pad_type_0"), val = string("valid")]; + tensor var_2596_strides_0 = const()[name = string("op_2596_strides_0"), val = tensor([1, 1])]; + tensor var_2596_pad_0 = const()[name = string("op_2596_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2596_dilations_0 = const()[name = string("op_2596_dilations_0"), val = tensor([1, 1])]; + int32 var_2596_groups_0 = const()[name = string("op_2596_groups_0"), val = int32(1)]; + tensor var_2596 = conv(dilations = var_2596_dilations_0, groups = var_2596_groups_0, pad = var_2596_pad_0, pad_type = var_2596_pad_type_0, strides = var_2596_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_2558_cast_fp16)[name = string("op_2596")]; + tensor var_2601 = const()[name = string("op_2601"), val = tensor([1, 8, 1, 128])]; + tensor var_2602 = reshape(shape = var_2601, x = var_2596)[name = string("op_2602")]; + string var_2618_pad_type_0 = const()[name = string("op_2618_pad_type_0"), val = string("valid")]; + tensor var_2618_strides_0 = const()[name = string("op_2618_strides_0"), val = tensor([1, 1])]; + tensor var_2618_pad_0 = const()[name = string("op_2618_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2618_dilations_0 = const()[name = string("op_2618_dilations_0"), val = tensor([1, 1])]; + int32 var_2618_groups_0 = const()[name = string("op_2618_groups_0"), val = int32(1)]; + tensor var_2618 = conv(dilations = var_2618_dilations_0, groups = var_2618_groups_0, pad = var_2618_pad_0, pad_type = var_2618_pad_type_0, strides = var_2618_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_2558_cast_fp16)[name = string("op_2618")]; + tensor var_2623 = const()[name = string("op_2623"), val = tensor([1, 8, 1, 128])]; + tensor var_2624 = reshape(shape = var_2623, x = var_2618)[name = string("op_2624")]; + int32 var_2639 = const()[name = string("op_2639"), val = int32(-1)]; + fp16 const_64_promoted = const()[name = string("const_64_promoted"), val = fp16(-0x1p+0)]; + tensor var_2641 = mul(x = var_2580, y = const_64_promoted)[name = string("op_2641")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41 = concat(axis = var_2639, interleave = input_41_interleave_0, values = (var_2580, var_2641))[name = string("input_41")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_2636_to_fp16 = const()[name = string("op_2636_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2636_to_fp16, x = input_41)[name = string("normed_37_cast_fp16")]; + tensor normed_39_begin_0 = const()[name = string("normed_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_39_end_0 = const()[name = string("normed_39_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_39_end_mask_0 = const()[name = string("normed_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_39 = slice_by_index(begin = normed_39_begin_0, end = normed_39_end_0, end_mask = normed_39_end_mask_0, x = normed_37_cast_fp16)[name = string("normed_39")]; + tensor const_67 = const()[name = string("const_67"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410420224)))]; + tensor q_5 = mul(x = normed_39, y = const_67)[name = string("q_5")]; + int32 var_2664 = const()[name = string("op_2664"), val = int32(-1)]; + fp16 const_68_promoted = const()[name = string("const_68_promoted"), val = fp16(-0x1p+0)]; + tensor var_2666 = mul(x = var_2602, y = const_68_promoted)[name = string("op_2666")]; + bool input_43_interleave_0 = const()[name = string("input_43_interleave_0"), val = bool(false)]; + tensor input_43 = concat(axis = var_2664, interleave = input_43_interleave_0, values = (var_2602, var_2666))[name = string("input_43")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_2661_to_fp16 = const()[name = string("op_2661_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_2661_to_fp16, x = input_43)[name = string("normed_41_cast_fp16")]; + tensor normed_43_begin_0 = const()[name = string("normed_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_43_end_0 = const()[name = string("normed_43_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_43_end_mask_0 = const()[name = string("normed_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_43 = slice_by_index(begin = normed_43_begin_0, end = normed_43_end_0, end_mask = normed_43_end_mask_0, x = normed_41_cast_fp16)[name = string("normed_43")]; + tensor const_71 = const()[name = string("const_71"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410420544)))]; + tensor k_5 = mul(x = normed_43, y = const_71)[name = string("k_5")]; + tensor var_2680 = mul(x = q_5, y = cos_1_cast_fp16)[name = string("op_2680")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5)[name = string("x2_9")]; + fp16 const_74_promoted = const()[name = string("const_74_promoted"), val = fp16(-0x1p+0)]; + tensor var_2701 = mul(x = x2_9, y = const_74_promoted)[name = string("op_2701")]; + int32 var_2703 = const()[name = string("op_2703"), val = int32(-1)]; + bool var_2704_interleave_0 = const()[name = string("op_2704_interleave_0"), val = bool(false)]; + tensor var_2704 = concat(axis = var_2703, interleave = var_2704_interleave_0, values = (var_2701, x1_9))[name = string("op_2704")]; + tensor var_2705 = mul(x = var_2704, y = sin_1_cast_fp16)[name = string("op_2705")]; + tensor query_states_9 = add(x = var_2680, y = var_2705)[name = string("query_states_9")]; + tensor var_2708 = mul(x = k_5, y = cos_1_cast_fp16)[name = string("op_2708")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5)[name = string("x2_11")]; + fp16 const_77_promoted = const()[name = string("const_77_promoted"), val = fp16(-0x1p+0)]; + tensor var_2729 = mul(x = x2_11, y = const_77_promoted)[name = string("op_2729")]; + int32 var_2731 = const()[name = string("op_2731"), val = int32(-1)]; + bool var_2732_interleave_0 = const()[name = string("op_2732_interleave_0"), val = bool(false)]; + tensor var_2732 = concat(axis = var_2731, interleave = var_2732_interleave_0, values = (var_2729, x1_11))[name = string("op_2732")]; + tensor var_2733 = mul(x = var_2732, y = sin_1_cast_fp16)[name = string("op_2733")]; + tensor key_states_9 = add(x = var_2708, y = var_2733)[name = string("key_states_9")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; + tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_1746, concat_19_values3_0))[name = string("concat_19")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = key_states_9, x = coreml_update_state_59)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; + tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([30])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([31])]; + int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; + bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; + tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; + tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; + tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; + int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; + bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; + tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_1746, concat_23_values3_0))[name = string("concat_23")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_2624, x = coreml_update_state_60)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; + tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; + tensor var_2788_begin_0 = const()[name = string("op_2788_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_2788_end_0 = const()[name = string("op_2788_end_0"), val = tensor([3, 8, 4096, 128])]; + tensor var_2788_end_mask_0 = const()[name = string("op_2788_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2788_cast_fp16 = slice_by_index(begin = var_2788_begin_0, end = var_2788_end_0, end_mask = var_2788_end_mask_0, x = coreml_update_state_61)[name = string("op_2788_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_2788_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_2795_begin_0 = const()[name = string("op_2795_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor var_2795_end_0 = const()[name = string("op_2795_end_0"), val = tensor([31, 8, 4096, 128])]; + tensor var_2795_end_mask_0 = const()[name = string("op_2795_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2795_cast_fp16 = slice_by_index(begin = var_2795_begin_0, end = var_2795_end_0, end_mask = var_2795_end_mask_0, x = coreml_update_state_61)[name = string("op_2795_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_2795_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_35_axes_0 = const()[name = string("x_35_axes_0"), val = tensor([1])]; + tensor x_35_cast_fp16 = expand_dims(axes = x_35_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_35_cast_fp16")]; + tensor var_2832 = const()[name = string("op_2832"), val = tensor([1, 2, 1, 1])]; + tensor x_37_cast_fp16 = tile(reps = var_2832, x = x_35_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_2844 = const()[name = string("op_2844"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_11_cast_fp16 = reshape(shape = var_2844, x = x_37_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor x_41_axes_0 = const()[name = string("x_41_axes_0"), val = tensor([1])]; + tensor x_41_cast_fp16 = expand_dims(axes = x_41_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_2852 = const()[name = string("op_2852"), val = tensor([1, 2, 1, 1])]; + tensor x_43_cast_fp16 = tile(reps = var_2852, x = x_41_cast_fp16)[name = string("x_43_cast_fp16")]; + tensor var_2864 = const()[name = string("op_2864"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_15_cast_fp16 = reshape(shape = var_2864, x = x_43_cast_fp16)[name = string("value_states_15_cast_fp16")]; + bool var_2879_transpose_x_1 = const()[name = string("op_2879_transpose_x_1"), val = bool(false)]; + bool var_2879_transpose_y_1 = const()[name = string("op_2879_transpose_y_1"), val = bool(true)]; + tensor var_2879 = matmul(transpose_x = var_2879_transpose_x_1, transpose_y = var_2879_transpose_y_1, x = query_states_9, y = key_states_11_cast_fp16)[name = string("op_2879")]; + fp16 var_2880_to_fp16 = const()[name = string("op_2880_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_2879, y = var_2880_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; + int32 var_2915 = const()[name = string("op_2915"), val = int32(-1)]; + tensor attn_weights_17_cast_fp16 = softmax(axis = var_2915, x = attn_weights_15_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; + bool attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; + bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; + tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = attn_weights_17_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_2926_perm_0 = const()[name = string("op_2926_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2930 = const()[name = string("op_2930"), val = tensor([1, 1, 2048])]; + tensor var_2926_cast_fp16 = transpose(perm = var_2926_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_154")]; + tensor attn_output_25_cast_fp16 = reshape(shape = var_2930, x = var_2926_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_2935 = const()[name = string("op_2935"), val = tensor([0, 2, 1])]; + string var_2951_pad_type_0 = const()[name = string("op_2951_pad_type_0"), val = string("valid")]; + int32 var_2951_groups_0 = const()[name = string("op_2951_groups_0"), val = int32(1)]; + tensor var_2951_strides_0 = const()[name = string("op_2951_strides_0"), val = tensor([1])]; + tensor var_2951_pad_0 = const()[name = string("op_2951_pad_0"), val = tensor([0, 0])]; + tensor var_2951_dilations_0 = const()[name = string("op_2951_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410420864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412518080))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2936_cast_fp16 = transpose(perm = var_2935, x = attn_output_25_cast_fp16)[name = string("transpose_153")]; + tensor var_2951_cast_fp16 = conv(dilations = var_2951_dilations_0, groups = var_2951_groups_0, pad = var_2951_pad_0, pad_type = var_2951_pad_type_0, strides = var_2951_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_2936_cast_fp16)[name = string("op_2951_cast_fp16")]; + tensor var_2955 = const()[name = string("op_2955"), val = tensor([0, 2, 1])]; + tensor attn_output_29_cast_fp16 = transpose(perm = var_2955, x = var_2951_cast_fp16)[name = string("transpose_152")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + int32 var_2968 = const()[name = string("op_2968"), val = int32(-1)]; + fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2970_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = const_86_promoted_to_fp16)[name = string("op_2970_cast_fp16")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47_cast_fp16 = concat(axis = var_2968, interleave = input_47_interleave_0, values = (hidden_states_29_cast_fp16, var_2970_cast_fp16))[name = string("input_47_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_2965_to_fp16 = const()[name = string("op_2965_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_2965_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor normed_47_begin_0 = const()[name = string("normed_47_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_47_end_0 = const()[name = string("normed_47_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_47_end_mask_0 = const()[name = string("normed_47_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_47_cast_fp16 = slice_by_index(begin = normed_47_begin_0, end = normed_47_end_0, end_mask = normed_47_end_mask_0, x = normed_45_cast_fp16)[name = string("normed_47_cast_fp16")]; + tensor const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412583680)))]; + tensor x_45_cast_fp16 = mul(x = normed_47_cast_fp16, y = const_89_promoted_to_fp16)[name = string("x_45_cast_fp16")]; + tensor var_2995 = const()[name = string("op_2995"), val = tensor([0, 2, 1])]; + tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; + tensor var_2996 = transpose(perm = var_2995, x = x_45_cast_fp16)[name = string("transpose_151")]; + tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_2996)[name = string("input_49")]; + string input_51_pad_type_0 = const()[name = string("input_51_pad_type_0"), val = string("valid")]; + tensor input_51_strides_0 = const()[name = string("input_51_strides_0"), val = tensor([1, 1])]; + tensor input_51_pad_0 = const()[name = string("input_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_51_dilations_0 = const()[name = string("input_51_dilations_0"), val = tensor([1, 1])]; + int32 input_51_groups_0 = const()[name = string("input_51_groups_0"), val = int32(1)]; + tensor input_51 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_49)[name = string("input_51")]; + string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; + tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; + tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; + int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; + tensor b_5 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_49)[name = string("b_5")]; + tensor c_5 = silu(x = input_51)[name = string("c_5")]; + tensor input_53 = mul(x = c_5, y = b_5)[name = string("input_53")]; + string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; + tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; + tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; + int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; + tensor e_5 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_53)[name = string("e_5")]; + tensor var_3018_axes_0 = const()[name = string("op_3018_axes_0"), val = tensor([2])]; + tensor var_3018 = squeeze(axes = var_3018_axes_0, x = e_5)[name = string("op_3018")]; + tensor var_3019 = const()[name = string("op_3019"), val = tensor([0, 2, 1])]; + tensor var_3020 = transpose(perm = var_3019, x = var_3018)[name = string("transpose_150")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_3020)[name = string("hidden_states_31_cast_fp16")]; + int32 var_3032 = const()[name = string("op_3032"), val = int32(-1)]; + fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3034_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_90_promoted_to_fp16)[name = string("op_3034_cast_fp16")]; + bool input_55_interleave_0 = const()[name = string("input_55_interleave_0"), val = bool(false)]; + tensor input_55_cast_fp16 = concat(axis = var_3032, interleave = input_55_interleave_0, values = (hidden_states_31_cast_fp16, var_3034_cast_fp16))[name = string("input_55_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_3029_to_fp16 = const()[name = string("op_3029_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_3029_to_fp16, x = input_55_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor normed_51_begin_0 = const()[name = string("normed_51_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_51_end_0 = const()[name = string("normed_51_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_51_end_mask_0 = const()[name = string("normed_51_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_51_cast_fp16 = slice_by_index(begin = normed_51_begin_0, end = normed_51_end_0, end_mask = normed_51_end_mask_0, x = normed_49_cast_fp16)[name = string("normed_51_cast_fp16")]; + tensor const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412585792)))]; + tensor hidden_states_33_cast_fp16 = mul(x = normed_51_cast_fp16, y = const_93_promoted_to_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor var_3051 = const()[name = string("op_3051"), val = tensor([0, 2, 1])]; + tensor var_3054_axes_0 = const()[name = string("op_3054_axes_0"), val = tensor([2])]; + tensor var_3052_cast_fp16 = transpose(perm = var_3051, x = hidden_states_33_cast_fp16)[name = string("transpose_149")]; + tensor var_3054_cast_fp16 = expand_dims(axes = var_3054_axes_0, x = var_3052_cast_fp16)[name = string("op_3054_cast_fp16")]; + string var_3070_pad_type_0 = const()[name = string("op_3070_pad_type_0"), val = string("valid")]; + tensor var_3070_strides_0 = const()[name = string("op_3070_strides_0"), val = tensor([1, 1])]; + tensor var_3070_pad_0 = const()[name = string("op_3070_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3070_dilations_0 = const()[name = string("op_3070_dilations_0"), val = tensor([1, 1])]; + int32 var_3070_groups_0 = const()[name = string("op_3070_groups_0"), val = int32(1)]; + tensor var_3070 = conv(dilations = var_3070_dilations_0, groups = var_3070_groups_0, pad = var_3070_pad_0, pad_type = var_3070_pad_type_0, strides = var_3070_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3054_cast_fp16)[name = string("op_3070")]; + tensor var_3075 = const()[name = string("op_3075"), val = tensor([1, 16, 1, 128])]; + tensor var_3076 = reshape(shape = var_3075, x = var_3070)[name = string("op_3076")]; + string var_3092_pad_type_0 = const()[name = string("op_3092_pad_type_0"), val = string("valid")]; + tensor var_3092_strides_0 = const()[name = string("op_3092_strides_0"), val = tensor([1, 1])]; + tensor var_3092_pad_0 = const()[name = string("op_3092_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3092_dilations_0 = const()[name = string("op_3092_dilations_0"), val = tensor([1, 1])]; + int32 var_3092_groups_0 = const()[name = string("op_3092_groups_0"), val = int32(1)]; + tensor var_3092 = conv(dilations = var_3092_dilations_0, groups = var_3092_groups_0, pad = var_3092_pad_0, pad_type = var_3092_pad_type_0, strides = var_3092_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3054_cast_fp16)[name = string("op_3092")]; + tensor var_3097 = const()[name = string("op_3097"), val = tensor([1, 8, 1, 128])]; + tensor var_3098 = reshape(shape = var_3097, x = var_3092)[name = string("op_3098")]; + string var_3114_pad_type_0 = const()[name = string("op_3114_pad_type_0"), val = string("valid")]; + tensor var_3114_strides_0 = const()[name = string("op_3114_strides_0"), val = tensor([1, 1])]; + tensor var_3114_pad_0 = const()[name = string("op_3114_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3114_dilations_0 = const()[name = string("op_3114_dilations_0"), val = tensor([1, 1])]; + int32 var_3114_groups_0 = const()[name = string("op_3114_groups_0"), val = int32(1)]; + tensor var_3114 = conv(dilations = var_3114_dilations_0, groups = var_3114_groups_0, pad = var_3114_pad_0, pad_type = var_3114_pad_type_0, strides = var_3114_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3054_cast_fp16)[name = string("op_3114")]; + tensor var_3119 = const()[name = string("op_3119"), val = tensor([1, 8, 1, 128])]; + tensor var_3120 = reshape(shape = var_3119, x = var_3114)[name = string("op_3120")]; + int32 var_3135 = const()[name = string("op_3135"), val = int32(-1)]; + fp16 const_94_promoted = const()[name = string("const_94_promoted"), val = fp16(-0x1p+0)]; + tensor var_3137 = mul(x = var_3076, y = const_94_promoted)[name = string("op_3137")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59 = concat(axis = var_3135, interleave = input_59_interleave_0, values = (var_3076, var_3137))[name = string("input_59")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_3132_to_fp16 = const()[name = string("op_3132_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_3132_to_fp16, x = input_59)[name = string("normed_53_cast_fp16")]; + tensor normed_55_begin_0 = const()[name = string("normed_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_55_end_0 = const()[name = string("normed_55_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_55_end_mask_0 = const()[name = string("normed_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_55 = slice_by_index(begin = normed_55_begin_0, end = normed_55_end_0, end_mask = normed_55_end_mask_0, x = normed_53_cast_fp16)[name = string("normed_55")]; + tensor const_97 = const()[name = string("const_97"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412587904)))]; + tensor q_7 = mul(x = normed_55, y = const_97)[name = string("q_7")]; + int32 var_3160 = const()[name = string("op_3160"), val = int32(-1)]; + fp16 const_98_promoted = const()[name = string("const_98_promoted"), val = fp16(-0x1p+0)]; + tensor var_3162 = mul(x = var_3098, y = const_98_promoted)[name = string("op_3162")]; + bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; + tensor input_61 = concat(axis = var_3160, interleave = input_61_interleave_0, values = (var_3098, var_3162))[name = string("input_61")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_3157_to_fp16 = const()[name = string("op_3157_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_3157_to_fp16, x = input_61)[name = string("normed_57_cast_fp16")]; + tensor normed_59_begin_0 = const()[name = string("normed_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_59_end_0 = const()[name = string("normed_59_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_59_end_mask_0 = const()[name = string("normed_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_59 = slice_by_index(begin = normed_59_begin_0, end = normed_59_end_0, end_mask = normed_59_end_mask_0, x = normed_57_cast_fp16)[name = string("normed_59")]; + tensor const_101 = const()[name = string("const_101"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412588224)))]; + tensor k_7 = mul(x = normed_59, y = const_101)[name = string("k_7")]; + tensor var_3176 = mul(x = q_7, y = cos_1_cast_fp16)[name = string("op_3176")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7)[name = string("x2_13")]; + fp16 const_104_promoted = const()[name = string("const_104_promoted"), val = fp16(-0x1p+0)]; + tensor var_3197 = mul(x = x2_13, y = const_104_promoted)[name = string("op_3197")]; + int32 var_3199 = const()[name = string("op_3199"), val = int32(-1)]; + bool var_3200_interleave_0 = const()[name = string("op_3200_interleave_0"), val = bool(false)]; + tensor var_3200 = concat(axis = var_3199, interleave = var_3200_interleave_0, values = (var_3197, x1_13))[name = string("op_3200")]; + tensor var_3201 = mul(x = var_3200, y = sin_1_cast_fp16)[name = string("op_3201")]; + tensor query_states_13 = add(x = var_3176, y = var_3201)[name = string("query_states_13")]; + tensor var_3204 = mul(x = k_7, y = cos_1_cast_fp16)[name = string("op_3204")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7)[name = string("x2_15")]; + fp16 const_107_promoted = const()[name = string("const_107_promoted"), val = fp16(-0x1p+0)]; + tensor var_3225 = mul(x = x2_15, y = const_107_promoted)[name = string("op_3225")]; + int32 var_3227 = const()[name = string("op_3227"), val = int32(-1)]; + bool var_3228_interleave_0 = const()[name = string("op_3228_interleave_0"), val = bool(false)]; + tensor var_3228 = concat(axis = var_3227, interleave = var_3228_interleave_0, values = (var_3225, x1_15))[name = string("op_3228")]; + tensor var_3229 = mul(x = var_3228, y = sin_1_cast_fp16)[name = string("op_3229")]; + tensor key_states_13 = add(x = var_3204, y = var_3229)[name = string("key_states_13")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_1746, concat_27_values3_0))[name = string("concat_27")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = key_states_13, x = coreml_update_state_61)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; + tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([31])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([32])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; + tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; + tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; + int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; + bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; + tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_1746, concat_31_values3_0))[name = string("concat_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_3120, x = coreml_update_state_62)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; + tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; + tensor var_3284_begin_0 = const()[name = string("op_3284_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_3284_end_0 = const()[name = string("op_3284_end_0"), val = tensor([4, 8, 4096, 128])]; + tensor var_3284_end_mask_0 = const()[name = string("op_3284_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3284_cast_fp16 = slice_by_index(begin = var_3284_begin_0, end = var_3284_end_0, end_mask = var_3284_end_mask_0, x = coreml_update_state_63)[name = string("op_3284_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_3284_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_3291_begin_0 = const()[name = string("op_3291_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor var_3291_end_0 = const()[name = string("op_3291_end_0"), val = tensor([32, 8, 4096, 128])]; + tensor var_3291_end_mask_0 = const()[name = string("op_3291_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3291_cast_fp16 = slice_by_index(begin = var_3291_begin_0, end = var_3291_end_0, end_mask = var_3291_end_mask_0, x = coreml_update_state_63)[name = string("op_3291_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_3291_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_51_axes_0 = const()[name = string("x_51_axes_0"), val = tensor([1])]; + tensor x_51_cast_fp16 = expand_dims(axes = x_51_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor var_3328 = const()[name = string("op_3328"), val = tensor([1, 2, 1, 1])]; + tensor x_53_cast_fp16 = tile(reps = var_3328, x = x_51_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_3340 = const()[name = string("op_3340"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_15_cast_fp16 = reshape(shape = var_3340, x = x_53_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor x_57_axes_0 = const()[name = string("x_57_axes_0"), val = tensor([1])]; + tensor x_57_cast_fp16 = expand_dims(axes = x_57_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_57_cast_fp16")]; + tensor var_3348 = const()[name = string("op_3348"), val = tensor([1, 2, 1, 1])]; + tensor x_59_cast_fp16 = tile(reps = var_3348, x = x_57_cast_fp16)[name = string("x_59_cast_fp16")]; + tensor var_3360 = const()[name = string("op_3360"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_21_cast_fp16 = reshape(shape = var_3360, x = x_59_cast_fp16)[name = string("value_states_21_cast_fp16")]; + bool var_3375_transpose_x_1 = const()[name = string("op_3375_transpose_x_1"), val = bool(false)]; + bool var_3375_transpose_y_1 = const()[name = string("op_3375_transpose_y_1"), val = bool(true)]; + tensor var_3375 = matmul(transpose_x = var_3375_transpose_x_1, transpose_y = var_3375_transpose_y_1, x = query_states_13, y = key_states_15_cast_fp16)[name = string("op_3375")]; + fp16 var_3376_to_fp16 = const()[name = string("op_3376_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_19_cast_fp16 = mul(x = var_3375, y = var_3376_to_fp16)[name = string("attn_weights_19_cast_fp16")]; + tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = causal_mask)[name = string("attn_weights_21_cast_fp16")]; + int32 var_3411 = const()[name = string("op_3411"), val = int32(-1)]; + tensor attn_weights_23_cast_fp16 = softmax(axis = var_3411, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_21_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_3422_perm_0 = const()[name = string("op_3422_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3426 = const()[name = string("op_3426"), val = tensor([1, 1, 2048])]; + tensor var_3422_cast_fp16 = transpose(perm = var_3422_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_148")]; + tensor attn_output_35_cast_fp16 = reshape(shape = var_3426, x = var_3422_cast_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor var_3431 = const()[name = string("op_3431"), val = tensor([0, 2, 1])]; + string var_3447_pad_type_0 = const()[name = string("op_3447_pad_type_0"), val = string("valid")]; + int32 var_3447_groups_0 = const()[name = string("op_3447_groups_0"), val = int32(1)]; + tensor var_3447_strides_0 = const()[name = string("op_3447_strides_0"), val = tensor([1])]; + tensor var_3447_pad_0 = const()[name = string("op_3447_pad_0"), val = tensor([0, 0])]; + tensor var_3447_dilations_0 = const()[name = string("op_3447_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412588544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414685760))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3432_cast_fp16 = transpose(perm = var_3431, x = attn_output_35_cast_fp16)[name = string("transpose_147")]; + tensor var_3447_cast_fp16 = conv(dilations = var_3447_dilations_0, groups = var_3447_groups_0, pad = var_3447_pad_0, pad_type = var_3447_pad_type_0, strides = var_3447_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_3432_cast_fp16)[name = string("op_3447_cast_fp16")]; + tensor var_3451 = const()[name = string("op_3451"), val = tensor([0, 2, 1])]; + tensor attn_output_39_cast_fp16 = transpose(perm = var_3451, x = var_3447_cast_fp16)[name = string("transpose_146")]; + tensor hidden_states_39_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; + int32 var_3464 = const()[name = string("op_3464"), val = int32(-1)]; + fp16 const_116_promoted_to_fp16 = const()[name = string("const_116_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3466_cast_fp16 = mul(x = hidden_states_39_cast_fp16, y = const_116_promoted_to_fp16)[name = string("op_3466_cast_fp16")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65_cast_fp16 = concat(axis = var_3464, interleave = input_65_interleave_0, values = (hidden_states_39_cast_fp16, var_3466_cast_fp16))[name = string("input_65_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_3461_to_fp16 = const()[name = string("op_3461_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_3461_to_fp16, x = input_65_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor normed_63_begin_0 = const()[name = string("normed_63_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_63_end_0 = const()[name = string("normed_63_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_63_end_mask_0 = const()[name = string("normed_63_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_63_cast_fp16 = slice_by_index(begin = normed_63_begin_0, end = normed_63_end_0, end_mask = normed_63_end_mask_0, x = normed_61_cast_fp16)[name = string("normed_63_cast_fp16")]; + tensor const_119_promoted_to_fp16 = const()[name = string("const_119_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414751360)))]; + tensor x_61_cast_fp16 = mul(x = normed_63_cast_fp16, y = const_119_promoted_to_fp16)[name = string("x_61_cast_fp16")]; + tensor var_3491 = const()[name = string("op_3491"), val = tensor([0, 2, 1])]; + tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; + tensor var_3492 = transpose(perm = var_3491, x = x_61_cast_fp16)[name = string("transpose_145")]; + tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_3492)[name = string("input_67")]; + string input_69_pad_type_0 = const()[name = string("input_69_pad_type_0"), val = string("valid")]; + tensor input_69_strides_0 = const()[name = string("input_69_strides_0"), val = tensor([1, 1])]; + tensor input_69_pad_0 = const()[name = string("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_69_dilations_0 = const()[name = string("input_69_dilations_0"), val = tensor([1, 1])]; + int32 input_69_groups_0 = const()[name = string("input_69_groups_0"), val = int32(1)]; + tensor input_69 = conv(dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_67)[name = string("input_69")]; + string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; + tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; + tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; + int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; + tensor b_7 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_67)[name = string("b_7")]; + tensor c_7 = silu(x = input_69)[name = string("c_7")]; + tensor input_71 = mul(x = c_7, y = b_7)[name = string("input_71")]; + string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; + tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; + tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; + int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; + tensor e_7 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_71)[name = string("e_7")]; + tensor var_3514_axes_0 = const()[name = string("op_3514_axes_0"), val = tensor([2])]; + tensor var_3514 = squeeze(axes = var_3514_axes_0, x = e_7)[name = string("op_3514")]; + tensor var_3515 = const()[name = string("op_3515"), val = tensor([0, 2, 1])]; + tensor var_3516 = transpose(perm = var_3515, x = var_3514)[name = string("transpose_144")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_39_cast_fp16, y = var_3516)[name = string("hidden_states_41_cast_fp16")]; + int32 var_3528 = const()[name = string("op_3528"), val = int32(-1)]; + fp16 const_120_promoted_to_fp16 = const()[name = string("const_120_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3530_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_120_promoted_to_fp16)[name = string("op_3530_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_3528, interleave = input_73_interleave_0, values = (hidden_states_41_cast_fp16, var_3530_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_3525_to_fp16 = const()[name = string("op_3525_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_3525_to_fp16, x = input_73_cast_fp16)[name = string("normed_65_cast_fp16")]; + tensor normed_67_begin_0 = const()[name = string("normed_67_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_67_end_0 = const()[name = string("normed_67_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_67_end_mask_0 = const()[name = string("normed_67_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_67_cast_fp16 = slice_by_index(begin = normed_67_begin_0, end = normed_67_end_0, end_mask = normed_67_end_mask_0, x = normed_65_cast_fp16)[name = string("normed_67_cast_fp16")]; + tensor const_123_promoted_to_fp16 = const()[name = string("const_123_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414753472)))]; + tensor hidden_states_43_cast_fp16 = mul(x = normed_67_cast_fp16, y = const_123_promoted_to_fp16)[name = string("hidden_states_43_cast_fp16")]; + tensor var_3547 = const()[name = string("op_3547"), val = tensor([0, 2, 1])]; + tensor var_3550_axes_0 = const()[name = string("op_3550_axes_0"), val = tensor([2])]; + tensor var_3548_cast_fp16 = transpose(perm = var_3547, x = hidden_states_43_cast_fp16)[name = string("transpose_143")]; + tensor var_3550_cast_fp16 = expand_dims(axes = var_3550_axes_0, x = var_3548_cast_fp16)[name = string("op_3550_cast_fp16")]; + string var_3566_pad_type_0 = const()[name = string("op_3566_pad_type_0"), val = string("valid")]; + tensor var_3566_strides_0 = const()[name = string("op_3566_strides_0"), val = tensor([1, 1])]; + tensor var_3566_pad_0 = const()[name = string("op_3566_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3566_dilations_0 = const()[name = string("op_3566_dilations_0"), val = tensor([1, 1])]; + int32 var_3566_groups_0 = const()[name = string("op_3566_groups_0"), val = int32(1)]; + tensor var_3566 = conv(dilations = var_3566_dilations_0, groups = var_3566_groups_0, pad = var_3566_pad_0, pad_type = var_3566_pad_type_0, strides = var_3566_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_3550_cast_fp16)[name = string("op_3566")]; + tensor var_3571 = const()[name = string("op_3571"), val = tensor([1, 16, 1, 128])]; + tensor var_3572 = reshape(shape = var_3571, x = var_3566)[name = string("op_3572")]; + string var_3588_pad_type_0 = const()[name = string("op_3588_pad_type_0"), val = string("valid")]; + tensor var_3588_strides_0 = const()[name = string("op_3588_strides_0"), val = tensor([1, 1])]; + tensor var_3588_pad_0 = const()[name = string("op_3588_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3588_dilations_0 = const()[name = string("op_3588_dilations_0"), val = tensor([1, 1])]; + int32 var_3588_groups_0 = const()[name = string("op_3588_groups_0"), val = int32(1)]; + tensor var_3588 = conv(dilations = var_3588_dilations_0, groups = var_3588_groups_0, pad = var_3588_pad_0, pad_type = var_3588_pad_type_0, strides = var_3588_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_3550_cast_fp16)[name = string("op_3588")]; + tensor var_3593 = const()[name = string("op_3593"), val = tensor([1, 8, 1, 128])]; + tensor var_3594 = reshape(shape = var_3593, x = var_3588)[name = string("op_3594")]; + string var_3610_pad_type_0 = const()[name = string("op_3610_pad_type_0"), val = string("valid")]; + tensor var_3610_strides_0 = const()[name = string("op_3610_strides_0"), val = tensor([1, 1])]; + tensor var_3610_pad_0 = const()[name = string("op_3610_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3610_dilations_0 = const()[name = string("op_3610_dilations_0"), val = tensor([1, 1])]; + int32 var_3610_groups_0 = const()[name = string("op_3610_groups_0"), val = int32(1)]; + tensor var_3610 = conv(dilations = var_3610_dilations_0, groups = var_3610_groups_0, pad = var_3610_pad_0, pad_type = var_3610_pad_type_0, strides = var_3610_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_3550_cast_fp16)[name = string("op_3610")]; + tensor var_3615 = const()[name = string("op_3615"), val = tensor([1, 8, 1, 128])]; + tensor var_3616 = reshape(shape = var_3615, x = var_3610)[name = string("op_3616")]; + int32 var_3631 = const()[name = string("op_3631"), val = int32(-1)]; + fp16 const_124_promoted = const()[name = string("const_124_promoted"), val = fp16(-0x1p+0)]; + tensor var_3633 = mul(x = var_3572, y = const_124_promoted)[name = string("op_3633")]; + bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; + tensor input_77 = concat(axis = var_3631, interleave = input_77_interleave_0, values = (var_3572, var_3633))[name = string("input_77")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_3628_to_fp16 = const()[name = string("op_3628_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_3628_to_fp16, x = input_77)[name = string("normed_69_cast_fp16")]; + tensor normed_71_begin_0 = const()[name = string("normed_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_71_end_0 = const()[name = string("normed_71_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_71_end_mask_0 = const()[name = string("normed_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_71 = slice_by_index(begin = normed_71_begin_0, end = normed_71_end_0, end_mask = normed_71_end_mask_0, x = normed_69_cast_fp16)[name = string("normed_71")]; + tensor const_127 = const()[name = string("const_127"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414755584)))]; + tensor q_9 = mul(x = normed_71, y = const_127)[name = string("q_9")]; + int32 var_3656 = const()[name = string("op_3656"), val = int32(-1)]; + fp16 const_128_promoted = const()[name = string("const_128_promoted"), val = fp16(-0x1p+0)]; + tensor var_3658 = mul(x = var_3594, y = const_128_promoted)[name = string("op_3658")]; + bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; + tensor input_79 = concat(axis = var_3656, interleave = input_79_interleave_0, values = (var_3594, var_3658))[name = string("input_79")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_3653_to_fp16 = const()[name = string("op_3653_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_3653_to_fp16, x = input_79)[name = string("normed_73_cast_fp16")]; + tensor normed_75_begin_0 = const()[name = string("normed_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_75_end_0 = const()[name = string("normed_75_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_75_end_mask_0 = const()[name = string("normed_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_75 = slice_by_index(begin = normed_75_begin_0, end = normed_75_end_0, end_mask = normed_75_end_mask_0, x = normed_73_cast_fp16)[name = string("normed_75")]; + tensor const_131 = const()[name = string("const_131"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414755904)))]; + tensor k_9 = mul(x = normed_75, y = const_131)[name = string("k_9")]; + tensor var_3672 = mul(x = q_9, y = cos_1_cast_fp16)[name = string("op_3672")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9)[name = string("x2_17")]; + fp16 const_134_promoted = const()[name = string("const_134_promoted"), val = fp16(-0x1p+0)]; + tensor var_3693 = mul(x = x2_17, y = const_134_promoted)[name = string("op_3693")]; + int32 var_3695 = const()[name = string("op_3695"), val = int32(-1)]; + bool var_3696_interleave_0 = const()[name = string("op_3696_interleave_0"), val = bool(false)]; + tensor var_3696 = concat(axis = var_3695, interleave = var_3696_interleave_0, values = (var_3693, x1_17))[name = string("op_3696")]; + tensor var_3697 = mul(x = var_3696, y = sin_1_cast_fp16)[name = string("op_3697")]; + tensor query_states_17 = add(x = var_3672, y = var_3697)[name = string("query_states_17")]; + tensor var_3700 = mul(x = k_9, y = cos_1_cast_fp16)[name = string("op_3700")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9)[name = string("x2_19")]; + fp16 const_137_promoted = const()[name = string("const_137_promoted"), val = fp16(-0x1p+0)]; + tensor var_3721 = mul(x = x2_19, y = const_137_promoted)[name = string("op_3721")]; + int32 var_3723 = const()[name = string("op_3723"), val = int32(-1)]; + bool var_3724_interleave_0 = const()[name = string("op_3724_interleave_0"), val = bool(false)]; + tensor var_3724 = concat(axis = var_3723, interleave = var_3724_interleave_0, values = (var_3721, x1_19))[name = string("op_3724")]; + tensor var_3725 = mul(x = var_3724, y = sin_1_cast_fp16)[name = string("op_3725")]; + tensor key_states_17 = add(x = var_3700, y = var_3725)[name = string("key_states_17")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; + int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; + bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; + tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; + tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_1746, concat_35_values3_0))[name = string("concat_35")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = key_states_17, x = coreml_update_state_63)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([32])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([33])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_1746, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_3616, x = coreml_update_state_64)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; + tensor var_3780_begin_0 = const()[name = string("op_3780_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_3780_end_0 = const()[name = string("op_3780_end_0"), val = tensor([5, 8, 4096, 128])]; + tensor var_3780_end_mask_0 = const()[name = string("op_3780_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3780_cast_fp16 = slice_by_index(begin = var_3780_begin_0, end = var_3780_end_0, end_mask = var_3780_end_mask_0, x = coreml_update_state_65)[name = string("op_3780_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_3780_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_3787_begin_0 = const()[name = string("op_3787_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_3787_end_0 = const()[name = string("op_3787_end_0"), val = tensor([33, 8, 4096, 128])]; + tensor var_3787_end_mask_0 = const()[name = string("op_3787_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3787_cast_fp16 = slice_by_index(begin = var_3787_begin_0, end = var_3787_end_0, end_mask = var_3787_end_mask_0, x = coreml_update_state_65)[name = string("op_3787_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_3787_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_3824 = const()[name = string("op_3824"), val = tensor([1, 2, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_3824, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_3836 = const()[name = string("op_3836"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_19_cast_fp16 = reshape(shape = var_3836, x = x_69_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_3844 = const()[name = string("op_3844"), val = tensor([1, 2, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_3844, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor var_3856 = const()[name = string("op_3856"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_27_cast_fp16 = reshape(shape = var_3856, x = x_75_cast_fp16)[name = string("value_states_27_cast_fp16")]; + bool var_3871_transpose_x_1 = const()[name = string("op_3871_transpose_x_1"), val = bool(false)]; + bool var_3871_transpose_y_1 = const()[name = string("op_3871_transpose_y_1"), val = bool(true)]; + tensor var_3871 = matmul(transpose_x = var_3871_transpose_x_1, transpose_y = var_3871_transpose_y_1, x = query_states_17, y = key_states_19_cast_fp16)[name = string("op_3871")]; + fp16 var_3872_to_fp16 = const()[name = string("op_3872_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_3871, y = var_3872_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; + int32 var_3907 = const()[name = string("op_3907"), val = int32(-1)]; + tensor attn_weights_29_cast_fp16 = softmax(axis = var_3907, x = attn_weights_27_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; + bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; + bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; + tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = attn_weights_29_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor var_3918_perm_0 = const()[name = string("op_3918_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3922 = const()[name = string("op_3922"), val = tensor([1, 1, 2048])]; + tensor var_3918_cast_fp16 = transpose(perm = var_3918_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_142")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_3922, x = var_3918_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_3927 = const()[name = string("op_3927"), val = tensor([0, 2, 1])]; + string var_3943_pad_type_0 = const()[name = string("op_3943_pad_type_0"), val = string("valid")]; + int32 var_3943_groups_0 = const()[name = string("op_3943_groups_0"), val = int32(1)]; + tensor var_3943_strides_0 = const()[name = string("op_3943_strides_0"), val = tensor([1])]; + tensor var_3943_pad_0 = const()[name = string("op_3943_pad_0"), val = tensor([0, 0])]; + tensor var_3943_dilations_0 = const()[name = string("op_3943_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414756224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416853440))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3928_cast_fp16 = transpose(perm = var_3927, x = attn_output_45_cast_fp16)[name = string("transpose_141")]; + tensor var_3943_cast_fp16 = conv(dilations = var_3943_dilations_0, groups = var_3943_groups_0, pad = var_3943_pad_0, pad_type = var_3943_pad_type_0, strides = var_3943_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_3928_cast_fp16)[name = string("op_3943_cast_fp16")]; + tensor var_3947 = const()[name = string("op_3947"), val = tensor([0, 2, 1])]; + tensor attn_output_49_cast_fp16 = transpose(perm = var_3947, x = var_3943_cast_fp16)[name = string("transpose_140")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; + int32 var_3960 = const()[name = string("op_3960"), val = int32(-1)]; + fp16 const_146_promoted_to_fp16 = const()[name = string("const_146_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3962_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_146_promoted_to_fp16)[name = string("op_3962_cast_fp16")]; + bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; + tensor input_83_cast_fp16 = concat(axis = var_3960, interleave = input_83_interleave_0, values = (hidden_states_49_cast_fp16, var_3962_cast_fp16))[name = string("input_83_cast_fp16")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_3957_to_fp16 = const()[name = string("op_3957_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_3957_to_fp16, x = input_83_cast_fp16)[name = string("normed_77_cast_fp16")]; + tensor normed_79_begin_0 = const()[name = string("normed_79_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_79_end_0 = const()[name = string("normed_79_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_79_end_mask_0 = const()[name = string("normed_79_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_79_cast_fp16 = slice_by_index(begin = normed_79_begin_0, end = normed_79_end_0, end_mask = normed_79_end_mask_0, x = normed_77_cast_fp16)[name = string("normed_79_cast_fp16")]; + tensor const_149_promoted_to_fp16 = const()[name = string("const_149_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416919040)))]; + tensor x_77_cast_fp16 = mul(x = normed_79_cast_fp16, y = const_149_promoted_to_fp16)[name = string("x_77_cast_fp16")]; + tensor var_3987 = const()[name = string("op_3987"), val = tensor([0, 2, 1])]; + tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; + tensor var_3988 = transpose(perm = var_3987, x = x_77_cast_fp16)[name = string("transpose_139")]; + tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_3988)[name = string("input_85")]; + string input_87_pad_type_0 = const()[name = string("input_87_pad_type_0"), val = string("valid")]; + tensor input_87_strides_0 = const()[name = string("input_87_strides_0"), val = tensor([1, 1])]; + tensor input_87_pad_0 = const()[name = string("input_87_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_87_dilations_0 = const()[name = string("input_87_dilations_0"), val = tensor([1, 1])]; + int32 input_87_groups_0 = const()[name = string("input_87_groups_0"), val = int32(1)]; + tensor input_87 = conv(dilations = input_87_dilations_0, groups = input_87_groups_0, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = input_87_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_85)[name = string("input_87")]; + string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; + tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; + tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; + int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; + tensor b_9 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_85)[name = string("b_9")]; + tensor c_9 = silu(x = input_87)[name = string("c_9")]; + tensor input_89 = mul(x = c_9, y = b_9)[name = string("input_89")]; + string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; + tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; + tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; + int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; + tensor e_9 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_89)[name = string("e_9")]; + tensor var_4010_axes_0 = const()[name = string("op_4010_axes_0"), val = tensor([2])]; + tensor var_4010 = squeeze(axes = var_4010_axes_0, x = e_9)[name = string("op_4010")]; + tensor var_4011 = const()[name = string("op_4011"), val = tensor([0, 2, 1])]; + tensor var_4012 = transpose(perm = var_4011, x = var_4010)[name = string("transpose_138")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = var_4012)[name = string("hidden_states_51_cast_fp16")]; + int32 var_4024 = const()[name = string("op_4024"), val = int32(-1)]; + fp16 const_150_promoted_to_fp16 = const()[name = string("const_150_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4026_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_150_promoted_to_fp16)[name = string("op_4026_cast_fp16")]; + bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; + tensor input_91_cast_fp16 = concat(axis = var_4024, interleave = input_91_interleave_0, values = (hidden_states_51_cast_fp16, var_4026_cast_fp16))[name = string("input_91_cast_fp16")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_4021_to_fp16 = const()[name = string("op_4021_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_4021_to_fp16, x = input_91_cast_fp16)[name = string("normed_81_cast_fp16")]; + tensor normed_83_begin_0 = const()[name = string("normed_83_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_83_end_0 = const()[name = string("normed_83_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_83_end_mask_0 = const()[name = string("normed_83_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_83_cast_fp16 = slice_by_index(begin = normed_83_begin_0, end = normed_83_end_0, end_mask = normed_83_end_mask_0, x = normed_81_cast_fp16)[name = string("normed_83_cast_fp16")]; + tensor const_153_promoted_to_fp16 = const()[name = string("const_153_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416921152)))]; + tensor hidden_states_53_cast_fp16 = mul(x = normed_83_cast_fp16, y = const_153_promoted_to_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor var_4043 = const()[name = string("op_4043"), val = tensor([0, 2, 1])]; + tensor var_4046_axes_0 = const()[name = string("op_4046_axes_0"), val = tensor([2])]; + tensor var_4044_cast_fp16 = transpose(perm = var_4043, x = hidden_states_53_cast_fp16)[name = string("transpose_137")]; + tensor var_4046_cast_fp16 = expand_dims(axes = var_4046_axes_0, x = var_4044_cast_fp16)[name = string("op_4046_cast_fp16")]; + string var_4062_pad_type_0 = const()[name = string("op_4062_pad_type_0"), val = string("valid")]; + tensor var_4062_strides_0 = const()[name = string("op_4062_strides_0"), val = tensor([1, 1])]; + tensor var_4062_pad_0 = const()[name = string("op_4062_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4062_dilations_0 = const()[name = string("op_4062_dilations_0"), val = tensor([1, 1])]; + int32 var_4062_groups_0 = const()[name = string("op_4062_groups_0"), val = int32(1)]; + tensor var_4062 = conv(dilations = var_4062_dilations_0, groups = var_4062_groups_0, pad = var_4062_pad_0, pad_type = var_4062_pad_type_0, strides = var_4062_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_4046_cast_fp16)[name = string("op_4062")]; + tensor var_4067 = const()[name = string("op_4067"), val = tensor([1, 16, 1, 128])]; + tensor var_4068 = reshape(shape = var_4067, x = var_4062)[name = string("op_4068")]; + string var_4084_pad_type_0 = const()[name = string("op_4084_pad_type_0"), val = string("valid")]; + tensor var_4084_strides_0 = const()[name = string("op_4084_strides_0"), val = tensor([1, 1])]; + tensor var_4084_pad_0 = const()[name = string("op_4084_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4084_dilations_0 = const()[name = string("op_4084_dilations_0"), val = tensor([1, 1])]; + int32 var_4084_groups_0 = const()[name = string("op_4084_groups_0"), val = int32(1)]; + tensor var_4084 = conv(dilations = var_4084_dilations_0, groups = var_4084_groups_0, pad = var_4084_pad_0, pad_type = var_4084_pad_type_0, strides = var_4084_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_4046_cast_fp16)[name = string("op_4084")]; + tensor var_4089 = const()[name = string("op_4089"), val = tensor([1, 8, 1, 128])]; + tensor var_4090 = reshape(shape = var_4089, x = var_4084)[name = string("op_4090")]; + string var_4106_pad_type_0 = const()[name = string("op_4106_pad_type_0"), val = string("valid")]; + tensor var_4106_strides_0 = const()[name = string("op_4106_strides_0"), val = tensor([1, 1])]; + tensor var_4106_pad_0 = const()[name = string("op_4106_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4106_dilations_0 = const()[name = string("op_4106_dilations_0"), val = tensor([1, 1])]; + int32 var_4106_groups_0 = const()[name = string("op_4106_groups_0"), val = int32(1)]; + tensor var_4106 = conv(dilations = var_4106_dilations_0, groups = var_4106_groups_0, pad = var_4106_pad_0, pad_type = var_4106_pad_type_0, strides = var_4106_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_4046_cast_fp16)[name = string("op_4106")]; + tensor var_4111 = const()[name = string("op_4111"), val = tensor([1, 8, 1, 128])]; + tensor var_4112 = reshape(shape = var_4111, x = var_4106)[name = string("op_4112")]; + int32 var_4127 = const()[name = string("op_4127"), val = int32(-1)]; + fp16 const_154_promoted = const()[name = string("const_154_promoted"), val = fp16(-0x1p+0)]; + tensor var_4129 = mul(x = var_4068, y = const_154_promoted)[name = string("op_4129")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95 = concat(axis = var_4127, interleave = input_95_interleave_0, values = (var_4068, var_4129))[name = string("input_95")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_4124_to_fp16 = const()[name = string("op_4124_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_4124_to_fp16, x = input_95)[name = string("normed_85_cast_fp16")]; + tensor normed_87_begin_0 = const()[name = string("normed_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_87_end_0 = const()[name = string("normed_87_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_87_end_mask_0 = const()[name = string("normed_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_87 = slice_by_index(begin = normed_87_begin_0, end = normed_87_end_0, end_mask = normed_87_end_mask_0, x = normed_85_cast_fp16)[name = string("normed_87")]; + tensor const_157 = const()[name = string("const_157"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416923264)))]; + tensor q_11 = mul(x = normed_87, y = const_157)[name = string("q_11")]; + int32 var_4152 = const()[name = string("op_4152"), val = int32(-1)]; + fp16 const_158_promoted = const()[name = string("const_158_promoted"), val = fp16(-0x1p+0)]; + tensor var_4154 = mul(x = var_4090, y = const_158_promoted)[name = string("op_4154")]; + bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)]; + tensor input_97 = concat(axis = var_4152, interleave = input_97_interleave_0, values = (var_4090, var_4154))[name = string("input_97")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_4149_to_fp16 = const()[name = string("op_4149_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_4149_to_fp16, x = input_97)[name = string("normed_89_cast_fp16")]; + tensor normed_91_begin_0 = const()[name = string("normed_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_91_end_0 = const()[name = string("normed_91_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_91_end_mask_0 = const()[name = string("normed_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_91 = slice_by_index(begin = normed_91_begin_0, end = normed_91_end_0, end_mask = normed_91_end_mask_0, x = normed_89_cast_fp16)[name = string("normed_91")]; + tensor const_161 = const()[name = string("const_161"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416923584)))]; + tensor k_11 = mul(x = normed_91, y = const_161)[name = string("k_11")]; + tensor var_4168 = mul(x = q_11, y = cos_1_cast_fp16)[name = string("op_4168")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11)[name = string("x2_21")]; + fp16 const_164_promoted = const()[name = string("const_164_promoted"), val = fp16(-0x1p+0)]; + tensor var_4189 = mul(x = x2_21, y = const_164_promoted)[name = string("op_4189")]; + int32 var_4191 = const()[name = string("op_4191"), val = int32(-1)]; + bool var_4192_interleave_0 = const()[name = string("op_4192_interleave_0"), val = bool(false)]; + tensor var_4192 = concat(axis = var_4191, interleave = var_4192_interleave_0, values = (var_4189, x1_21))[name = string("op_4192")]; + tensor var_4193 = mul(x = var_4192, y = sin_1_cast_fp16)[name = string("op_4193")]; + tensor query_states_21 = add(x = var_4168, y = var_4193)[name = string("query_states_21")]; + tensor var_4196 = mul(x = k_11, y = cos_1_cast_fp16)[name = string("op_4196")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11)[name = string("x2_23")]; + fp16 const_167_promoted = const()[name = string("const_167_promoted"), val = fp16(-0x1p+0)]; + tensor var_4217 = mul(x = x2_23, y = const_167_promoted)[name = string("op_4217")]; + int32 var_4219 = const()[name = string("op_4219"), val = int32(-1)]; + bool var_4220_interleave_0 = const()[name = string("op_4220_interleave_0"), val = bool(false)]; + tensor var_4220 = concat(axis = var_4219, interleave = var_4220_interleave_0, values = (var_4217, x1_23))[name = string("op_4220")]; + tensor var_4221 = mul(x = var_4220, y = sin_1_cast_fp16)[name = string("op_4221")]; + tensor key_states_21 = add(x = var_4196, y = var_4221)[name = string("key_states_21")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_1746, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = key_states_21, x = coreml_update_state_65)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([33])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([34])]; + int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; + bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; + tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; + tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; + tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; + int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; + bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; + tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_1746, concat_47_values3_0))[name = string("concat_47")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_4112, x = coreml_update_state_66)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; + tensor var_4276_begin_0 = const()[name = string("op_4276_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4276_end_0 = const()[name = string("op_4276_end_0"), val = tensor([6, 8, 4096, 128])]; + tensor var_4276_end_mask_0 = const()[name = string("op_4276_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4276_cast_fp16 = slice_by_index(begin = var_4276_begin_0, end = var_4276_end_0, end_mask = var_4276_end_mask_0, x = coreml_update_state_67)[name = string("op_4276_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_4276_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_4283_begin_0 = const()[name = string("op_4283_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_4283_end_0 = const()[name = string("op_4283_end_0"), val = tensor([34, 8, 4096, 128])]; + tensor var_4283_end_mask_0 = const()[name = string("op_4283_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4283_cast_fp16 = slice_by_index(begin = var_4283_begin_0, end = var_4283_end_0, end_mask = var_4283_end_mask_0, x = coreml_update_state_67)[name = string("op_4283_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_4283_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_83_axes_0 = const()[name = string("x_83_axes_0"), val = tensor([1])]; + tensor x_83_cast_fp16 = expand_dims(axes = x_83_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_83_cast_fp16")]; + tensor var_4320 = const()[name = string("op_4320"), val = tensor([1, 2, 1, 1])]; + tensor x_85_cast_fp16 = tile(reps = var_4320, x = x_83_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_4332 = const()[name = string("op_4332"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_23_cast_fp16 = reshape(shape = var_4332, x = x_85_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor x_89_axes_0 = const()[name = string("x_89_axes_0"), val = tensor([1])]; + tensor x_89_cast_fp16 = expand_dims(axes = x_89_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_89_cast_fp16")]; + tensor var_4340 = const()[name = string("op_4340"), val = tensor([1, 2, 1, 1])]; + tensor x_91_cast_fp16 = tile(reps = var_4340, x = x_89_cast_fp16)[name = string("x_91_cast_fp16")]; + tensor var_4352 = const()[name = string("op_4352"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_33_cast_fp16 = reshape(shape = var_4352, x = x_91_cast_fp16)[name = string("value_states_33_cast_fp16")]; + bool var_4367_transpose_x_1 = const()[name = string("op_4367_transpose_x_1"), val = bool(false)]; + bool var_4367_transpose_y_1 = const()[name = string("op_4367_transpose_y_1"), val = bool(true)]; + tensor var_4367 = matmul(transpose_x = var_4367_transpose_x_1, transpose_y = var_4367_transpose_y_1, x = query_states_21, y = key_states_23_cast_fp16)[name = string("op_4367")]; + fp16 var_4368_to_fp16 = const()[name = string("op_4368_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_31_cast_fp16 = mul(x = var_4367, y = var_4368_to_fp16)[name = string("attn_weights_31_cast_fp16")]; + tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; + int32 var_4403 = const()[name = string("op_4403"), val = int32(-1)]; + tensor attn_weights_35_cast_fp16 = softmax(axis = var_4403, x = attn_weights_33_cast_fp16)[name = string("attn_weights_35_cast_fp16")]; + bool attn_output_51_transpose_x_0 = const()[name = string("attn_output_51_transpose_x_0"), val = bool(false)]; + bool attn_output_51_transpose_y_0 = const()[name = string("attn_output_51_transpose_y_0"), val = bool(false)]; + tensor attn_output_51_cast_fp16 = matmul(transpose_x = attn_output_51_transpose_x_0, transpose_y = attn_output_51_transpose_y_0, x = attn_weights_35_cast_fp16, y = value_states_33_cast_fp16)[name = string("attn_output_51_cast_fp16")]; + tensor var_4414_perm_0 = const()[name = string("op_4414_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4418 = const()[name = string("op_4418"), val = tensor([1, 1, 2048])]; + tensor var_4414_cast_fp16 = transpose(perm = var_4414_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_136")]; + tensor attn_output_55_cast_fp16 = reshape(shape = var_4418, x = var_4414_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_4423 = const()[name = string("op_4423"), val = tensor([0, 2, 1])]; + string var_4439_pad_type_0 = const()[name = string("op_4439_pad_type_0"), val = string("valid")]; + int32 var_4439_groups_0 = const()[name = string("op_4439_groups_0"), val = int32(1)]; + tensor var_4439_strides_0 = const()[name = string("op_4439_strides_0"), val = tensor([1])]; + tensor var_4439_pad_0 = const()[name = string("op_4439_pad_0"), val = tensor([0, 0])]; + tensor var_4439_dilations_0 = const()[name = string("op_4439_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416923904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419021120))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4424_cast_fp16 = transpose(perm = var_4423, x = attn_output_55_cast_fp16)[name = string("transpose_135")]; + tensor var_4439_cast_fp16 = conv(dilations = var_4439_dilations_0, groups = var_4439_groups_0, pad = var_4439_pad_0, pad_type = var_4439_pad_type_0, strides = var_4439_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_4424_cast_fp16)[name = string("op_4439_cast_fp16")]; + tensor var_4443 = const()[name = string("op_4443"), val = tensor([0, 2, 1])]; + tensor attn_output_59_cast_fp16 = transpose(perm = var_4443, x = var_4439_cast_fp16)[name = string("transpose_134")]; + tensor hidden_states_59_cast_fp16 = add(x = hidden_states_51_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; + int32 var_4456 = const()[name = string("op_4456"), val = int32(-1)]; + fp16 const_176_promoted_to_fp16 = const()[name = string("const_176_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4458_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = const_176_promoted_to_fp16)[name = string("op_4458_cast_fp16")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101_cast_fp16 = concat(axis = var_4456, interleave = input_101_interleave_0, values = (hidden_states_59_cast_fp16, var_4458_cast_fp16))[name = string("input_101_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_4453_to_fp16 = const()[name = string("op_4453_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_4453_to_fp16, x = input_101_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor normed_95_begin_0 = const()[name = string("normed_95_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_95_end_0 = const()[name = string("normed_95_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_95_end_mask_0 = const()[name = string("normed_95_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_95_cast_fp16 = slice_by_index(begin = normed_95_begin_0, end = normed_95_end_0, end_mask = normed_95_end_mask_0, x = normed_93_cast_fp16)[name = string("normed_95_cast_fp16")]; + tensor const_179_promoted_to_fp16 = const()[name = string("const_179_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419086720)))]; + tensor x_93_cast_fp16 = mul(x = normed_95_cast_fp16, y = const_179_promoted_to_fp16)[name = string("x_93_cast_fp16")]; + tensor var_4483 = const()[name = string("op_4483"), val = tensor([0, 2, 1])]; + tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([2])]; + tensor var_4484 = transpose(perm = var_4483, x = x_93_cast_fp16)[name = string("transpose_133")]; + tensor input_103 = expand_dims(axes = input_103_axes_0, x = var_4484)[name = string("input_103")]; + string input_105_pad_type_0 = const()[name = string("input_105_pad_type_0"), val = string("valid")]; + tensor input_105_strides_0 = const()[name = string("input_105_strides_0"), val = tensor([1, 1])]; + tensor input_105_pad_0 = const()[name = string("input_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_105_dilations_0 = const()[name = string("input_105_dilations_0"), val = tensor([1, 1])]; + int32 input_105_groups_0 = const()[name = string("input_105_groups_0"), val = int32(1)]; + tensor input_105 = conv(dilations = input_105_dilations_0, groups = input_105_groups_0, pad = input_105_pad_0, pad_type = input_105_pad_type_0, strides = input_105_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_103)[name = string("input_105")]; + string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; + tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; + tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; + int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; + tensor b_11 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_103)[name = string("b_11")]; + tensor c_11 = silu(x = input_105)[name = string("c_11")]; + tensor input_107 = mul(x = c_11, y = b_11)[name = string("input_107")]; + string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; + tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; + tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; + int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; + tensor e_11 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_107)[name = string("e_11")]; + tensor var_4506_axes_0 = const()[name = string("op_4506_axes_0"), val = tensor([2])]; + tensor var_4506 = squeeze(axes = var_4506_axes_0, x = e_11)[name = string("op_4506")]; + tensor var_4507 = const()[name = string("op_4507"), val = tensor([0, 2, 1])]; + tensor var_4508 = transpose(perm = var_4507, x = var_4506)[name = string("transpose_132")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = var_4508)[name = string("hidden_states_61_cast_fp16")]; + int32 var_4520 = const()[name = string("op_4520"), val = int32(-1)]; + fp16 const_180_promoted_to_fp16 = const()[name = string("const_180_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4522_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_180_promoted_to_fp16)[name = string("op_4522_cast_fp16")]; + bool input_109_interleave_0 = const()[name = string("input_109_interleave_0"), val = bool(false)]; + tensor input_109_cast_fp16 = concat(axis = var_4520, interleave = input_109_interleave_0, values = (hidden_states_61_cast_fp16, var_4522_cast_fp16))[name = string("input_109_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_4517_to_fp16 = const()[name = string("op_4517_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_4517_to_fp16, x = input_109_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor normed_99_begin_0 = const()[name = string("normed_99_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_99_end_0 = const()[name = string("normed_99_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_99_end_mask_0 = const()[name = string("normed_99_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_99_cast_fp16 = slice_by_index(begin = normed_99_begin_0, end = normed_99_end_0, end_mask = normed_99_end_mask_0, x = normed_97_cast_fp16)[name = string("normed_99_cast_fp16")]; + tensor const_183_promoted_to_fp16 = const()[name = string("const_183_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419088832)))]; + tensor hidden_states_63_cast_fp16 = mul(x = normed_99_cast_fp16, y = const_183_promoted_to_fp16)[name = string("hidden_states_63_cast_fp16")]; + tensor var_4539 = const()[name = string("op_4539"), val = tensor([0, 2, 1])]; + tensor var_4542_axes_0 = const()[name = string("op_4542_axes_0"), val = tensor([2])]; + tensor var_4540_cast_fp16 = transpose(perm = var_4539, x = hidden_states_63_cast_fp16)[name = string("transpose_131")]; + tensor var_4542_cast_fp16 = expand_dims(axes = var_4542_axes_0, x = var_4540_cast_fp16)[name = string("op_4542_cast_fp16")]; + string var_4558_pad_type_0 = const()[name = string("op_4558_pad_type_0"), val = string("valid")]; + tensor var_4558_strides_0 = const()[name = string("op_4558_strides_0"), val = tensor([1, 1])]; + tensor var_4558_pad_0 = const()[name = string("op_4558_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4558_dilations_0 = const()[name = string("op_4558_dilations_0"), val = tensor([1, 1])]; + int32 var_4558_groups_0 = const()[name = string("op_4558_groups_0"), val = int32(1)]; + tensor var_4558 = conv(dilations = var_4558_dilations_0, groups = var_4558_groups_0, pad = var_4558_pad_0, pad_type = var_4558_pad_type_0, strides = var_4558_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_4542_cast_fp16)[name = string("op_4558")]; + tensor var_4563 = const()[name = string("op_4563"), val = tensor([1, 16, 1, 128])]; + tensor var_4564 = reshape(shape = var_4563, x = var_4558)[name = string("op_4564")]; + string var_4580_pad_type_0 = const()[name = string("op_4580_pad_type_0"), val = string("valid")]; + tensor var_4580_strides_0 = const()[name = string("op_4580_strides_0"), val = tensor([1, 1])]; + tensor var_4580_pad_0 = const()[name = string("op_4580_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4580_dilations_0 = const()[name = string("op_4580_dilations_0"), val = tensor([1, 1])]; + int32 var_4580_groups_0 = const()[name = string("op_4580_groups_0"), val = int32(1)]; + tensor var_4580 = conv(dilations = var_4580_dilations_0, groups = var_4580_groups_0, pad = var_4580_pad_0, pad_type = var_4580_pad_type_0, strides = var_4580_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_4542_cast_fp16)[name = string("op_4580")]; + tensor var_4585 = const()[name = string("op_4585"), val = tensor([1, 8, 1, 128])]; + tensor var_4586 = reshape(shape = var_4585, x = var_4580)[name = string("op_4586")]; + string var_4602_pad_type_0 = const()[name = string("op_4602_pad_type_0"), val = string("valid")]; + tensor var_4602_strides_0 = const()[name = string("op_4602_strides_0"), val = tensor([1, 1])]; + tensor var_4602_pad_0 = const()[name = string("op_4602_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4602_dilations_0 = const()[name = string("op_4602_dilations_0"), val = tensor([1, 1])]; + int32 var_4602_groups_0 = const()[name = string("op_4602_groups_0"), val = int32(1)]; + tensor var_4602 = conv(dilations = var_4602_dilations_0, groups = var_4602_groups_0, pad = var_4602_pad_0, pad_type = var_4602_pad_type_0, strides = var_4602_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_4542_cast_fp16)[name = string("op_4602")]; + tensor var_4607 = const()[name = string("op_4607"), val = tensor([1, 8, 1, 128])]; + tensor var_4608 = reshape(shape = var_4607, x = var_4602)[name = string("op_4608")]; + int32 var_4623 = const()[name = string("op_4623"), val = int32(-1)]; + fp16 const_184_promoted = const()[name = string("const_184_promoted"), val = fp16(-0x1p+0)]; + tensor var_4625 = mul(x = var_4564, y = const_184_promoted)[name = string("op_4625")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113 = concat(axis = var_4623, interleave = input_113_interleave_0, values = (var_4564, var_4625))[name = string("input_113")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_4620_to_fp16 = const()[name = string("op_4620_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_4620_to_fp16, x = input_113)[name = string("normed_101_cast_fp16")]; + tensor normed_103_begin_0 = const()[name = string("normed_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_103_end_0 = const()[name = string("normed_103_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_103_end_mask_0 = const()[name = string("normed_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_103 = slice_by_index(begin = normed_103_begin_0, end = normed_103_end_0, end_mask = normed_103_end_mask_0, x = normed_101_cast_fp16)[name = string("normed_103")]; + tensor const_187 = const()[name = string("const_187"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419090944)))]; + tensor q_13 = mul(x = normed_103, y = const_187)[name = string("q_13")]; + int32 var_4648 = const()[name = string("op_4648"), val = int32(-1)]; + fp16 const_188_promoted = const()[name = string("const_188_promoted"), val = fp16(-0x1p+0)]; + tensor var_4650 = mul(x = var_4586, y = const_188_promoted)[name = string("op_4650")]; + bool input_115_interleave_0 = const()[name = string("input_115_interleave_0"), val = bool(false)]; + tensor input_115 = concat(axis = var_4648, interleave = input_115_interleave_0, values = (var_4586, var_4650))[name = string("input_115")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_4645_to_fp16 = const()[name = string("op_4645_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_4645_to_fp16, x = input_115)[name = string("normed_105_cast_fp16")]; + tensor normed_107_begin_0 = const()[name = string("normed_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_107_end_0 = const()[name = string("normed_107_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_107_end_mask_0 = const()[name = string("normed_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_107 = slice_by_index(begin = normed_107_begin_0, end = normed_107_end_0, end_mask = normed_107_end_mask_0, x = normed_105_cast_fp16)[name = string("normed_107")]; + tensor const_191 = const()[name = string("const_191"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419091264)))]; + tensor k_13 = mul(x = normed_107, y = const_191)[name = string("k_13")]; + tensor var_4664 = mul(x = q_13, y = cos_1_cast_fp16)[name = string("op_4664")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13)[name = string("x2_25")]; + fp16 const_194_promoted = const()[name = string("const_194_promoted"), val = fp16(-0x1p+0)]; + tensor var_4685 = mul(x = x2_25, y = const_194_promoted)[name = string("op_4685")]; + int32 var_4687 = const()[name = string("op_4687"), val = int32(-1)]; + bool var_4688_interleave_0 = const()[name = string("op_4688_interleave_0"), val = bool(false)]; + tensor var_4688 = concat(axis = var_4687, interleave = var_4688_interleave_0, values = (var_4685, x1_25))[name = string("op_4688")]; + tensor var_4689 = mul(x = var_4688, y = sin_1_cast_fp16)[name = string("op_4689")]; + tensor query_states_25 = add(x = var_4664, y = var_4689)[name = string("query_states_25")]; + tensor var_4692 = mul(x = k_13, y = cos_1_cast_fp16)[name = string("op_4692")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13)[name = string("x1_27")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13)[name = string("x2_27")]; + fp16 const_197_promoted = const()[name = string("const_197_promoted"), val = fp16(-0x1p+0)]; + tensor var_4713 = mul(x = x2_27, y = const_197_promoted)[name = string("op_4713")]; + int32 var_4715 = const()[name = string("op_4715"), val = int32(-1)]; + bool var_4716_interleave_0 = const()[name = string("op_4716_interleave_0"), val = bool(false)]; + tensor var_4716 = concat(axis = var_4715, interleave = var_4716_interleave_0, values = (var_4713, x1_27))[name = string("op_4716")]; + tensor var_4717 = mul(x = var_4716, y = sin_1_cast_fp16)[name = string("op_4717")]; + tensor key_states_25 = add(x = var_4692, y = var_4717)[name = string("key_states_25")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; + int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; + bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; + tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_1746, concat_51_values3_0))[name = string("concat_51")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = key_states_25, x = coreml_update_state_67)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([34])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([35])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; + tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; + tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; + int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; + bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; + tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_1746, concat_55_values3_0))[name = string("concat_55")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_4608, x = coreml_update_state_68)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; + tensor var_4772_begin_0 = const()[name = string("op_4772_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_4772_end_0 = const()[name = string("op_4772_end_0"), val = tensor([7, 8, 4096, 128])]; + tensor var_4772_end_mask_0 = const()[name = string("op_4772_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4772_cast_fp16 = slice_by_index(begin = var_4772_begin_0, end = var_4772_end_0, end_mask = var_4772_end_mask_0, x = coreml_update_state_69)[name = string("op_4772_cast_fp16")]; + tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; + tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_4772_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; + tensor var_4779_begin_0 = const()[name = string("op_4779_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_4779_end_0 = const()[name = string("op_4779_end_0"), val = tensor([35, 8, 4096, 128])]; + tensor var_4779_end_mask_0 = const()[name = string("op_4779_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4779_cast_fp16 = slice_by_index(begin = var_4779_begin_0, end = var_4779_end_0, end_mask = var_4779_end_mask_0, x = coreml_update_state_69)[name = string("op_4779_cast_fp16")]; + tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; + tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_4779_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; + tensor x_99_axes_0 = const()[name = string("x_99_axes_0"), val = tensor([1])]; + tensor x_99_cast_fp16 = expand_dims(axes = x_99_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_99_cast_fp16")]; + tensor var_4816 = const()[name = string("op_4816"), val = tensor([1, 2, 1, 1])]; + tensor x_101_cast_fp16 = tile(reps = var_4816, x = x_99_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_4828 = const()[name = string("op_4828"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_27_cast_fp16 = reshape(shape = var_4828, x = x_101_cast_fp16)[name = string("key_states_27_cast_fp16")]; + tensor x_105_axes_0 = const()[name = string("x_105_axes_0"), val = tensor([1])]; + tensor x_105_cast_fp16 = expand_dims(axes = x_105_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_105_cast_fp16")]; + tensor var_4836 = const()[name = string("op_4836"), val = tensor([1, 2, 1, 1])]; + tensor x_107_cast_fp16 = tile(reps = var_4836, x = x_105_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor var_4848 = const()[name = string("op_4848"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_39_cast_fp16 = reshape(shape = var_4848, x = x_107_cast_fp16)[name = string("value_states_39_cast_fp16")]; + bool var_4863_transpose_x_1 = const()[name = string("op_4863_transpose_x_1"), val = bool(false)]; + bool var_4863_transpose_y_1 = const()[name = string("op_4863_transpose_y_1"), val = bool(true)]; + tensor var_4863 = matmul(transpose_x = var_4863_transpose_x_1, transpose_y = var_4863_transpose_y_1, x = query_states_25, y = key_states_27_cast_fp16)[name = string("op_4863")]; + fp16 var_4864_to_fp16 = const()[name = string("op_4864_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_37_cast_fp16 = mul(x = var_4863, y = var_4864_to_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; + int32 var_4899 = const()[name = string("op_4899"), val = int32(-1)]; + tensor attn_weights_41_cast_fp16 = softmax(axis = var_4899, x = attn_weights_39_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; + bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; + bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; + tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = attn_weights_41_cast_fp16, y = value_states_39_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_4910_perm_0 = const()[name = string("op_4910_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4914 = const()[name = string("op_4914"), val = tensor([1, 1, 2048])]; + tensor var_4910_cast_fp16 = transpose(perm = var_4910_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_130")]; + tensor attn_output_65_cast_fp16 = reshape(shape = var_4914, x = var_4910_cast_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor var_4919 = const()[name = string("op_4919"), val = tensor([0, 2, 1])]; + string var_4935_pad_type_0 = const()[name = string("op_4935_pad_type_0"), val = string("valid")]; + int32 var_4935_groups_0 = const()[name = string("op_4935_groups_0"), val = int32(1)]; + tensor var_4935_strides_0 = const()[name = string("op_4935_strides_0"), val = tensor([1])]; + tensor var_4935_pad_0 = const()[name = string("op_4935_pad_0"), val = tensor([0, 0])]; + tensor var_4935_dilations_0 = const()[name = string("op_4935_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419091584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421188800))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4920_cast_fp16 = transpose(perm = var_4919, x = attn_output_65_cast_fp16)[name = string("transpose_129")]; + tensor var_4935_cast_fp16 = conv(dilations = var_4935_dilations_0, groups = var_4935_groups_0, pad = var_4935_pad_0, pad_type = var_4935_pad_type_0, strides = var_4935_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_4920_cast_fp16)[name = string("op_4935_cast_fp16")]; + tensor var_4939 = const()[name = string("op_4939"), val = tensor([0, 2, 1])]; + tensor attn_output_69_cast_fp16 = transpose(perm = var_4939, x = var_4935_cast_fp16)[name = string("transpose_128")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + int32 var_4952 = const()[name = string("op_4952"), val = int32(-1)]; + fp16 const_206_promoted_to_fp16 = const()[name = string("const_206_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4954_cast_fp16 = mul(x = hidden_states_69_cast_fp16, y = const_206_promoted_to_fp16)[name = string("op_4954_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_4952, interleave = input_119_interleave_0, values = (hidden_states_69_cast_fp16, var_4954_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_4949_to_fp16 = const()[name = string("op_4949_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_4949_to_fp16, x = input_119_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor normed_111_begin_0 = const()[name = string("normed_111_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_111_end_0 = const()[name = string("normed_111_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_111_end_mask_0 = const()[name = string("normed_111_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_111_cast_fp16 = slice_by_index(begin = normed_111_begin_0, end = normed_111_end_0, end_mask = normed_111_end_mask_0, x = normed_109_cast_fp16)[name = string("normed_111_cast_fp16")]; + tensor const_209_promoted_to_fp16 = const()[name = string("const_209_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421254400)))]; + tensor x_109_cast_fp16 = mul(x = normed_111_cast_fp16, y = const_209_promoted_to_fp16)[name = string("x_109_cast_fp16")]; + tensor var_4979 = const()[name = string("op_4979"), val = tensor([0, 2, 1])]; + tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; + tensor var_4980 = transpose(perm = var_4979, x = x_109_cast_fp16)[name = string("transpose_127")]; + tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_4980)[name = string("input_121")]; + string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; + tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; + tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; + int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; + tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; + string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; + tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; + tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; + int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; + tensor b_13 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_121)[name = string("b_13")]; + tensor c_13 = silu(x = input_123)[name = string("c_13")]; + tensor input_125 = mul(x = c_13, y = b_13)[name = string("input_125")]; + string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; + tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; + tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; + int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; + tensor e_13 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_125)[name = string("e_13")]; + tensor var_5002_axes_0 = const()[name = string("op_5002_axes_0"), val = tensor([2])]; + tensor var_5002 = squeeze(axes = var_5002_axes_0, x = e_13)[name = string("op_5002")]; + tensor var_5003 = const()[name = string("op_5003"), val = tensor([0, 2, 1])]; + tensor var_5004 = transpose(perm = var_5003, x = var_5002)[name = string("transpose_126")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_69_cast_fp16, y = var_5004)[name = string("hidden_states_71_cast_fp16")]; + int32 var_5016 = const()[name = string("op_5016"), val = int32(-1)]; + fp16 const_210_promoted_to_fp16 = const()[name = string("const_210_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5018_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_210_promoted_to_fp16)[name = string("op_5018_cast_fp16")]; + bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; + tensor input_127_cast_fp16 = concat(axis = var_5016, interleave = input_127_interleave_0, values = (hidden_states_71_cast_fp16, var_5018_cast_fp16))[name = string("input_127_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_5013_to_fp16 = const()[name = string("op_5013_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_5013_to_fp16, x = input_127_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor normed_115_begin_0 = const()[name = string("normed_115_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_115_end_0 = const()[name = string("normed_115_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_115_end_mask_0 = const()[name = string("normed_115_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_115_cast_fp16 = slice_by_index(begin = normed_115_begin_0, end = normed_115_end_0, end_mask = normed_115_end_mask_0, x = normed_113_cast_fp16)[name = string("normed_115_cast_fp16")]; + tensor const_213_promoted_to_fp16 = const()[name = string("const_213_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421256512)))]; + tensor hidden_states_73_cast_fp16 = mul(x = normed_115_cast_fp16, y = const_213_promoted_to_fp16)[name = string("hidden_states_73_cast_fp16")]; + tensor var_5035 = const()[name = string("op_5035"), val = tensor([0, 2, 1])]; + tensor var_5038_axes_0 = const()[name = string("op_5038_axes_0"), val = tensor([2])]; + tensor var_5036_cast_fp16 = transpose(perm = var_5035, x = hidden_states_73_cast_fp16)[name = string("transpose_125")]; + tensor var_5038_cast_fp16 = expand_dims(axes = var_5038_axes_0, x = var_5036_cast_fp16)[name = string("op_5038_cast_fp16")]; + string var_5054_pad_type_0 = const()[name = string("op_5054_pad_type_0"), val = string("valid")]; + tensor var_5054_strides_0 = const()[name = string("op_5054_strides_0"), val = tensor([1, 1])]; + tensor var_5054_pad_0 = const()[name = string("op_5054_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5054_dilations_0 = const()[name = string("op_5054_dilations_0"), val = tensor([1, 1])]; + int32 var_5054_groups_0 = const()[name = string("op_5054_groups_0"), val = int32(1)]; + tensor var_5054 = conv(dilations = var_5054_dilations_0, groups = var_5054_groups_0, pad = var_5054_pad_0, pad_type = var_5054_pad_type_0, strides = var_5054_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_5038_cast_fp16)[name = string("op_5054")]; + tensor var_5059 = const()[name = string("op_5059"), val = tensor([1, 16, 1, 128])]; + tensor var_5060 = reshape(shape = var_5059, x = var_5054)[name = string("op_5060")]; + string var_5076_pad_type_0 = const()[name = string("op_5076_pad_type_0"), val = string("valid")]; + tensor var_5076_strides_0 = const()[name = string("op_5076_strides_0"), val = tensor([1, 1])]; + tensor var_5076_pad_0 = const()[name = string("op_5076_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5076_dilations_0 = const()[name = string("op_5076_dilations_0"), val = tensor([1, 1])]; + int32 var_5076_groups_0 = const()[name = string("op_5076_groups_0"), val = int32(1)]; + tensor var_5076 = conv(dilations = var_5076_dilations_0, groups = var_5076_groups_0, pad = var_5076_pad_0, pad_type = var_5076_pad_type_0, strides = var_5076_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_5038_cast_fp16)[name = string("op_5076")]; + tensor var_5081 = const()[name = string("op_5081"), val = tensor([1, 8, 1, 128])]; + tensor var_5082 = reshape(shape = var_5081, x = var_5076)[name = string("op_5082")]; + string var_5098_pad_type_0 = const()[name = string("op_5098_pad_type_0"), val = string("valid")]; + tensor var_5098_strides_0 = const()[name = string("op_5098_strides_0"), val = tensor([1, 1])]; + tensor var_5098_pad_0 = const()[name = string("op_5098_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5098_dilations_0 = const()[name = string("op_5098_dilations_0"), val = tensor([1, 1])]; + int32 var_5098_groups_0 = const()[name = string("op_5098_groups_0"), val = int32(1)]; + tensor var_5098 = conv(dilations = var_5098_dilations_0, groups = var_5098_groups_0, pad = var_5098_pad_0, pad_type = var_5098_pad_type_0, strides = var_5098_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_5038_cast_fp16)[name = string("op_5098")]; + tensor var_5103 = const()[name = string("op_5103"), val = tensor([1, 8, 1, 128])]; + tensor var_5104 = reshape(shape = var_5103, x = var_5098)[name = string("op_5104")]; + int32 var_5119 = const()[name = string("op_5119"), val = int32(-1)]; + fp16 const_214_promoted = const()[name = string("const_214_promoted"), val = fp16(-0x1p+0)]; + tensor var_5121 = mul(x = var_5060, y = const_214_promoted)[name = string("op_5121")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131 = concat(axis = var_5119, interleave = input_131_interleave_0, values = (var_5060, var_5121))[name = string("input_131")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_5116_to_fp16 = const()[name = string("op_5116_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_5116_to_fp16, x = input_131)[name = string("normed_117_cast_fp16")]; + tensor normed_119_begin_0 = const()[name = string("normed_119_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_119_end_0 = const()[name = string("normed_119_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_119_end_mask_0 = const()[name = string("normed_119_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_119 = slice_by_index(begin = normed_119_begin_0, end = normed_119_end_0, end_mask = normed_119_end_mask_0, x = normed_117_cast_fp16)[name = string("normed_119")]; + tensor const_217 = const()[name = string("const_217"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421258624)))]; + tensor q_15 = mul(x = normed_119, y = const_217)[name = string("q_15")]; + int32 var_5144 = const()[name = string("op_5144"), val = int32(-1)]; + fp16 const_218_promoted = const()[name = string("const_218_promoted"), val = fp16(-0x1p+0)]; + tensor var_5146 = mul(x = var_5082, y = const_218_promoted)[name = string("op_5146")]; + bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; + tensor input_133 = concat(axis = var_5144, interleave = input_133_interleave_0, values = (var_5082, var_5146))[name = string("input_133")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_5141_to_fp16 = const()[name = string("op_5141_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_5141_to_fp16, x = input_133)[name = string("normed_121_cast_fp16")]; + tensor normed_123_begin_0 = const()[name = string("normed_123_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_123_end_0 = const()[name = string("normed_123_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_123_end_mask_0 = const()[name = string("normed_123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_123 = slice_by_index(begin = normed_123_begin_0, end = normed_123_end_0, end_mask = normed_123_end_mask_0, x = normed_121_cast_fp16)[name = string("normed_123")]; + tensor const_221 = const()[name = string("const_221"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421258944)))]; + tensor k_15 = mul(x = normed_123, y = const_221)[name = string("k_15")]; + tensor var_5160 = mul(x = q_15, y = cos_1_cast_fp16)[name = string("op_5160")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15)[name = string("x1_29")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15)[name = string("x2_29")]; + fp16 const_224_promoted = const()[name = string("const_224_promoted"), val = fp16(-0x1p+0)]; + tensor var_5181 = mul(x = x2_29, y = const_224_promoted)[name = string("op_5181")]; + int32 var_5183 = const()[name = string("op_5183"), val = int32(-1)]; + bool var_5184_interleave_0 = const()[name = string("op_5184_interleave_0"), val = bool(false)]; + tensor var_5184 = concat(axis = var_5183, interleave = var_5184_interleave_0, values = (var_5181, x1_29))[name = string("op_5184")]; + tensor var_5185 = mul(x = var_5184, y = sin_1_cast_fp16)[name = string("op_5185")]; + tensor query_states_29 = add(x = var_5160, y = var_5185)[name = string("query_states_29")]; + tensor var_5188 = mul(x = k_15, y = cos_1_cast_fp16)[name = string("op_5188")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15)[name = string("x1_31")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15)[name = string("x2_31")]; + fp16 const_227_promoted = const()[name = string("const_227_promoted"), val = fp16(-0x1p+0)]; + tensor var_5209 = mul(x = x2_31, y = const_227_promoted)[name = string("op_5209")]; + int32 var_5211 = const()[name = string("op_5211"), val = int32(-1)]; + bool var_5212_interleave_0 = const()[name = string("op_5212_interleave_0"), val = bool(false)]; + tensor var_5212 = concat(axis = var_5211, interleave = var_5212_interleave_0, values = (var_5209, x1_31))[name = string("op_5212")]; + tensor var_5213 = mul(x = var_5212, y = sin_1_cast_fp16)[name = string("op_5213")]; + tensor key_states_29 = add(x = var_5188, y = var_5213)[name = string("key_states_29")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; + int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; + bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; + tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; + tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; + tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_1746, concat_59_values3_0))[name = string("concat_59")]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = key_states_29, x = coreml_update_state_69)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([35])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([36])]; + int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; + bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; + tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; + tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; + tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; + int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; + bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; + tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_1746, concat_63_values3_0))[name = string("concat_63")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_5104, x = coreml_update_state_70)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; + tensor var_5268_begin_0 = const()[name = string("op_5268_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5268_end_0 = const()[name = string("op_5268_end_0"), val = tensor([8, 8, 4096, 128])]; + tensor var_5268_end_mask_0 = const()[name = string("op_5268_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5268_cast_fp16 = slice_by_index(begin = var_5268_begin_0, end = var_5268_end_0, end_mask = var_5268_end_mask_0, x = coreml_update_state_71)[name = string("op_5268_cast_fp16")]; + tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; + tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_5268_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; + tensor var_5275_begin_0 = const()[name = string("op_5275_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor var_5275_end_0 = const()[name = string("op_5275_end_0"), val = tensor([36, 8, 4096, 128])]; + tensor var_5275_end_mask_0 = const()[name = string("op_5275_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5275_cast_fp16 = slice_by_index(begin = var_5275_begin_0, end = var_5275_end_0, end_mask = var_5275_end_mask_0, x = coreml_update_state_71)[name = string("op_5275_cast_fp16")]; + tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; + tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_5275_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; + tensor x_115_axes_0 = const()[name = string("x_115_axes_0"), val = tensor([1])]; + tensor x_115_cast_fp16 = expand_dims(axes = x_115_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_115_cast_fp16")]; + tensor var_5312 = const()[name = string("op_5312"), val = tensor([1, 2, 1, 1])]; + tensor x_117_cast_fp16 = tile(reps = var_5312, x = x_115_cast_fp16)[name = string("x_117_cast_fp16")]; + tensor var_5324 = const()[name = string("op_5324"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_31_cast_fp16 = reshape(shape = var_5324, x = x_117_cast_fp16)[name = string("key_states_31_cast_fp16")]; + tensor x_121_axes_0 = const()[name = string("x_121_axes_0"), val = tensor([1])]; + tensor x_121_cast_fp16 = expand_dims(axes = x_121_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_121_cast_fp16")]; + tensor var_5332 = const()[name = string("op_5332"), val = tensor([1, 2, 1, 1])]; + tensor x_123_cast_fp16 = tile(reps = var_5332, x = x_121_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_5344 = const()[name = string("op_5344"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_45_cast_fp16 = reshape(shape = var_5344, x = x_123_cast_fp16)[name = string("value_states_45_cast_fp16")]; + bool var_5359_transpose_x_1 = const()[name = string("op_5359_transpose_x_1"), val = bool(false)]; + bool var_5359_transpose_y_1 = const()[name = string("op_5359_transpose_y_1"), val = bool(true)]; + tensor var_5359 = matmul(transpose_x = var_5359_transpose_x_1, transpose_y = var_5359_transpose_y_1, x = query_states_29, y = key_states_31_cast_fp16)[name = string("op_5359")]; + fp16 var_5360_to_fp16 = const()[name = string("op_5360_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_43_cast_fp16 = mul(x = var_5359, y = var_5360_to_fp16)[name = string("attn_weights_43_cast_fp16")]; + tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = causal_mask)[name = string("attn_weights_45_cast_fp16")]; + int32 var_5395 = const()[name = string("op_5395"), val = int32(-1)]; + tensor attn_weights_47_cast_fp16 = softmax(axis = var_5395, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; + bool attn_output_71_transpose_x_0 = const()[name = string("attn_output_71_transpose_x_0"), val = bool(false)]; + bool attn_output_71_transpose_y_0 = const()[name = string("attn_output_71_transpose_y_0"), val = bool(false)]; + tensor attn_output_71_cast_fp16 = matmul(transpose_x = attn_output_71_transpose_x_0, transpose_y = attn_output_71_transpose_y_0, x = attn_weights_47_cast_fp16, y = value_states_45_cast_fp16)[name = string("attn_output_71_cast_fp16")]; + tensor var_5406_perm_0 = const()[name = string("op_5406_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5410 = const()[name = string("op_5410"), val = tensor([1, 1, 2048])]; + tensor var_5406_cast_fp16 = transpose(perm = var_5406_perm_0, x = attn_output_71_cast_fp16)[name = string("transpose_124")]; + tensor attn_output_75_cast_fp16 = reshape(shape = var_5410, x = var_5406_cast_fp16)[name = string("attn_output_75_cast_fp16")]; + tensor var_5415 = const()[name = string("op_5415"), val = tensor([0, 2, 1])]; + string var_5431_pad_type_0 = const()[name = string("op_5431_pad_type_0"), val = string("valid")]; + int32 var_5431_groups_0 = const()[name = string("op_5431_groups_0"), val = int32(1)]; + tensor var_5431_strides_0 = const()[name = string("op_5431_strides_0"), val = tensor([1])]; + tensor var_5431_pad_0 = const()[name = string("op_5431_pad_0"), val = tensor([0, 0])]; + tensor var_5431_dilations_0 = const()[name = string("op_5431_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421259264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423356480))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5416_cast_fp16 = transpose(perm = var_5415, x = attn_output_75_cast_fp16)[name = string("transpose_123")]; + tensor var_5431_cast_fp16 = conv(dilations = var_5431_dilations_0, groups = var_5431_groups_0, pad = var_5431_pad_0, pad_type = var_5431_pad_type_0, strides = var_5431_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_5416_cast_fp16)[name = string("op_5431_cast_fp16")]; + tensor var_5435 = const()[name = string("op_5435"), val = tensor([0, 2, 1])]; + tensor attn_output_79_cast_fp16 = transpose(perm = var_5435, x = var_5431_cast_fp16)[name = string("transpose_122")]; + tensor hidden_states_79_cast_fp16 = add(x = hidden_states_71_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; + int32 var_5448 = const()[name = string("op_5448"), val = int32(-1)]; + fp16 const_236_promoted_to_fp16 = const()[name = string("const_236_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5450_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = const_236_promoted_to_fp16)[name = string("op_5450_cast_fp16")]; + bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; + tensor input_137_cast_fp16 = concat(axis = var_5448, interleave = input_137_interleave_0, values = (hidden_states_79_cast_fp16, var_5450_cast_fp16))[name = string("input_137_cast_fp16")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_5445_to_fp16 = const()[name = string("op_5445_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_5445_to_fp16, x = input_137_cast_fp16)[name = string("normed_125_cast_fp16")]; + tensor normed_127_begin_0 = const()[name = string("normed_127_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_127_end_0 = const()[name = string("normed_127_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_127_end_mask_0 = const()[name = string("normed_127_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_127_cast_fp16 = slice_by_index(begin = normed_127_begin_0, end = normed_127_end_0, end_mask = normed_127_end_mask_0, x = normed_125_cast_fp16)[name = string("normed_127_cast_fp16")]; + tensor const_239_promoted_to_fp16 = const()[name = string("const_239_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423422080)))]; + tensor x_125_cast_fp16 = mul(x = normed_127_cast_fp16, y = const_239_promoted_to_fp16)[name = string("x_125_cast_fp16")]; + tensor var_5475 = const()[name = string("op_5475"), val = tensor([0, 2, 1])]; + tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; + tensor var_5476 = transpose(perm = var_5475, x = x_125_cast_fp16)[name = string("transpose_121")]; + tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_5476)[name = string("input_139")]; + string input_141_pad_type_0 = const()[name = string("input_141_pad_type_0"), val = string("valid")]; + tensor input_141_strides_0 = const()[name = string("input_141_strides_0"), val = tensor([1, 1])]; + tensor input_141_pad_0 = const()[name = string("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_141_dilations_0 = const()[name = string("input_141_dilations_0"), val = tensor([1, 1])]; + int32 input_141_groups_0 = const()[name = string("input_141_groups_0"), val = int32(1)]; + tensor input_141 = conv(dilations = input_141_dilations_0, groups = input_141_groups_0, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = input_141_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_139)[name = string("input_141")]; + string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; + tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; + tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; + int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; + tensor b_15 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_139)[name = string("b_15")]; + tensor c_15 = silu(x = input_141)[name = string("c_15")]; + tensor input_143 = mul(x = c_15, y = b_15)[name = string("input_143")]; + string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; + tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; + tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; + int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; + tensor e_15 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_143)[name = string("e_15")]; + tensor var_5498_axes_0 = const()[name = string("op_5498_axes_0"), val = tensor([2])]; + tensor var_5498 = squeeze(axes = var_5498_axes_0, x = e_15)[name = string("op_5498")]; + tensor var_5499 = const()[name = string("op_5499"), val = tensor([0, 2, 1])]; + tensor var_5500 = transpose(perm = var_5499, x = var_5498)[name = string("transpose_120")]; + tensor hidden_states_81_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = var_5500)[name = string("hidden_states_81_cast_fp16")]; + int32 var_5512 = const()[name = string("op_5512"), val = int32(-1)]; + fp16 const_240_promoted_to_fp16 = const()[name = string("const_240_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5514_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_240_promoted_to_fp16)[name = string("op_5514_cast_fp16")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145_cast_fp16 = concat(axis = var_5512, interleave = input_145_interleave_0, values = (hidden_states_81_cast_fp16, var_5514_cast_fp16))[name = string("input_145_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_5509_to_fp16 = const()[name = string("op_5509_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_5509_to_fp16, x = input_145_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor normed_131_begin_0 = const()[name = string("normed_131_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_131_end_0 = const()[name = string("normed_131_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_131_end_mask_0 = const()[name = string("normed_131_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_131_cast_fp16 = slice_by_index(begin = normed_131_begin_0, end = normed_131_end_0, end_mask = normed_131_end_mask_0, x = normed_129_cast_fp16)[name = string("normed_131_cast_fp16")]; + tensor const_243_promoted_to_fp16 = const()[name = string("const_243_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423424192)))]; + tensor hidden_states_83_cast_fp16 = mul(x = normed_131_cast_fp16, y = const_243_promoted_to_fp16)[name = string("hidden_states_83_cast_fp16")]; + tensor var_5531 = const()[name = string("op_5531"), val = tensor([0, 2, 1])]; + tensor var_5534_axes_0 = const()[name = string("op_5534_axes_0"), val = tensor([2])]; + tensor var_5532_cast_fp16 = transpose(perm = var_5531, x = hidden_states_83_cast_fp16)[name = string("transpose_119")]; + tensor var_5534_cast_fp16 = expand_dims(axes = var_5534_axes_0, x = var_5532_cast_fp16)[name = string("op_5534_cast_fp16")]; + string var_5550_pad_type_0 = const()[name = string("op_5550_pad_type_0"), val = string("valid")]; + tensor var_5550_strides_0 = const()[name = string("op_5550_strides_0"), val = tensor([1, 1])]; + tensor var_5550_pad_0 = const()[name = string("op_5550_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5550_dilations_0 = const()[name = string("op_5550_dilations_0"), val = tensor([1, 1])]; + int32 var_5550_groups_0 = const()[name = string("op_5550_groups_0"), val = int32(1)]; + tensor var_5550 = conv(dilations = var_5550_dilations_0, groups = var_5550_groups_0, pad = var_5550_pad_0, pad_type = var_5550_pad_type_0, strides = var_5550_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_5534_cast_fp16)[name = string("op_5550")]; + tensor var_5555 = const()[name = string("op_5555"), val = tensor([1, 16, 1, 128])]; + tensor var_5556 = reshape(shape = var_5555, x = var_5550)[name = string("op_5556")]; + string var_5572_pad_type_0 = const()[name = string("op_5572_pad_type_0"), val = string("valid")]; + tensor var_5572_strides_0 = const()[name = string("op_5572_strides_0"), val = tensor([1, 1])]; + tensor var_5572_pad_0 = const()[name = string("op_5572_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5572_dilations_0 = const()[name = string("op_5572_dilations_0"), val = tensor([1, 1])]; + int32 var_5572_groups_0 = const()[name = string("op_5572_groups_0"), val = int32(1)]; + tensor var_5572 = conv(dilations = var_5572_dilations_0, groups = var_5572_groups_0, pad = var_5572_pad_0, pad_type = var_5572_pad_type_0, strides = var_5572_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_5534_cast_fp16)[name = string("op_5572")]; + tensor var_5577 = const()[name = string("op_5577"), val = tensor([1, 8, 1, 128])]; + tensor var_5578 = reshape(shape = var_5577, x = var_5572)[name = string("op_5578")]; + string var_5594_pad_type_0 = const()[name = string("op_5594_pad_type_0"), val = string("valid")]; + tensor var_5594_strides_0 = const()[name = string("op_5594_strides_0"), val = tensor([1, 1])]; + tensor var_5594_pad_0 = const()[name = string("op_5594_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5594_dilations_0 = const()[name = string("op_5594_dilations_0"), val = tensor([1, 1])]; + int32 var_5594_groups_0 = const()[name = string("op_5594_groups_0"), val = int32(1)]; + tensor var_5594 = conv(dilations = var_5594_dilations_0, groups = var_5594_groups_0, pad = var_5594_pad_0, pad_type = var_5594_pad_type_0, strides = var_5594_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_5534_cast_fp16)[name = string("op_5594")]; + tensor var_5599 = const()[name = string("op_5599"), val = tensor([1, 8, 1, 128])]; + tensor var_5600 = reshape(shape = var_5599, x = var_5594)[name = string("op_5600")]; + int32 var_5615 = const()[name = string("op_5615"), val = int32(-1)]; + fp16 const_244_promoted = const()[name = string("const_244_promoted"), val = fp16(-0x1p+0)]; + tensor var_5617 = mul(x = var_5556, y = const_244_promoted)[name = string("op_5617")]; + bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; + tensor input_149 = concat(axis = var_5615, interleave = input_149_interleave_0, values = (var_5556, var_5617))[name = string("input_149")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_5612_to_fp16 = const()[name = string("op_5612_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_5612_to_fp16, x = input_149)[name = string("normed_133_cast_fp16")]; + tensor normed_135_begin_0 = const()[name = string("normed_135_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_135_end_0 = const()[name = string("normed_135_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_135_end_mask_0 = const()[name = string("normed_135_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_135 = slice_by_index(begin = normed_135_begin_0, end = normed_135_end_0, end_mask = normed_135_end_mask_0, x = normed_133_cast_fp16)[name = string("normed_135")]; + tensor const_247 = const()[name = string("const_247"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423426304)))]; + tensor q_17 = mul(x = normed_135, y = const_247)[name = string("q_17")]; + int32 var_5640 = const()[name = string("op_5640"), val = int32(-1)]; + fp16 const_248_promoted = const()[name = string("const_248_promoted"), val = fp16(-0x1p+0)]; + tensor var_5642 = mul(x = var_5578, y = const_248_promoted)[name = string("op_5642")]; + bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; + tensor input_151 = concat(axis = var_5640, interleave = input_151_interleave_0, values = (var_5578, var_5642))[name = string("input_151")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_5637_to_fp16 = const()[name = string("op_5637_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_5637_to_fp16, x = input_151)[name = string("normed_137_cast_fp16")]; + tensor normed_139_begin_0 = const()[name = string("normed_139_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_139_end_0 = const()[name = string("normed_139_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_139_end_mask_0 = const()[name = string("normed_139_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_139 = slice_by_index(begin = normed_139_begin_0, end = normed_139_end_0, end_mask = normed_139_end_mask_0, x = normed_137_cast_fp16)[name = string("normed_139")]; + tensor const_251 = const()[name = string("const_251"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423426624)))]; + tensor k_17 = mul(x = normed_139, y = const_251)[name = string("k_17")]; + tensor var_5656 = mul(x = q_17, y = cos_1_cast_fp16)[name = string("op_5656")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17)[name = string("x1_33")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17)[name = string("x2_33")]; + fp16 const_254_promoted = const()[name = string("const_254_promoted"), val = fp16(-0x1p+0)]; + tensor var_5677 = mul(x = x2_33, y = const_254_promoted)[name = string("op_5677")]; + int32 var_5679 = const()[name = string("op_5679"), val = int32(-1)]; + bool var_5680_interleave_0 = const()[name = string("op_5680_interleave_0"), val = bool(false)]; + tensor var_5680 = concat(axis = var_5679, interleave = var_5680_interleave_0, values = (var_5677, x1_33))[name = string("op_5680")]; + tensor var_5681 = mul(x = var_5680, y = sin_1_cast_fp16)[name = string("op_5681")]; + tensor query_states_33 = add(x = var_5656, y = var_5681)[name = string("query_states_33")]; + tensor var_5684 = mul(x = k_17, y = cos_1_cast_fp16)[name = string("op_5684")]; + tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17)[name = string("x1_35")]; + tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17)[name = string("x2_35")]; + fp16 const_257_promoted = const()[name = string("const_257_promoted"), val = fp16(-0x1p+0)]; + tensor var_5705 = mul(x = x2_35, y = const_257_promoted)[name = string("op_5705")]; + int32 var_5707 = const()[name = string("op_5707"), val = int32(-1)]; + bool var_5708_interleave_0 = const()[name = string("op_5708_interleave_0"), val = bool(false)]; + tensor var_5708 = concat(axis = var_5707, interleave = var_5708_interleave_0, values = (var_5705, x1_35))[name = string("op_5708")]; + tensor var_5709 = mul(x = var_5708, y = sin_1_cast_fp16)[name = string("op_5709")]; + tensor key_states_33 = add(x = var_5684, y = var_5709)[name = string("key_states_33")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([8])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([9])]; + int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; + bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; + tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_66")]; + tensor concat_67_values1_0 = const()[name = string("concat_67_values1_0"), val = tensor([0])]; + tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; + int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; + bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; + tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_100, concat_67_values1_0, var_1746, concat_67_values3_0))[name = string("concat_67")]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = key_states_33, x = coreml_update_state_71)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; + tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([36])]; + tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; + tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; + tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([37])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_70")]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (expand_dims_106, concat_71_values1_0, var_1746, concat_71_values3_0))[name = string("concat_71")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = var_5600, x = coreml_update_state_72)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; + tensor var_5764_begin_0 = const()[name = string("op_5764_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_5764_end_0 = const()[name = string("op_5764_end_0"), val = tensor([9, 8, 4096, 128])]; + tensor var_5764_end_mask_0 = const()[name = string("op_5764_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5764_cast_fp16 = slice_by_index(begin = var_5764_begin_0, end = var_5764_end_0, end_mask = var_5764_end_mask_0, x = coreml_update_state_73)[name = string("op_5764_cast_fp16")]; + tensor K_layer_cache_17_axes_0 = const()[name = string("K_layer_cache_17_axes_0"), val = tensor([0])]; + tensor K_layer_cache_17_cast_fp16 = squeeze(axes = K_layer_cache_17_axes_0, x = var_5764_cast_fp16)[name = string("K_layer_cache_17_cast_fp16")]; + tensor var_5771_begin_0 = const()[name = string("op_5771_begin_0"), val = tensor([36, 0, 0, 0])]; + tensor var_5771_end_0 = const()[name = string("op_5771_end_0"), val = tensor([37, 8, 4096, 128])]; + tensor var_5771_end_mask_0 = const()[name = string("op_5771_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5771_cast_fp16 = slice_by_index(begin = var_5771_begin_0, end = var_5771_end_0, end_mask = var_5771_end_mask_0, x = coreml_update_state_73)[name = string("op_5771_cast_fp16")]; + tensor V_layer_cache_17_axes_0 = const()[name = string("V_layer_cache_17_axes_0"), val = tensor([0])]; + tensor V_layer_cache_17_cast_fp16 = squeeze(axes = V_layer_cache_17_axes_0, x = var_5771_cast_fp16)[name = string("V_layer_cache_17_cast_fp16")]; + tensor x_131_axes_0 = const()[name = string("x_131_axes_0"), val = tensor([1])]; + tensor x_131_cast_fp16 = expand_dims(axes = x_131_axes_0, x = K_layer_cache_17_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_5808 = const()[name = string("op_5808"), val = tensor([1, 2, 1, 1])]; + tensor x_133_cast_fp16 = tile(reps = var_5808, x = x_131_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_5820 = const()[name = string("op_5820"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_35_cast_fp16 = reshape(shape = var_5820, x = x_133_cast_fp16)[name = string("key_states_35_cast_fp16")]; + tensor x_137_axes_0 = const()[name = string("x_137_axes_0"), val = tensor([1])]; + tensor x_137_cast_fp16 = expand_dims(axes = x_137_axes_0, x = V_layer_cache_17_cast_fp16)[name = string("x_137_cast_fp16")]; + tensor var_5828 = const()[name = string("op_5828"), val = tensor([1, 2, 1, 1])]; + tensor x_139_cast_fp16 = tile(reps = var_5828, x = x_137_cast_fp16)[name = string("x_139_cast_fp16")]; + tensor var_5840 = const()[name = string("op_5840"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_51_cast_fp16 = reshape(shape = var_5840, x = x_139_cast_fp16)[name = string("value_states_51_cast_fp16")]; + bool var_5855_transpose_x_1 = const()[name = string("op_5855_transpose_x_1"), val = bool(false)]; + bool var_5855_transpose_y_1 = const()[name = string("op_5855_transpose_y_1"), val = bool(true)]; + tensor var_5855 = matmul(transpose_x = var_5855_transpose_x_1, transpose_y = var_5855_transpose_y_1, x = query_states_33, y = key_states_35_cast_fp16)[name = string("op_5855")]; + fp16 var_5856_to_fp16 = const()[name = string("op_5856_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_49_cast_fp16 = mul(x = var_5855, y = var_5856_to_fp16)[name = string("attn_weights_49_cast_fp16")]; + tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; + int32 var_5891 = const()[name = string("op_5891"), val = int32(-1)]; + tensor attn_weights_53_cast_fp16 = softmax(axis = var_5891, x = attn_weights_51_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; + bool attn_output_81_transpose_x_0 = const()[name = string("attn_output_81_transpose_x_0"), val = bool(false)]; + bool attn_output_81_transpose_y_0 = const()[name = string("attn_output_81_transpose_y_0"), val = bool(false)]; + tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = attn_weights_53_cast_fp16, y = value_states_51_cast_fp16)[name = string("attn_output_81_cast_fp16")]; + tensor var_5902_perm_0 = const()[name = string("op_5902_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5906 = const()[name = string("op_5906"), val = tensor([1, 1, 2048])]; + tensor var_5902_cast_fp16 = transpose(perm = var_5902_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_118")]; + tensor attn_output_85_cast_fp16 = reshape(shape = var_5906, x = var_5902_cast_fp16)[name = string("attn_output_85_cast_fp16")]; + tensor var_5911 = const()[name = string("op_5911"), val = tensor([0, 2, 1])]; + string var_5927_pad_type_0 = const()[name = string("op_5927_pad_type_0"), val = string("valid")]; + int32 var_5927_groups_0 = const()[name = string("op_5927_groups_0"), val = int32(1)]; + tensor var_5927_strides_0 = const()[name = string("op_5927_strides_0"), val = tensor([1])]; + tensor var_5927_pad_0 = const()[name = string("op_5927_pad_0"), val = tensor([0, 0])]; + tensor var_5927_dilations_0 = const()[name = string("op_5927_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423426944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425524160))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5912_cast_fp16 = transpose(perm = var_5911, x = attn_output_85_cast_fp16)[name = string("transpose_117")]; + tensor var_5927_cast_fp16 = conv(dilations = var_5927_dilations_0, groups = var_5927_groups_0, pad = var_5927_pad_0, pad_type = var_5927_pad_type_0, strides = var_5927_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_5912_cast_fp16)[name = string("op_5927_cast_fp16")]; + tensor var_5931 = const()[name = string("op_5931"), val = tensor([0, 2, 1])]; + tensor attn_output_89_cast_fp16 = transpose(perm = var_5931, x = var_5927_cast_fp16)[name = string("transpose_116")]; + tensor hidden_states_89_cast_fp16 = add(x = hidden_states_81_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; + int32 var_5944 = const()[name = string("op_5944"), val = int32(-1)]; + fp16 const_266_promoted_to_fp16 = const()[name = string("const_266_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5946_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = const_266_promoted_to_fp16)[name = string("op_5946_cast_fp16")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155_cast_fp16 = concat(axis = var_5944, interleave = input_155_interleave_0, values = (hidden_states_89_cast_fp16, var_5946_cast_fp16))[name = string("input_155_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_5941_to_fp16 = const()[name = string("op_5941_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_5941_to_fp16, x = input_155_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor normed_143_begin_0 = const()[name = string("normed_143_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_143_end_0 = const()[name = string("normed_143_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_143_end_mask_0 = const()[name = string("normed_143_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_143_cast_fp16 = slice_by_index(begin = normed_143_begin_0, end = normed_143_end_0, end_mask = normed_143_end_mask_0, x = normed_141_cast_fp16)[name = string("normed_143_cast_fp16")]; + tensor const_269_promoted_to_fp16 = const()[name = string("const_269_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425589760)))]; + tensor x_141_cast_fp16 = mul(x = normed_143_cast_fp16, y = const_269_promoted_to_fp16)[name = string("x_141_cast_fp16")]; + tensor var_5971 = const()[name = string("op_5971"), val = tensor([0, 2, 1])]; + tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; + tensor var_5972 = transpose(perm = var_5971, x = x_141_cast_fp16)[name = string("transpose_115")]; + tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_5972)[name = string("input_157")]; + string input_159_pad_type_0 = const()[name = string("input_159_pad_type_0"), val = string("valid")]; + tensor input_159_strides_0 = const()[name = string("input_159_strides_0"), val = tensor([1, 1])]; + tensor input_159_pad_0 = const()[name = string("input_159_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_159_dilations_0 = const()[name = string("input_159_dilations_0"), val = tensor([1, 1])]; + int32 input_159_groups_0 = const()[name = string("input_159_groups_0"), val = int32(1)]; + tensor input_159 = conv(dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_157)[name = string("input_159")]; + string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; + tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; + tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; + int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; + tensor b_17 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_157)[name = string("b_17")]; + tensor c_17 = silu(x = input_159)[name = string("c_17")]; + tensor input_161 = mul(x = c_17, y = b_17)[name = string("input_161")]; + string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; + tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; + tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; + int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; + tensor e_17 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_161)[name = string("e_17")]; + tensor var_5994_axes_0 = const()[name = string("op_5994_axes_0"), val = tensor([2])]; + tensor var_5994 = squeeze(axes = var_5994_axes_0, x = e_17)[name = string("op_5994")]; + tensor var_5995 = const()[name = string("op_5995"), val = tensor([0, 2, 1])]; + tensor var_5996 = transpose(perm = var_5995, x = var_5994)[name = string("transpose_114")]; + tensor hidden_states_91_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = var_5996)[name = string("hidden_states_91_cast_fp16")]; + int32 var_6008 = const()[name = string("op_6008"), val = int32(-1)]; + fp16 const_270_promoted_to_fp16 = const()[name = string("const_270_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6010_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_270_promoted_to_fp16)[name = string("op_6010_cast_fp16")]; + bool input_163_interleave_0 = const()[name = string("input_163_interleave_0"), val = bool(false)]; + tensor input_163_cast_fp16 = concat(axis = var_6008, interleave = input_163_interleave_0, values = (hidden_states_91_cast_fp16, var_6010_cast_fp16))[name = string("input_163_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_6005_to_fp16 = const()[name = string("op_6005_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_6005_to_fp16, x = input_163_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor normed_147_begin_0 = const()[name = string("normed_147_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_147_end_0 = const()[name = string("normed_147_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_147_end_mask_0 = const()[name = string("normed_147_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_147_cast_fp16 = slice_by_index(begin = normed_147_begin_0, end = normed_147_end_0, end_mask = normed_147_end_mask_0, x = normed_145_cast_fp16)[name = string("normed_147_cast_fp16")]; + tensor const_273_promoted_to_fp16 = const()[name = string("const_273_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425591872)))]; + tensor hidden_states_93_cast_fp16 = mul(x = normed_147_cast_fp16, y = const_273_promoted_to_fp16)[name = string("hidden_states_93_cast_fp16")]; + tensor var_6027 = const()[name = string("op_6027"), val = tensor([0, 2, 1])]; + tensor var_6030_axes_0 = const()[name = string("op_6030_axes_0"), val = tensor([2])]; + tensor var_6028_cast_fp16 = transpose(perm = var_6027, x = hidden_states_93_cast_fp16)[name = string("transpose_113")]; + tensor var_6030_cast_fp16 = expand_dims(axes = var_6030_axes_0, x = var_6028_cast_fp16)[name = string("op_6030_cast_fp16")]; + string var_6046_pad_type_0 = const()[name = string("op_6046_pad_type_0"), val = string("valid")]; + tensor var_6046_strides_0 = const()[name = string("op_6046_strides_0"), val = tensor([1, 1])]; + tensor var_6046_pad_0 = const()[name = string("op_6046_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6046_dilations_0 = const()[name = string("op_6046_dilations_0"), val = tensor([1, 1])]; + int32 var_6046_groups_0 = const()[name = string("op_6046_groups_0"), val = int32(1)]; + tensor var_6046 = conv(dilations = var_6046_dilations_0, groups = var_6046_groups_0, pad = var_6046_pad_0, pad_type = var_6046_pad_type_0, strides = var_6046_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_6030_cast_fp16)[name = string("op_6046")]; + tensor var_6051 = const()[name = string("op_6051"), val = tensor([1, 16, 1, 128])]; + tensor var_6052 = reshape(shape = var_6051, x = var_6046)[name = string("op_6052")]; + string var_6068_pad_type_0 = const()[name = string("op_6068_pad_type_0"), val = string("valid")]; + tensor var_6068_strides_0 = const()[name = string("op_6068_strides_0"), val = tensor([1, 1])]; + tensor var_6068_pad_0 = const()[name = string("op_6068_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6068_dilations_0 = const()[name = string("op_6068_dilations_0"), val = tensor([1, 1])]; + int32 var_6068_groups_0 = const()[name = string("op_6068_groups_0"), val = int32(1)]; + tensor var_6068 = conv(dilations = var_6068_dilations_0, groups = var_6068_groups_0, pad = var_6068_pad_0, pad_type = var_6068_pad_type_0, strides = var_6068_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_6030_cast_fp16)[name = string("op_6068")]; + tensor var_6073 = const()[name = string("op_6073"), val = tensor([1, 8, 1, 128])]; + tensor var_6074 = reshape(shape = var_6073, x = var_6068)[name = string("op_6074")]; + string var_6090_pad_type_0 = const()[name = string("op_6090_pad_type_0"), val = string("valid")]; + tensor var_6090_strides_0 = const()[name = string("op_6090_strides_0"), val = tensor([1, 1])]; + tensor var_6090_pad_0 = const()[name = string("op_6090_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6090_dilations_0 = const()[name = string("op_6090_dilations_0"), val = tensor([1, 1])]; + int32 var_6090_groups_0 = const()[name = string("op_6090_groups_0"), val = int32(1)]; + tensor var_6090 = conv(dilations = var_6090_dilations_0, groups = var_6090_groups_0, pad = var_6090_pad_0, pad_type = var_6090_pad_type_0, strides = var_6090_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_6030_cast_fp16)[name = string("op_6090")]; + tensor var_6095 = const()[name = string("op_6095"), val = tensor([1, 8, 1, 128])]; + tensor var_6096 = reshape(shape = var_6095, x = var_6090)[name = string("op_6096")]; + int32 var_6111 = const()[name = string("op_6111"), val = int32(-1)]; + fp16 const_274_promoted = const()[name = string("const_274_promoted"), val = fp16(-0x1p+0)]; + tensor var_6113 = mul(x = var_6052, y = const_274_promoted)[name = string("op_6113")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167 = concat(axis = var_6111, interleave = input_167_interleave_0, values = (var_6052, var_6113))[name = string("input_167")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_6108_to_fp16 = const()[name = string("op_6108_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_6108_to_fp16, x = input_167)[name = string("normed_149_cast_fp16")]; + tensor normed_151_begin_0 = const()[name = string("normed_151_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_151_end_0 = const()[name = string("normed_151_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_151_end_mask_0 = const()[name = string("normed_151_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_151 = slice_by_index(begin = normed_151_begin_0, end = normed_151_end_0, end_mask = normed_151_end_mask_0, x = normed_149_cast_fp16)[name = string("normed_151")]; + tensor const_277 = const()[name = string("const_277"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425593984)))]; + tensor q_19 = mul(x = normed_151, y = const_277)[name = string("q_19")]; + int32 var_6136 = const()[name = string("op_6136"), val = int32(-1)]; + fp16 const_278_promoted = const()[name = string("const_278_promoted"), val = fp16(-0x1p+0)]; + tensor var_6138 = mul(x = var_6074, y = const_278_promoted)[name = string("op_6138")]; + bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)]; + tensor input_169 = concat(axis = var_6136, interleave = input_169_interleave_0, values = (var_6074, var_6138))[name = string("input_169")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_6133_to_fp16 = const()[name = string("op_6133_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_6133_to_fp16, x = input_169)[name = string("normed_153_cast_fp16")]; + tensor normed_155_begin_0 = const()[name = string("normed_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_155_end_0 = const()[name = string("normed_155_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_155_end_mask_0 = const()[name = string("normed_155_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_155 = slice_by_index(begin = normed_155_begin_0, end = normed_155_end_0, end_mask = normed_155_end_mask_0, x = normed_153_cast_fp16)[name = string("normed_155")]; + tensor const_281 = const()[name = string("const_281"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425594304)))]; + tensor k_19 = mul(x = normed_155, y = const_281)[name = string("k_19")]; + tensor var_6152 = mul(x = q_19, y = cos_1_cast_fp16)[name = string("op_6152")]; + tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19)[name = string("x1_37")]; + tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19)[name = string("x2_37")]; + fp16 const_284_promoted = const()[name = string("const_284_promoted"), val = fp16(-0x1p+0)]; + tensor var_6173 = mul(x = x2_37, y = const_284_promoted)[name = string("op_6173")]; + int32 var_6175 = const()[name = string("op_6175"), val = int32(-1)]; + bool var_6176_interleave_0 = const()[name = string("op_6176_interleave_0"), val = bool(false)]; + tensor var_6176 = concat(axis = var_6175, interleave = var_6176_interleave_0, values = (var_6173, x1_37))[name = string("op_6176")]; + tensor var_6177 = mul(x = var_6176, y = sin_1_cast_fp16)[name = string("op_6177")]; + tensor query_states_37 = add(x = var_6152, y = var_6177)[name = string("query_states_37")]; + tensor var_6180 = mul(x = k_19, y = cos_1_cast_fp16)[name = string("op_6180")]; + tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19)[name = string("x1_39")]; + tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19)[name = string("x2_39")]; + fp16 const_287_promoted = const()[name = string("const_287_promoted"), val = fp16(-0x1p+0)]; + tensor var_6201 = mul(x = x2_39, y = const_287_promoted)[name = string("op_6201")]; + int32 var_6203 = const()[name = string("op_6203"), val = int32(-1)]; + bool var_6204_interleave_0 = const()[name = string("op_6204_interleave_0"), val = bool(false)]; + tensor var_6204 = concat(axis = var_6203, interleave = var_6204_interleave_0, values = (var_6201, x1_39))[name = string("op_6204")]; + tensor var_6205 = mul(x = var_6204, y = sin_1_cast_fp16)[name = string("op_6205")]; + tensor key_states_37 = add(x = var_6180, y = var_6205)[name = string("key_states_37")]; + tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([9])]; + tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; + tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; + tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([10])]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_74")]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_112, concat_75_values1_0, var_1746, concat_75_values3_0))[name = string("concat_75")]; + tensor model_model_kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_19_stride_0, update = key_states_37, x = coreml_update_state_73)[name = string("model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; + tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([37])]; + tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; + tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; + tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([38])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_78")]; + tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; + tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; + int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; + bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; + tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_118, concat_79_values1_0, var_1746, concat_79_values3_0))[name = string("concat_79")]; + tensor model_model_kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_20_stride_0, update = var_6096, x = coreml_update_state_74)[name = string("model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; + tensor var_6260_begin_0 = const()[name = string("op_6260_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6260_end_0 = const()[name = string("op_6260_end_0"), val = tensor([10, 8, 4096, 128])]; + tensor var_6260_end_mask_0 = const()[name = string("op_6260_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6260_cast_fp16 = slice_by_index(begin = var_6260_begin_0, end = var_6260_end_0, end_mask = var_6260_end_mask_0, x = coreml_update_state_75)[name = string("op_6260_cast_fp16")]; + tensor K_layer_cache_19_axes_0 = const()[name = string("K_layer_cache_19_axes_0"), val = tensor([0])]; + tensor K_layer_cache_19_cast_fp16 = squeeze(axes = K_layer_cache_19_axes_0, x = var_6260_cast_fp16)[name = string("K_layer_cache_19_cast_fp16")]; + tensor var_6267_begin_0 = const()[name = string("op_6267_begin_0"), val = tensor([37, 0, 0, 0])]; + tensor var_6267_end_0 = const()[name = string("op_6267_end_0"), val = tensor([38, 8, 4096, 128])]; + tensor var_6267_end_mask_0 = const()[name = string("op_6267_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6267_cast_fp16 = slice_by_index(begin = var_6267_begin_0, end = var_6267_end_0, end_mask = var_6267_end_mask_0, x = coreml_update_state_75)[name = string("op_6267_cast_fp16")]; + tensor V_layer_cache_19_axes_0 = const()[name = string("V_layer_cache_19_axes_0"), val = tensor([0])]; + tensor V_layer_cache_19_cast_fp16 = squeeze(axes = V_layer_cache_19_axes_0, x = var_6267_cast_fp16)[name = string("V_layer_cache_19_cast_fp16")]; + tensor x_147_axes_0 = const()[name = string("x_147_axes_0"), val = tensor([1])]; + tensor x_147_cast_fp16 = expand_dims(axes = x_147_axes_0, x = K_layer_cache_19_cast_fp16)[name = string("x_147_cast_fp16")]; + tensor var_6304 = const()[name = string("op_6304"), val = tensor([1, 2, 1, 1])]; + tensor x_149_cast_fp16 = tile(reps = var_6304, x = x_147_cast_fp16)[name = string("x_149_cast_fp16")]; + tensor var_6316 = const()[name = string("op_6316"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_39_cast_fp16 = reshape(shape = var_6316, x = x_149_cast_fp16)[name = string("key_states_39_cast_fp16")]; + tensor x_153_axes_0 = const()[name = string("x_153_axes_0"), val = tensor([1])]; + tensor x_153_cast_fp16 = expand_dims(axes = x_153_axes_0, x = V_layer_cache_19_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_6324 = const()[name = string("op_6324"), val = tensor([1, 2, 1, 1])]; + tensor x_155_cast_fp16 = tile(reps = var_6324, x = x_153_cast_fp16)[name = string("x_155_cast_fp16")]; + tensor var_6336 = const()[name = string("op_6336"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_57_cast_fp16 = reshape(shape = var_6336, x = x_155_cast_fp16)[name = string("value_states_57_cast_fp16")]; + bool var_6351_transpose_x_1 = const()[name = string("op_6351_transpose_x_1"), val = bool(false)]; + bool var_6351_transpose_y_1 = const()[name = string("op_6351_transpose_y_1"), val = bool(true)]; + tensor var_6351 = matmul(transpose_x = var_6351_transpose_x_1, transpose_y = var_6351_transpose_y_1, x = query_states_37, y = key_states_39_cast_fp16)[name = string("op_6351")]; + fp16 var_6352_to_fp16 = const()[name = string("op_6352_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_55_cast_fp16 = mul(x = var_6351, y = var_6352_to_fp16)[name = string("attn_weights_55_cast_fp16")]; + tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = causal_mask)[name = string("attn_weights_57_cast_fp16")]; + int32 var_6387 = const()[name = string("op_6387"), val = int32(-1)]; + tensor attn_weights_59_cast_fp16 = softmax(axis = var_6387, x = attn_weights_57_cast_fp16)[name = string("attn_weights_59_cast_fp16")]; + bool attn_output_91_transpose_x_0 = const()[name = string("attn_output_91_transpose_x_0"), val = bool(false)]; + bool attn_output_91_transpose_y_0 = const()[name = string("attn_output_91_transpose_y_0"), val = bool(false)]; + tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_0, transpose_y = attn_output_91_transpose_y_0, x = attn_weights_59_cast_fp16, y = value_states_57_cast_fp16)[name = string("attn_output_91_cast_fp16")]; + tensor var_6398_perm_0 = const()[name = string("op_6398_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_6402 = const()[name = string("op_6402"), val = tensor([1, 1, 2048])]; + tensor var_6398_cast_fp16 = transpose(perm = var_6398_perm_0, x = attn_output_91_cast_fp16)[name = string("transpose_112")]; + tensor attn_output_95_cast_fp16 = reshape(shape = var_6402, x = var_6398_cast_fp16)[name = string("attn_output_95_cast_fp16")]; + tensor var_6407 = const()[name = string("op_6407"), val = tensor([0, 2, 1])]; + string var_6423_pad_type_0 = const()[name = string("op_6423_pad_type_0"), val = string("valid")]; + int32 var_6423_groups_0 = const()[name = string("op_6423_groups_0"), val = int32(1)]; + tensor var_6423_strides_0 = const()[name = string("op_6423_strides_0"), val = tensor([1])]; + tensor var_6423_pad_0 = const()[name = string("op_6423_pad_0"), val = tensor([0, 0])]; + tensor var_6423_dilations_0 = const()[name = string("op_6423_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425594624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427691840))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6408_cast_fp16 = transpose(perm = var_6407, x = attn_output_95_cast_fp16)[name = string("transpose_111")]; + tensor var_6423_cast_fp16 = conv(dilations = var_6423_dilations_0, groups = var_6423_groups_0, pad = var_6423_pad_0, pad_type = var_6423_pad_type_0, strides = var_6423_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_6408_cast_fp16)[name = string("op_6423_cast_fp16")]; + tensor var_6427 = const()[name = string("op_6427"), val = tensor([0, 2, 1])]; + tensor attn_output_99_cast_fp16 = transpose(perm = var_6427, x = var_6423_cast_fp16)[name = string("transpose_110")]; + tensor hidden_states_99_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; + int32 var_6440 = const()[name = string("op_6440"), val = int32(-1)]; + fp16 const_296_promoted_to_fp16 = const()[name = string("const_296_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6442_cast_fp16 = mul(x = hidden_states_99_cast_fp16, y = const_296_promoted_to_fp16)[name = string("op_6442_cast_fp16")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173_cast_fp16 = concat(axis = var_6440, interleave = input_173_interleave_0, values = (hidden_states_99_cast_fp16, var_6442_cast_fp16))[name = string("input_173_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_6437_to_fp16 = const()[name = string("op_6437_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_6437_to_fp16, x = input_173_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor normed_159_begin_0 = const()[name = string("normed_159_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_159_end_0 = const()[name = string("normed_159_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_159_end_mask_0 = const()[name = string("normed_159_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_159_cast_fp16 = slice_by_index(begin = normed_159_begin_0, end = normed_159_end_0, end_mask = normed_159_end_mask_0, x = normed_157_cast_fp16)[name = string("normed_159_cast_fp16")]; + tensor const_299_promoted_to_fp16 = const()[name = string("const_299_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427757440)))]; + tensor x_157_cast_fp16 = mul(x = normed_159_cast_fp16, y = const_299_promoted_to_fp16)[name = string("x_157_cast_fp16")]; + tensor var_6467 = const()[name = string("op_6467"), val = tensor([0, 2, 1])]; + tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; + tensor var_6468 = transpose(perm = var_6467, x = x_157_cast_fp16)[name = string("transpose_109")]; + tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_6468)[name = string("input_175")]; + string input_177_pad_type_0 = const()[name = string("input_177_pad_type_0"), val = string("valid")]; + tensor input_177_strides_0 = const()[name = string("input_177_strides_0"), val = tensor([1, 1])]; + tensor input_177_pad_0 = const()[name = string("input_177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_177_dilations_0 = const()[name = string("input_177_dilations_0"), val = tensor([1, 1])]; + int32 input_177_groups_0 = const()[name = string("input_177_groups_0"), val = int32(1)]; + tensor input_177 = conv(dilations = input_177_dilations_0, groups = input_177_groups_0, pad = input_177_pad_0, pad_type = input_177_pad_type_0, strides = input_177_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_175)[name = string("input_177")]; + string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; + tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; + tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; + int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; + tensor b_19 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_175)[name = string("b_19")]; + tensor c_19 = silu(x = input_177)[name = string("c_19")]; + tensor input_179 = mul(x = c_19, y = b_19)[name = string("input_179")]; + string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; + tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; + tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; + int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; + tensor e_19 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_179)[name = string("e_19")]; + tensor var_6490_axes_0 = const()[name = string("op_6490_axes_0"), val = tensor([2])]; + tensor var_6490 = squeeze(axes = var_6490_axes_0, x = e_19)[name = string("op_6490")]; + tensor var_6491 = const()[name = string("op_6491"), val = tensor([0, 2, 1])]; + tensor var_6492 = transpose(perm = var_6491, x = var_6490)[name = string("transpose_108")]; + tensor hidden_states_101_cast_fp16 = add(x = hidden_states_99_cast_fp16, y = var_6492)[name = string("hidden_states_101_cast_fp16")]; + int32 var_6504 = const()[name = string("op_6504"), val = int32(-1)]; + fp16 const_300_promoted_to_fp16 = const()[name = string("const_300_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6506_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_300_promoted_to_fp16)[name = string("op_6506_cast_fp16")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181_cast_fp16 = concat(axis = var_6504, interleave = input_181_interleave_0, values = (hidden_states_101_cast_fp16, var_6506_cast_fp16))[name = string("input_181_cast_fp16")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_6501_to_fp16 = const()[name = string("op_6501_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_6501_to_fp16, x = input_181_cast_fp16)[name = string("normed_161_cast_fp16")]; + tensor normed_163_begin_0 = const()[name = string("normed_163_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_163_end_0 = const()[name = string("normed_163_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_163_end_mask_0 = const()[name = string("normed_163_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_163_cast_fp16 = slice_by_index(begin = normed_163_begin_0, end = normed_163_end_0, end_mask = normed_163_end_mask_0, x = normed_161_cast_fp16)[name = string("normed_163_cast_fp16")]; + tensor const_303_promoted_to_fp16 = const()[name = string("const_303_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427759552)))]; + tensor hidden_states_103_cast_fp16 = mul(x = normed_163_cast_fp16, y = const_303_promoted_to_fp16)[name = string("hidden_states_103_cast_fp16")]; + tensor var_6523 = const()[name = string("op_6523"), val = tensor([0, 2, 1])]; + tensor var_6526_axes_0 = const()[name = string("op_6526_axes_0"), val = tensor([2])]; + tensor var_6524_cast_fp16 = transpose(perm = var_6523, x = hidden_states_103_cast_fp16)[name = string("transpose_107")]; + tensor var_6526_cast_fp16 = expand_dims(axes = var_6526_axes_0, x = var_6524_cast_fp16)[name = string("op_6526_cast_fp16")]; + string var_6542_pad_type_0 = const()[name = string("op_6542_pad_type_0"), val = string("valid")]; + tensor var_6542_strides_0 = const()[name = string("op_6542_strides_0"), val = tensor([1, 1])]; + tensor var_6542_pad_0 = const()[name = string("op_6542_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6542_dilations_0 = const()[name = string("op_6542_dilations_0"), val = tensor([1, 1])]; + int32 var_6542_groups_0 = const()[name = string("op_6542_groups_0"), val = int32(1)]; + tensor var_6542 = conv(dilations = var_6542_dilations_0, groups = var_6542_groups_0, pad = var_6542_pad_0, pad_type = var_6542_pad_type_0, strides = var_6542_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_6526_cast_fp16)[name = string("op_6542")]; + tensor var_6547 = const()[name = string("op_6547"), val = tensor([1, 16, 1, 128])]; + tensor var_6548 = reshape(shape = var_6547, x = var_6542)[name = string("op_6548")]; + string var_6564_pad_type_0 = const()[name = string("op_6564_pad_type_0"), val = string("valid")]; + tensor var_6564_strides_0 = const()[name = string("op_6564_strides_0"), val = tensor([1, 1])]; + tensor var_6564_pad_0 = const()[name = string("op_6564_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6564_dilations_0 = const()[name = string("op_6564_dilations_0"), val = tensor([1, 1])]; + int32 var_6564_groups_0 = const()[name = string("op_6564_groups_0"), val = int32(1)]; + tensor var_6564 = conv(dilations = var_6564_dilations_0, groups = var_6564_groups_0, pad = var_6564_pad_0, pad_type = var_6564_pad_type_0, strides = var_6564_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_6526_cast_fp16)[name = string("op_6564")]; + tensor var_6569 = const()[name = string("op_6569"), val = tensor([1, 8, 1, 128])]; + tensor var_6570 = reshape(shape = var_6569, x = var_6564)[name = string("op_6570")]; + string var_6586_pad_type_0 = const()[name = string("op_6586_pad_type_0"), val = string("valid")]; + tensor var_6586_strides_0 = const()[name = string("op_6586_strides_0"), val = tensor([1, 1])]; + tensor var_6586_pad_0 = const()[name = string("op_6586_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6586_dilations_0 = const()[name = string("op_6586_dilations_0"), val = tensor([1, 1])]; + int32 var_6586_groups_0 = const()[name = string("op_6586_groups_0"), val = int32(1)]; + tensor var_6586 = conv(dilations = var_6586_dilations_0, groups = var_6586_groups_0, pad = var_6586_pad_0, pad_type = var_6586_pad_type_0, strides = var_6586_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_6526_cast_fp16)[name = string("op_6586")]; + tensor var_6591 = const()[name = string("op_6591"), val = tensor([1, 8, 1, 128])]; + tensor var_6592 = reshape(shape = var_6591, x = var_6586)[name = string("op_6592")]; + int32 var_6607 = const()[name = string("op_6607"), val = int32(-1)]; + fp16 const_304_promoted = const()[name = string("const_304_promoted"), val = fp16(-0x1p+0)]; + tensor var_6609 = mul(x = var_6548, y = const_304_promoted)[name = string("op_6609")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_6607, interleave = input_185_interleave_0, values = (var_6548, var_6609))[name = string("input_185")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_6604_to_fp16 = const()[name = string("op_6604_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_6604_to_fp16, x = input_185)[name = string("normed_165_cast_fp16")]; + tensor normed_167_begin_0 = const()[name = string("normed_167_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_167_end_0 = const()[name = string("normed_167_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_167_end_mask_0 = const()[name = string("normed_167_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_167 = slice_by_index(begin = normed_167_begin_0, end = normed_167_end_0, end_mask = normed_167_end_mask_0, x = normed_165_cast_fp16)[name = string("normed_167")]; + tensor const_307 = const()[name = string("const_307"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427761664)))]; + tensor q_21 = mul(x = normed_167, y = const_307)[name = string("q_21")]; + int32 var_6632 = const()[name = string("op_6632"), val = int32(-1)]; + fp16 const_308_promoted = const()[name = string("const_308_promoted"), val = fp16(-0x1p+0)]; + tensor var_6634 = mul(x = var_6570, y = const_308_promoted)[name = string("op_6634")]; + bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; + tensor input_187 = concat(axis = var_6632, interleave = input_187_interleave_0, values = (var_6570, var_6634))[name = string("input_187")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_6629_to_fp16 = const()[name = string("op_6629_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_6629_to_fp16, x = input_187)[name = string("normed_169_cast_fp16")]; + tensor normed_171_begin_0 = const()[name = string("normed_171_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_171_end_0 = const()[name = string("normed_171_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_171_end_mask_0 = const()[name = string("normed_171_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_171 = slice_by_index(begin = normed_171_begin_0, end = normed_171_end_0, end_mask = normed_171_end_mask_0, x = normed_169_cast_fp16)[name = string("normed_171")]; + tensor const_311 = const()[name = string("const_311"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427761984)))]; + tensor k_21 = mul(x = normed_171, y = const_311)[name = string("k_21")]; + tensor var_6648 = mul(x = q_21, y = cos_1_cast_fp16)[name = string("op_6648")]; + tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21)[name = string("x1_41")]; + tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21)[name = string("x2_41")]; + fp16 const_314_promoted = const()[name = string("const_314_promoted"), val = fp16(-0x1p+0)]; + tensor var_6669 = mul(x = x2_41, y = const_314_promoted)[name = string("op_6669")]; + int32 var_6671 = const()[name = string("op_6671"), val = int32(-1)]; + bool var_6672_interleave_0 = const()[name = string("op_6672_interleave_0"), val = bool(false)]; + tensor var_6672 = concat(axis = var_6671, interleave = var_6672_interleave_0, values = (var_6669, x1_41))[name = string("op_6672")]; + tensor var_6673 = mul(x = var_6672, y = sin_1_cast_fp16)[name = string("op_6673")]; + tensor query_states_41 = add(x = var_6648, y = var_6673)[name = string("query_states_41")]; + tensor var_6676 = mul(x = k_21, y = cos_1_cast_fp16)[name = string("op_6676")]; + tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21)[name = string("x1_43")]; + tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21)[name = string("x2_43")]; + fp16 const_317_promoted = const()[name = string("const_317_promoted"), val = fp16(-0x1p+0)]; + tensor var_6697 = mul(x = x2_43, y = const_317_promoted)[name = string("op_6697")]; + int32 var_6699 = const()[name = string("op_6699"), val = int32(-1)]; + bool var_6700_interleave_0 = const()[name = string("op_6700_interleave_0"), val = bool(false)]; + tensor var_6700 = concat(axis = var_6699, interleave = var_6700_interleave_0, values = (var_6697, x1_43))[name = string("op_6700")]; + tensor var_6701 = mul(x = var_6700, y = sin_1_cast_fp16)[name = string("op_6701")]; + tensor key_states_41 = add(x = var_6676, y = var_6701)[name = string("key_states_41")]; + tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([10])]; + tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; + tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; + tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([11])]; + int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; + bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; + tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_82")]; + tensor concat_83_values1_0 = const()[name = string("concat_83_values1_0"), val = tensor([0])]; + tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; + int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; + bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; + tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_124, concat_83_values1_0, var_1746, concat_83_values3_0))[name = string("concat_83")]; + tensor model_model_kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_82, begin_mask = model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_83, end_mask = model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_21_stride_0, update = key_states_41, x = coreml_update_state_75)[name = string("model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; + tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([38])]; + tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; + tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; + tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([39])]; + int32 concat_86_axis_0 = const()[name = string("concat_86_axis_0"), val = int32(0)]; + bool concat_86_interleave_0 = const()[name = string("concat_86_interleave_0"), val = bool(false)]; + tensor concat_86 = concat(axis = concat_86_axis_0, interleave = concat_86_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_86")]; + tensor concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor([0])]; + tensor concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor([0])]; + int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; + bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; + tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (expand_dims_130, concat_87_values1_0, var_1746, concat_87_values3_0))[name = string("concat_87")]; + tensor model_model_kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_86, begin_mask = model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_87, end_mask = model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_22_stride_0, update = var_6592, x = coreml_update_state_76)[name = string("model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; + tensor var_6756_begin_0 = const()[name = string("op_6756_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor var_6756_end_0 = const()[name = string("op_6756_end_0"), val = tensor([11, 8, 4096, 128])]; + tensor var_6756_end_mask_0 = const()[name = string("op_6756_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6756_cast_fp16 = slice_by_index(begin = var_6756_begin_0, end = var_6756_end_0, end_mask = var_6756_end_mask_0, x = coreml_update_state_77)[name = string("op_6756_cast_fp16")]; + tensor K_layer_cache_21_axes_0 = const()[name = string("K_layer_cache_21_axes_0"), val = tensor([0])]; + tensor K_layer_cache_21_cast_fp16 = squeeze(axes = K_layer_cache_21_axes_0, x = var_6756_cast_fp16)[name = string("K_layer_cache_21_cast_fp16")]; + tensor var_6763_begin_0 = const()[name = string("op_6763_begin_0"), val = tensor([38, 0, 0, 0])]; + tensor var_6763_end_0 = const()[name = string("op_6763_end_0"), val = tensor([39, 8, 4096, 128])]; + tensor var_6763_end_mask_0 = const()[name = string("op_6763_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6763_cast_fp16 = slice_by_index(begin = var_6763_begin_0, end = var_6763_end_0, end_mask = var_6763_end_mask_0, x = coreml_update_state_77)[name = string("op_6763_cast_fp16")]; + tensor V_layer_cache_21_axes_0 = const()[name = string("V_layer_cache_21_axes_0"), val = tensor([0])]; + tensor V_layer_cache_21_cast_fp16 = squeeze(axes = V_layer_cache_21_axes_0, x = var_6763_cast_fp16)[name = string("V_layer_cache_21_cast_fp16")]; + tensor x_163_axes_0 = const()[name = string("x_163_axes_0"), val = tensor([1])]; + tensor x_163_cast_fp16 = expand_dims(axes = x_163_axes_0, x = K_layer_cache_21_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor var_6800 = const()[name = string("op_6800"), val = tensor([1, 2, 1, 1])]; + tensor x_165_cast_fp16 = tile(reps = var_6800, x = x_163_cast_fp16)[name = string("x_165_cast_fp16")]; + tensor var_6812 = const()[name = string("op_6812"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_43_cast_fp16 = reshape(shape = var_6812, x = x_165_cast_fp16)[name = string("key_states_43_cast_fp16")]; + tensor x_169_axes_0 = const()[name = string("x_169_axes_0"), val = tensor([1])]; + tensor x_169_cast_fp16 = expand_dims(axes = x_169_axes_0, x = V_layer_cache_21_cast_fp16)[name = string("x_169_cast_fp16")]; + tensor var_6820 = const()[name = string("op_6820"), val = tensor([1, 2, 1, 1])]; + tensor x_171_cast_fp16 = tile(reps = var_6820, x = x_169_cast_fp16)[name = string("x_171_cast_fp16")]; + tensor var_6832 = const()[name = string("op_6832"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_63_cast_fp16 = reshape(shape = var_6832, x = x_171_cast_fp16)[name = string("value_states_63_cast_fp16")]; + bool var_6847_transpose_x_1 = const()[name = string("op_6847_transpose_x_1"), val = bool(false)]; + bool var_6847_transpose_y_1 = const()[name = string("op_6847_transpose_y_1"), val = bool(true)]; + tensor var_6847 = matmul(transpose_x = var_6847_transpose_x_1, transpose_y = var_6847_transpose_y_1, x = query_states_41, y = key_states_43_cast_fp16)[name = string("op_6847")]; + fp16 var_6848_to_fp16 = const()[name = string("op_6848_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_61_cast_fp16 = mul(x = var_6847, y = var_6848_to_fp16)[name = string("attn_weights_61_cast_fp16")]; + tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; + int32 var_6883 = const()[name = string("op_6883"), val = int32(-1)]; + tensor attn_weights_65_cast_fp16 = softmax(axis = var_6883, x = attn_weights_63_cast_fp16)[name = string("attn_weights_65_cast_fp16")]; + bool attn_output_101_transpose_x_0 = const()[name = string("attn_output_101_transpose_x_0"), val = bool(false)]; + bool attn_output_101_transpose_y_0 = const()[name = string("attn_output_101_transpose_y_0"), val = bool(false)]; + tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_0, transpose_y = attn_output_101_transpose_y_0, x = attn_weights_65_cast_fp16, y = value_states_63_cast_fp16)[name = string("attn_output_101_cast_fp16")]; + tensor var_6894_perm_0 = const()[name = string("op_6894_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_6898 = const()[name = string("op_6898"), val = tensor([1, 1, 2048])]; + tensor var_6894_cast_fp16 = transpose(perm = var_6894_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_106")]; + tensor attn_output_105_cast_fp16 = reshape(shape = var_6898, x = var_6894_cast_fp16)[name = string("attn_output_105_cast_fp16")]; + tensor var_6903 = const()[name = string("op_6903"), val = tensor([0, 2, 1])]; + string var_6919_pad_type_0 = const()[name = string("op_6919_pad_type_0"), val = string("valid")]; + int32 var_6919_groups_0 = const()[name = string("op_6919_groups_0"), val = int32(1)]; + tensor var_6919_strides_0 = const()[name = string("op_6919_strides_0"), val = tensor([1])]; + tensor var_6919_pad_0 = const()[name = string("op_6919_pad_0"), val = tensor([0, 0])]; + tensor var_6919_dilations_0 = const()[name = string("op_6919_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427762304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429859520))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6904_cast_fp16 = transpose(perm = var_6903, x = attn_output_105_cast_fp16)[name = string("transpose_105")]; + tensor var_6919_cast_fp16 = conv(dilations = var_6919_dilations_0, groups = var_6919_groups_0, pad = var_6919_pad_0, pad_type = var_6919_pad_type_0, strides = var_6919_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_6904_cast_fp16)[name = string("op_6919_cast_fp16")]; + tensor var_6923 = const()[name = string("op_6923"), val = tensor([0, 2, 1])]; + tensor attn_output_109_cast_fp16 = transpose(perm = var_6923, x = var_6919_cast_fp16)[name = string("transpose_104")]; + tensor hidden_states_109_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; + int32 var_6936 = const()[name = string("op_6936"), val = int32(-1)]; + fp16 const_326_promoted_to_fp16 = const()[name = string("const_326_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6938_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = const_326_promoted_to_fp16)[name = string("op_6938_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_6936, interleave = input_191_interleave_0, values = (hidden_states_109_cast_fp16, var_6938_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_6933_to_fp16 = const()[name = string("op_6933_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_6933_to_fp16, x = input_191_cast_fp16)[name = string("normed_173_cast_fp16")]; + tensor normed_175_begin_0 = const()[name = string("normed_175_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_175_end_0 = const()[name = string("normed_175_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_175_end_mask_0 = const()[name = string("normed_175_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_175_cast_fp16 = slice_by_index(begin = normed_175_begin_0, end = normed_175_end_0, end_mask = normed_175_end_mask_0, x = normed_173_cast_fp16)[name = string("normed_175_cast_fp16")]; + tensor const_329_promoted_to_fp16 = const()[name = string("const_329_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429925120)))]; + tensor x_173_cast_fp16 = mul(x = normed_175_cast_fp16, y = const_329_promoted_to_fp16)[name = string("x_173_cast_fp16")]; + tensor var_6963 = const()[name = string("op_6963"), val = tensor([0, 2, 1])]; + tensor input_193_axes_0 = const()[name = string("input_193_axes_0"), val = tensor([2])]; + tensor var_6964 = transpose(perm = var_6963, x = x_173_cast_fp16)[name = string("transpose_103")]; + tensor input_193 = expand_dims(axes = input_193_axes_0, x = var_6964)[name = string("input_193")]; + string input_195_pad_type_0 = const()[name = string("input_195_pad_type_0"), val = string("valid")]; + tensor input_195_strides_0 = const()[name = string("input_195_strides_0"), val = tensor([1, 1])]; + tensor input_195_pad_0 = const()[name = string("input_195_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_195_dilations_0 = const()[name = string("input_195_dilations_0"), val = tensor([1, 1])]; + int32 input_195_groups_0 = const()[name = string("input_195_groups_0"), val = int32(1)]; + tensor input_195 = conv(dilations = input_195_dilations_0, groups = input_195_groups_0, pad = input_195_pad_0, pad_type = input_195_pad_type_0, strides = input_195_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_193)[name = string("input_195")]; + string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; + tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; + tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; + int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; + tensor b_21 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_193)[name = string("b_21")]; + tensor c_21 = silu(x = input_195)[name = string("c_21")]; + tensor input_197 = mul(x = c_21, y = b_21)[name = string("input_197")]; + string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; + tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; + tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; + int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; + tensor e_21 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_197)[name = string("e_21")]; + tensor var_6986_axes_0 = const()[name = string("op_6986_axes_0"), val = tensor([2])]; + tensor var_6986 = squeeze(axes = var_6986_axes_0, x = e_21)[name = string("op_6986")]; + tensor var_6987 = const()[name = string("op_6987"), val = tensor([0, 2, 1])]; + tensor var_6988 = transpose(perm = var_6987, x = var_6986)[name = string("transpose_102")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = var_6988)[name = string("hidden_states_111_cast_fp16")]; + int32 var_7000 = const()[name = string("op_7000"), val = int32(-1)]; + fp16 const_330_promoted_to_fp16 = const()[name = string("const_330_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7002_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_330_promoted_to_fp16)[name = string("op_7002_cast_fp16")]; + bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; + tensor input_199_cast_fp16 = concat(axis = var_7000, interleave = input_199_interleave_0, values = (hidden_states_111_cast_fp16, var_7002_cast_fp16))[name = string("input_199_cast_fp16")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_6997_to_fp16 = const()[name = string("op_6997_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_6997_to_fp16, x = input_199_cast_fp16)[name = string("normed_177_cast_fp16")]; + tensor normed_179_begin_0 = const()[name = string("normed_179_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_179_end_0 = const()[name = string("normed_179_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_179_end_mask_0 = const()[name = string("normed_179_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_179_cast_fp16 = slice_by_index(begin = normed_179_begin_0, end = normed_179_end_0, end_mask = normed_179_end_mask_0, x = normed_177_cast_fp16)[name = string("normed_179_cast_fp16")]; + tensor const_333_promoted_to_fp16 = const()[name = string("const_333_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429927232)))]; + tensor hidden_states_113_cast_fp16 = mul(x = normed_179_cast_fp16, y = const_333_promoted_to_fp16)[name = string("hidden_states_113_cast_fp16")]; + tensor var_7019 = const()[name = string("op_7019"), val = tensor([0, 2, 1])]; + tensor var_7022_axes_0 = const()[name = string("op_7022_axes_0"), val = tensor([2])]; + tensor var_7020_cast_fp16 = transpose(perm = var_7019, x = hidden_states_113_cast_fp16)[name = string("transpose_101")]; + tensor var_7022_cast_fp16 = expand_dims(axes = var_7022_axes_0, x = var_7020_cast_fp16)[name = string("op_7022_cast_fp16")]; + string var_7038_pad_type_0 = const()[name = string("op_7038_pad_type_0"), val = string("valid")]; + tensor var_7038_strides_0 = const()[name = string("op_7038_strides_0"), val = tensor([1, 1])]; + tensor var_7038_pad_0 = const()[name = string("op_7038_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7038_dilations_0 = const()[name = string("op_7038_dilations_0"), val = tensor([1, 1])]; + int32 var_7038_groups_0 = const()[name = string("op_7038_groups_0"), val = int32(1)]; + tensor var_7038 = conv(dilations = var_7038_dilations_0, groups = var_7038_groups_0, pad = var_7038_pad_0, pad_type = var_7038_pad_type_0, strides = var_7038_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_7022_cast_fp16)[name = string("op_7038")]; + tensor var_7043 = const()[name = string("op_7043"), val = tensor([1, 16, 1, 128])]; + tensor var_7044 = reshape(shape = var_7043, x = var_7038)[name = string("op_7044")]; + string var_7060_pad_type_0 = const()[name = string("op_7060_pad_type_0"), val = string("valid")]; + tensor var_7060_strides_0 = const()[name = string("op_7060_strides_0"), val = tensor([1, 1])]; + tensor var_7060_pad_0 = const()[name = string("op_7060_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7060_dilations_0 = const()[name = string("op_7060_dilations_0"), val = tensor([1, 1])]; + int32 var_7060_groups_0 = const()[name = string("op_7060_groups_0"), val = int32(1)]; + tensor var_7060 = conv(dilations = var_7060_dilations_0, groups = var_7060_groups_0, pad = var_7060_pad_0, pad_type = var_7060_pad_type_0, strides = var_7060_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_7022_cast_fp16)[name = string("op_7060")]; + tensor var_7065 = const()[name = string("op_7065"), val = tensor([1, 8, 1, 128])]; + tensor var_7066 = reshape(shape = var_7065, x = var_7060)[name = string("op_7066")]; + string var_7082_pad_type_0 = const()[name = string("op_7082_pad_type_0"), val = string("valid")]; + tensor var_7082_strides_0 = const()[name = string("op_7082_strides_0"), val = tensor([1, 1])]; + tensor var_7082_pad_0 = const()[name = string("op_7082_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7082_dilations_0 = const()[name = string("op_7082_dilations_0"), val = tensor([1, 1])]; + int32 var_7082_groups_0 = const()[name = string("op_7082_groups_0"), val = int32(1)]; + tensor var_7082 = conv(dilations = var_7082_dilations_0, groups = var_7082_groups_0, pad = var_7082_pad_0, pad_type = var_7082_pad_type_0, strides = var_7082_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_7022_cast_fp16)[name = string("op_7082")]; + tensor var_7087 = const()[name = string("op_7087"), val = tensor([1, 8, 1, 128])]; + tensor var_7088 = reshape(shape = var_7087, x = var_7082)[name = string("op_7088")]; + int32 var_7103 = const()[name = string("op_7103"), val = int32(-1)]; + fp16 const_334_promoted = const()[name = string("const_334_promoted"), val = fp16(-0x1p+0)]; + tensor var_7105 = mul(x = var_7044, y = const_334_promoted)[name = string("op_7105")]; + bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; + tensor input_203 = concat(axis = var_7103, interleave = input_203_interleave_0, values = (var_7044, var_7105))[name = string("input_203")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_7100_to_fp16 = const()[name = string("op_7100_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_7100_to_fp16, x = input_203)[name = string("normed_181_cast_fp16")]; + tensor normed_183_begin_0 = const()[name = string("normed_183_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_183_end_0 = const()[name = string("normed_183_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_183_end_mask_0 = const()[name = string("normed_183_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_183 = slice_by_index(begin = normed_183_begin_0, end = normed_183_end_0, end_mask = normed_183_end_mask_0, x = normed_181_cast_fp16)[name = string("normed_183")]; + tensor const_337 = const()[name = string("const_337"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429929344)))]; + tensor q_23 = mul(x = normed_183, y = const_337)[name = string("q_23")]; + int32 var_7128 = const()[name = string("op_7128"), val = int32(-1)]; + fp16 const_338_promoted = const()[name = string("const_338_promoted"), val = fp16(-0x1p+0)]; + tensor var_7130 = mul(x = var_7066, y = const_338_promoted)[name = string("op_7130")]; + bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; + tensor input_205 = concat(axis = var_7128, interleave = input_205_interleave_0, values = (var_7066, var_7130))[name = string("input_205")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_7125_to_fp16 = const()[name = string("op_7125_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_7125_to_fp16, x = input_205)[name = string("normed_185_cast_fp16")]; + tensor normed_187_begin_0 = const()[name = string("normed_187_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_187_end_0 = const()[name = string("normed_187_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_187_end_mask_0 = const()[name = string("normed_187_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_187 = slice_by_index(begin = normed_187_begin_0, end = normed_187_end_0, end_mask = normed_187_end_mask_0, x = normed_185_cast_fp16)[name = string("normed_187")]; + tensor const_341 = const()[name = string("const_341"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429929664)))]; + tensor k_23 = mul(x = normed_187, y = const_341)[name = string("k_23")]; + tensor var_7144 = mul(x = q_23, y = cos_1_cast_fp16)[name = string("op_7144")]; + tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23)[name = string("x1_45")]; + tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23)[name = string("x2_45")]; + fp16 const_344_promoted = const()[name = string("const_344_promoted"), val = fp16(-0x1p+0)]; + tensor var_7165 = mul(x = x2_45, y = const_344_promoted)[name = string("op_7165")]; + int32 var_7167 = const()[name = string("op_7167"), val = int32(-1)]; + bool var_7168_interleave_0 = const()[name = string("op_7168_interleave_0"), val = bool(false)]; + tensor var_7168 = concat(axis = var_7167, interleave = var_7168_interleave_0, values = (var_7165, x1_45))[name = string("op_7168")]; + tensor var_7169 = mul(x = var_7168, y = sin_1_cast_fp16)[name = string("op_7169")]; + tensor query_states_45 = add(x = var_7144, y = var_7169)[name = string("query_states_45")]; + tensor var_7172 = mul(x = k_23, y = cos_1_cast_fp16)[name = string("op_7172")]; + tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23)[name = string("x1_47")]; + tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23)[name = string("x2_47")]; + fp16 const_347_promoted = const()[name = string("const_347_promoted"), val = fp16(-0x1p+0)]; + tensor var_7193 = mul(x = x2_47, y = const_347_promoted)[name = string("op_7193")]; + int32 var_7195 = const()[name = string("op_7195"), val = int32(-1)]; + bool var_7196_interleave_0 = const()[name = string("op_7196_interleave_0"), val = bool(false)]; + tensor var_7196 = concat(axis = var_7195, interleave = var_7196_interleave_0, values = (var_7193, x1_47))[name = string("op_7196")]; + tensor var_7197 = mul(x = var_7196, y = sin_1_cast_fp16)[name = string("op_7197")]; + tensor key_states_45 = add(x = var_7172, y = var_7197)[name = string("key_states_45")]; + tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([11])]; + tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; + tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; + tensor expand_dims_136 = const()[name = string("expand_dims_136"), val = tensor([12])]; + int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)]; + bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)]; + tensor concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (expand_dims_132, expand_dims_133, current_pos, expand_dims_135))[name = string("concat_90")]; + tensor concat_91_values1_0 = const()[name = string("concat_91_values1_0"), val = tensor([0])]; + tensor concat_91_values3_0 = const()[name = string("concat_91_values3_0"), val = tensor([0])]; + int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; + bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; + tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (expand_dims_136, concat_91_values1_0, var_1746, concat_91_values3_0))[name = string("concat_91")]; + tensor model_model_kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_90, begin_mask = model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_91, end_mask = model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_23_stride_0, update = key_states_45, x = coreml_update_state_77)[name = string("model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; + tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; + tensor expand_dims_138 = const()[name = string("expand_dims_138"), val = tensor([39])]; + tensor expand_dims_139 = const()[name = string("expand_dims_139"), val = tensor([0])]; + tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([0])]; + tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([40])]; + int32 concat_94_axis_0 = const()[name = string("concat_94_axis_0"), val = int32(0)]; + bool concat_94_interleave_0 = const()[name = string("concat_94_interleave_0"), val = bool(false)]; + tensor concat_94 = concat(axis = concat_94_axis_0, interleave = concat_94_interleave_0, values = (expand_dims_138, expand_dims_139, current_pos, expand_dims_141))[name = string("concat_94")]; + tensor concat_95_values1_0 = const()[name = string("concat_95_values1_0"), val = tensor([0])]; + tensor concat_95_values3_0 = const()[name = string("concat_95_values3_0"), val = tensor([0])]; + int32 concat_95_axis_0 = const()[name = string("concat_95_axis_0"), val = int32(0)]; + bool concat_95_interleave_0 = const()[name = string("concat_95_interleave_0"), val = bool(false)]; + tensor concat_95 = concat(axis = concat_95_axis_0, interleave = concat_95_interleave_0, values = (expand_dims_142, concat_95_values1_0, var_1746, concat_95_values3_0))[name = string("concat_95")]; + tensor model_model_kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_94, begin_mask = model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_95, end_mask = model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_24_stride_0, update = var_7088, x = coreml_update_state_78)[name = string("model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; + tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; + tensor var_7252_begin_0 = const()[name = string("op_7252_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor var_7252_end_0 = const()[name = string("op_7252_end_0"), val = tensor([12, 8, 4096, 128])]; + tensor var_7252_end_mask_0 = const()[name = string("op_7252_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7252_cast_fp16 = slice_by_index(begin = var_7252_begin_0, end = var_7252_end_0, end_mask = var_7252_end_mask_0, x = coreml_update_state_79)[name = string("op_7252_cast_fp16")]; + tensor K_layer_cache_23_axes_0 = const()[name = string("K_layer_cache_23_axes_0"), val = tensor([0])]; + tensor K_layer_cache_23_cast_fp16 = squeeze(axes = K_layer_cache_23_axes_0, x = var_7252_cast_fp16)[name = string("K_layer_cache_23_cast_fp16")]; + tensor var_7259_begin_0 = const()[name = string("op_7259_begin_0"), val = tensor([39, 0, 0, 0])]; + tensor var_7259_end_0 = const()[name = string("op_7259_end_0"), val = tensor([40, 8, 4096, 128])]; + tensor var_7259_end_mask_0 = const()[name = string("op_7259_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7259_cast_fp16 = slice_by_index(begin = var_7259_begin_0, end = var_7259_end_0, end_mask = var_7259_end_mask_0, x = coreml_update_state_79)[name = string("op_7259_cast_fp16")]; + tensor V_layer_cache_23_axes_0 = const()[name = string("V_layer_cache_23_axes_0"), val = tensor([0])]; + tensor V_layer_cache_23_cast_fp16 = squeeze(axes = V_layer_cache_23_axes_0, x = var_7259_cast_fp16)[name = string("V_layer_cache_23_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_23_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_7296 = const()[name = string("op_7296"), val = tensor([1, 2, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_7296, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_7308 = const()[name = string("op_7308"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_47_cast_fp16 = reshape(shape = var_7308, x = x_181_cast_fp16)[name = string("key_states_47_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_23_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_7316 = const()[name = string("op_7316"), val = tensor([1, 2, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_7316, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + tensor var_7328 = const()[name = string("op_7328"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_69_cast_fp16 = reshape(shape = var_7328, x = x_187_cast_fp16)[name = string("value_states_69_cast_fp16")]; + bool var_7343_transpose_x_1 = const()[name = string("op_7343_transpose_x_1"), val = bool(false)]; + bool var_7343_transpose_y_1 = const()[name = string("op_7343_transpose_y_1"), val = bool(true)]; + tensor var_7343 = matmul(transpose_x = var_7343_transpose_x_1, transpose_y = var_7343_transpose_y_1, x = query_states_45, y = key_states_47_cast_fp16)[name = string("op_7343")]; + fp16 var_7344_to_fp16 = const()[name = string("op_7344_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_67_cast_fp16 = mul(x = var_7343, y = var_7344_to_fp16)[name = string("attn_weights_67_cast_fp16")]; + tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; + int32 var_7379 = const()[name = string("op_7379"), val = int32(-1)]; + tensor attn_weights_71_cast_fp16 = softmax(axis = var_7379, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; + bool attn_output_111_transpose_x_0 = const()[name = string("attn_output_111_transpose_x_0"), val = bool(false)]; + bool attn_output_111_transpose_y_0 = const()[name = string("attn_output_111_transpose_y_0"), val = bool(false)]; + tensor attn_output_111_cast_fp16 = matmul(transpose_x = attn_output_111_transpose_x_0, transpose_y = attn_output_111_transpose_y_0, x = attn_weights_71_cast_fp16, y = value_states_69_cast_fp16)[name = string("attn_output_111_cast_fp16")]; + tensor var_7390_perm_0 = const()[name = string("op_7390_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_7394 = const()[name = string("op_7394"), val = tensor([1, 1, 2048])]; + tensor var_7390_cast_fp16 = transpose(perm = var_7390_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_100")]; + tensor attn_output_115_cast_fp16 = reshape(shape = var_7394, x = var_7390_cast_fp16)[name = string("attn_output_115_cast_fp16")]; + tensor var_7399 = const()[name = string("op_7399"), val = tensor([0, 2, 1])]; + string var_7415_pad_type_0 = const()[name = string("op_7415_pad_type_0"), val = string("valid")]; + int32 var_7415_groups_0 = const()[name = string("op_7415_groups_0"), val = int32(1)]; + tensor var_7415_strides_0 = const()[name = string("op_7415_strides_0"), val = tensor([1])]; + tensor var_7415_pad_0 = const()[name = string("op_7415_pad_0"), val = tensor([0, 0])]; + tensor var_7415_dilations_0 = const()[name = string("op_7415_dilations_0"), val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429929984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432027200))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7400_cast_fp16 = transpose(perm = var_7399, x = attn_output_115_cast_fp16)[name = string("transpose_99")]; + tensor var_7415_cast_fp16 = conv(dilations = var_7415_dilations_0, groups = var_7415_groups_0, pad = var_7415_pad_0, pad_type = var_7415_pad_type_0, strides = var_7415_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7400_cast_fp16)[name = string("op_7415_cast_fp16")]; + tensor var_7419 = const()[name = string("op_7419"), val = tensor([0, 2, 1])]; + tensor attn_output_119_cast_fp16 = transpose(perm = var_7419, x = var_7415_cast_fp16)[name = string("transpose_98")]; + tensor hidden_states_119_cast_fp16 = add(x = hidden_states_111_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; + int32 var_7432 = const()[name = string("op_7432"), val = int32(-1)]; + fp16 const_356_promoted_to_fp16 = const()[name = string("const_356_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7434_cast_fp16 = mul(x = hidden_states_119_cast_fp16, y = const_356_promoted_to_fp16)[name = string("op_7434_cast_fp16")]; + bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; + tensor input_209_cast_fp16 = concat(axis = var_7432, interleave = input_209_interleave_0, values = (hidden_states_119_cast_fp16, var_7434_cast_fp16))[name = string("input_209_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_7429_to_fp16 = const()[name = string("op_7429_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_7429_to_fp16, x = input_209_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor normed_191_begin_0 = const()[name = string("normed_191_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_191_end_0 = const()[name = string("normed_191_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_191_end_mask_0 = const()[name = string("normed_191_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_191_cast_fp16 = slice_by_index(begin = normed_191_begin_0, end = normed_191_end_0, end_mask = normed_191_end_mask_0, x = normed_189_cast_fp16)[name = string("normed_191_cast_fp16")]; + tensor const_359_promoted_to_fp16 = const()[name = string("const_359_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432092800)))]; + tensor x_189_cast_fp16 = mul(x = normed_191_cast_fp16, y = const_359_promoted_to_fp16)[name = string("x_189_cast_fp16")]; + tensor var_7459 = const()[name = string("op_7459"), val = tensor([0, 2, 1])]; + tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; + tensor var_7460 = transpose(perm = var_7459, x = x_189_cast_fp16)[name = string("transpose_97")]; + tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_7460)[name = string("input_211")]; + string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")]; + tensor input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor([1, 1])]; + tensor input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor([1, 1])]; + int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)]; + tensor input_213 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_211)[name = string("input_213")]; + string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; + tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; + tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; + int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; + tensor b_23 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_211)[name = string("b_23")]; + tensor c_23 = silu(x = input_213)[name = string("c_23")]; + tensor input_215 = mul(x = c_23, y = b_23)[name = string("input_215")]; + string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; + tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; + tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; + int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; + tensor e_23 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_215)[name = string("e_23")]; + tensor var_7482_axes_0 = const()[name = string("op_7482_axes_0"), val = tensor([2])]; + tensor var_7482 = squeeze(axes = var_7482_axes_0, x = e_23)[name = string("op_7482")]; + tensor var_7483 = const()[name = string("op_7483"), val = tensor([0, 2, 1])]; + tensor var_7484 = transpose(perm = var_7483, x = var_7482)[name = string("transpose_96")]; + tensor hidden_states_121_cast_fp16 = add(x = hidden_states_119_cast_fp16, y = var_7484)[name = string("hidden_states_121_cast_fp16")]; + int32 var_7496 = const()[name = string("op_7496"), val = int32(-1)]; + fp16 const_360_promoted_to_fp16 = const()[name = string("const_360_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7498_cast_fp16 = mul(x = hidden_states_121_cast_fp16, y = const_360_promoted_to_fp16)[name = string("op_7498_cast_fp16")]; + bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)]; + tensor input_217_cast_fp16 = concat(axis = var_7496, interleave = input_217_interleave_0, values = (hidden_states_121_cast_fp16, var_7498_cast_fp16))[name = string("input_217_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_7493_to_fp16 = const()[name = string("op_7493_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_7493_to_fp16, x = input_217_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor normed_195_begin_0 = const()[name = string("normed_195_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_195_end_0 = const()[name = string("normed_195_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_195_end_mask_0 = const()[name = string("normed_195_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_195_cast_fp16 = slice_by_index(begin = normed_195_begin_0, end = normed_195_end_0, end_mask = normed_195_end_mask_0, x = normed_193_cast_fp16)[name = string("normed_195_cast_fp16")]; + tensor const_363_promoted_to_fp16 = const()[name = string("const_363_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432094912)))]; + tensor hidden_states_123_cast_fp16 = mul(x = normed_195_cast_fp16, y = const_363_promoted_to_fp16)[name = string("hidden_states_123_cast_fp16")]; + tensor var_7515 = const()[name = string("op_7515"), val = tensor([0, 2, 1])]; + tensor var_7518_axes_0 = const()[name = string("op_7518_axes_0"), val = tensor([2])]; + tensor var_7516_cast_fp16 = transpose(perm = var_7515, x = hidden_states_123_cast_fp16)[name = string("transpose_95")]; + tensor var_7518_cast_fp16 = expand_dims(axes = var_7518_axes_0, x = var_7516_cast_fp16)[name = string("op_7518_cast_fp16")]; + string var_7534_pad_type_0 = const()[name = string("op_7534_pad_type_0"), val = string("valid")]; + tensor var_7534_strides_0 = const()[name = string("op_7534_strides_0"), val = tensor([1, 1])]; + tensor var_7534_pad_0 = const()[name = string("op_7534_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7534_dilations_0 = const()[name = string("op_7534_dilations_0"), val = tensor([1, 1])]; + int32 var_7534_groups_0 = const()[name = string("op_7534_groups_0"), val = int32(1)]; + tensor var_7534 = conv(dilations = var_7534_dilations_0, groups = var_7534_groups_0, pad = var_7534_pad_0, pad_type = var_7534_pad_type_0, strides = var_7534_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_7518_cast_fp16)[name = string("op_7534")]; + tensor var_7539 = const()[name = string("op_7539"), val = tensor([1, 16, 1, 128])]; + tensor var_7540 = reshape(shape = var_7539, x = var_7534)[name = string("op_7540")]; + string var_7556_pad_type_0 = const()[name = string("op_7556_pad_type_0"), val = string("valid")]; + tensor var_7556_strides_0 = const()[name = string("op_7556_strides_0"), val = tensor([1, 1])]; + tensor var_7556_pad_0 = const()[name = string("op_7556_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7556_dilations_0 = const()[name = string("op_7556_dilations_0"), val = tensor([1, 1])]; + int32 var_7556_groups_0 = const()[name = string("op_7556_groups_0"), val = int32(1)]; + tensor var_7556 = conv(dilations = var_7556_dilations_0, groups = var_7556_groups_0, pad = var_7556_pad_0, pad_type = var_7556_pad_type_0, strides = var_7556_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_7518_cast_fp16)[name = string("op_7556")]; + tensor var_7561 = const()[name = string("op_7561"), val = tensor([1, 8, 1, 128])]; + tensor var_7562 = reshape(shape = var_7561, x = var_7556)[name = string("op_7562")]; + string var_7578_pad_type_0 = const()[name = string("op_7578_pad_type_0"), val = string("valid")]; + tensor var_7578_strides_0 = const()[name = string("op_7578_strides_0"), val = tensor([1, 1])]; + tensor var_7578_pad_0 = const()[name = string("op_7578_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7578_dilations_0 = const()[name = string("op_7578_dilations_0"), val = tensor([1, 1])]; + int32 var_7578_groups_0 = const()[name = string("op_7578_groups_0"), val = int32(1)]; + tensor var_7578 = conv(dilations = var_7578_dilations_0, groups = var_7578_groups_0, pad = var_7578_pad_0, pad_type = var_7578_pad_type_0, strides = var_7578_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_7518_cast_fp16)[name = string("op_7578")]; + tensor var_7583 = const()[name = string("op_7583"), val = tensor([1, 8, 1, 128])]; + tensor var_7584 = reshape(shape = var_7583, x = var_7578)[name = string("op_7584")]; + int32 var_7599 = const()[name = string("op_7599"), val = int32(-1)]; + fp16 const_364_promoted = const()[name = string("const_364_promoted"), val = fp16(-0x1p+0)]; + tensor var_7601 = mul(x = var_7540, y = const_364_promoted)[name = string("op_7601")]; + bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; + tensor input_221 = concat(axis = var_7599, interleave = input_221_interleave_0, values = (var_7540, var_7601))[name = string("input_221")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_7596_to_fp16 = const()[name = string("op_7596_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_7596_to_fp16, x = input_221)[name = string("normed_197_cast_fp16")]; + tensor normed_199_begin_0 = const()[name = string("normed_199_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_199_end_0 = const()[name = string("normed_199_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_199_end_mask_0 = const()[name = string("normed_199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_199 = slice_by_index(begin = normed_199_begin_0, end = normed_199_end_0, end_mask = normed_199_end_mask_0, x = normed_197_cast_fp16)[name = string("normed_199")]; + tensor const_367 = const()[name = string("const_367"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432097024)))]; + tensor q_25 = mul(x = normed_199, y = const_367)[name = string("q_25")]; + int32 var_7624 = const()[name = string("op_7624"), val = int32(-1)]; + fp16 const_368_promoted = const()[name = string("const_368_promoted"), val = fp16(-0x1p+0)]; + tensor var_7626 = mul(x = var_7562, y = const_368_promoted)[name = string("op_7626")]; + bool input_223_interleave_0 = const()[name = string("input_223_interleave_0"), val = bool(false)]; + tensor input_223 = concat(axis = var_7624, interleave = input_223_interleave_0, values = (var_7562, var_7626))[name = string("input_223")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_7621_to_fp16 = const()[name = string("op_7621_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_7621_to_fp16, x = input_223)[name = string("normed_201_cast_fp16")]; + tensor normed_203_begin_0 = const()[name = string("normed_203_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_203_end_0 = const()[name = string("normed_203_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_203_end_mask_0 = const()[name = string("normed_203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_203 = slice_by_index(begin = normed_203_begin_0, end = normed_203_end_0, end_mask = normed_203_end_mask_0, x = normed_201_cast_fp16)[name = string("normed_203")]; + tensor const_371 = const()[name = string("const_371"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432097344)))]; + tensor k_25 = mul(x = normed_203, y = const_371)[name = string("k_25")]; + tensor var_7640 = mul(x = q_25, y = cos_1_cast_fp16)[name = string("op_7640")]; + tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25)[name = string("x1_49")]; + tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25)[name = string("x2_49")]; + fp16 const_374_promoted = const()[name = string("const_374_promoted"), val = fp16(-0x1p+0)]; + tensor var_7661 = mul(x = x2_49, y = const_374_promoted)[name = string("op_7661")]; + int32 var_7663 = const()[name = string("op_7663"), val = int32(-1)]; + bool var_7664_interleave_0 = const()[name = string("op_7664_interleave_0"), val = bool(false)]; + tensor var_7664 = concat(axis = var_7663, interleave = var_7664_interleave_0, values = (var_7661, x1_49))[name = string("op_7664")]; + tensor var_7665 = mul(x = var_7664, y = sin_1_cast_fp16)[name = string("op_7665")]; + tensor query_states_49 = add(x = var_7640, y = var_7665)[name = string("query_states_49")]; + tensor var_7668 = mul(x = k_25, y = cos_1_cast_fp16)[name = string("op_7668")]; + tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25)[name = string("x1_51")]; + tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25)[name = string("x2_51")]; + fp16 const_377_promoted = const()[name = string("const_377_promoted"), val = fp16(-0x1p+0)]; + tensor var_7689 = mul(x = x2_51, y = const_377_promoted)[name = string("op_7689")]; + int32 var_7691 = const()[name = string("op_7691"), val = int32(-1)]; + bool var_7692_interleave_0 = const()[name = string("op_7692_interleave_0"), val = bool(false)]; + tensor var_7692 = concat(axis = var_7691, interleave = var_7692_interleave_0, values = (var_7689, x1_51))[name = string("op_7692")]; + tensor var_7693 = mul(x = var_7692, y = sin_1_cast_fp16)[name = string("op_7693")]; + tensor key_states_49 = add(x = var_7668, y = var_7693)[name = string("key_states_49")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([12])]; + tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; + tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; + tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([13])]; + int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; + bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; + tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_98")]; + tensor concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor([0])]; + tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; + int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; + bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; + tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_148, concat_99_values1_0, var_1746, concat_99_values3_0))[name = string("concat_99")]; + tensor model_model_kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_98, begin_mask = model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_99, end_mask = model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_25_stride_0, update = key_states_49, x = coreml_update_state_79)[name = string("model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; + tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([40])]; + tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; + tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; + tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([41])]; + int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)]; + bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)]; + tensor concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_102")]; + tensor concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = tensor([0])]; + tensor concat_103_values3_0 = const()[name = string("concat_103_values3_0"), val = tensor([0])]; + int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; + bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; + tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (expand_dims_154, concat_103_values1_0, var_1746, concat_103_values3_0))[name = string("concat_103")]; + tensor model_model_kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_102, begin_mask = model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_103, end_mask = model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_26_stride_0, update = var_7584, x = coreml_update_state_80)[name = string("model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; + tensor var_7748_begin_0 = const()[name = string("op_7748_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor var_7748_end_0 = const()[name = string("op_7748_end_0"), val = tensor([13, 8, 4096, 128])]; + tensor var_7748_end_mask_0 = const()[name = string("op_7748_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7748_cast_fp16 = slice_by_index(begin = var_7748_begin_0, end = var_7748_end_0, end_mask = var_7748_end_mask_0, x = coreml_update_state_81)[name = string("op_7748_cast_fp16")]; + tensor K_layer_cache_25_axes_0 = const()[name = string("K_layer_cache_25_axes_0"), val = tensor([0])]; + tensor K_layer_cache_25_cast_fp16 = squeeze(axes = K_layer_cache_25_axes_0, x = var_7748_cast_fp16)[name = string("K_layer_cache_25_cast_fp16")]; + tensor var_7755_begin_0 = const()[name = string("op_7755_begin_0"), val = tensor([40, 0, 0, 0])]; + tensor var_7755_end_0 = const()[name = string("op_7755_end_0"), val = tensor([41, 8, 4096, 128])]; + tensor var_7755_end_mask_0 = const()[name = string("op_7755_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7755_cast_fp16 = slice_by_index(begin = var_7755_begin_0, end = var_7755_end_0, end_mask = var_7755_end_mask_0, x = coreml_update_state_81)[name = string("op_7755_cast_fp16")]; + tensor V_layer_cache_25_axes_0 = const()[name = string("V_layer_cache_25_axes_0"), val = tensor([0])]; + tensor V_layer_cache_25_cast_fp16 = squeeze(axes = V_layer_cache_25_axes_0, x = var_7755_cast_fp16)[name = string("V_layer_cache_25_cast_fp16")]; + tensor x_195_axes_0 = const()[name = string("x_195_axes_0"), val = tensor([1])]; + tensor x_195_cast_fp16 = expand_dims(axes = x_195_axes_0, x = K_layer_cache_25_cast_fp16)[name = string("x_195_cast_fp16")]; + tensor var_7792 = const()[name = string("op_7792"), val = tensor([1, 2, 1, 1])]; + tensor x_197_cast_fp16 = tile(reps = var_7792, x = x_195_cast_fp16)[name = string("x_197_cast_fp16")]; + tensor var_7804 = const()[name = string("op_7804"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_51_cast_fp16 = reshape(shape = var_7804, x = x_197_cast_fp16)[name = string("key_states_51_cast_fp16")]; + tensor x_201_axes_0 = const()[name = string("x_201_axes_0"), val = tensor([1])]; + tensor x_201_cast_fp16 = expand_dims(axes = x_201_axes_0, x = V_layer_cache_25_cast_fp16)[name = string("x_201_cast_fp16")]; + tensor var_7812 = const()[name = string("op_7812"), val = tensor([1, 2, 1, 1])]; + tensor x_203_cast_fp16 = tile(reps = var_7812, x = x_201_cast_fp16)[name = string("x_203_cast_fp16")]; + tensor var_7824 = const()[name = string("op_7824"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_75_cast_fp16 = reshape(shape = var_7824, x = x_203_cast_fp16)[name = string("value_states_75_cast_fp16")]; + bool var_7839_transpose_x_1 = const()[name = string("op_7839_transpose_x_1"), val = bool(false)]; + bool var_7839_transpose_y_1 = const()[name = string("op_7839_transpose_y_1"), val = bool(true)]; + tensor var_7839 = matmul(transpose_x = var_7839_transpose_x_1, transpose_y = var_7839_transpose_y_1, x = query_states_49, y = key_states_51_cast_fp16)[name = string("op_7839")]; + fp16 var_7840_to_fp16 = const()[name = string("op_7840_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_73_cast_fp16 = mul(x = var_7839, y = var_7840_to_fp16)[name = string("attn_weights_73_cast_fp16")]; + tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = causal_mask)[name = string("attn_weights_75_cast_fp16")]; + int32 var_7875 = const()[name = string("op_7875"), val = int32(-1)]; + tensor attn_weights_77_cast_fp16 = softmax(axis = var_7875, x = attn_weights_75_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; + bool attn_output_121_transpose_x_0 = const()[name = string("attn_output_121_transpose_x_0"), val = bool(false)]; + bool attn_output_121_transpose_y_0 = const()[name = string("attn_output_121_transpose_y_0"), val = bool(false)]; + tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_0, transpose_y = attn_output_121_transpose_y_0, x = attn_weights_77_cast_fp16, y = value_states_75_cast_fp16)[name = string("attn_output_121_cast_fp16")]; + tensor var_7886_perm_0 = const()[name = string("op_7886_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_7890 = const()[name = string("op_7890"), val = tensor([1, 1, 2048])]; + tensor var_7886_cast_fp16 = transpose(perm = var_7886_perm_0, x = attn_output_121_cast_fp16)[name = string("transpose_94")]; + tensor attn_output_125_cast_fp16 = reshape(shape = var_7890, x = var_7886_cast_fp16)[name = string("attn_output_125_cast_fp16")]; + tensor var_7895 = const()[name = string("op_7895"), val = tensor([0, 2, 1])]; + string var_7911_pad_type_0 = const()[name = string("op_7911_pad_type_0"), val = string("valid")]; + int32 var_7911_groups_0 = const()[name = string("op_7911_groups_0"), val = int32(1)]; + tensor var_7911_strides_0 = const()[name = string("op_7911_strides_0"), val = tensor([1])]; + tensor var_7911_pad_0 = const()[name = string("op_7911_pad_0"), val = tensor([0, 0])]; + tensor var_7911_dilations_0 = const()[name = string("op_7911_dilations_0"), val = tensor([1])]; + tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432097664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434194880))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7896_cast_fp16 = transpose(perm = var_7895, x = attn_output_125_cast_fp16)[name = string("transpose_93")]; + tensor var_7911_cast_fp16 = conv(dilations = var_7911_dilations_0, groups = var_7911_groups_0, pad = var_7911_pad_0, pad_type = var_7911_pad_type_0, strides = var_7911_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_7896_cast_fp16)[name = string("op_7911_cast_fp16")]; + tensor var_7915 = const()[name = string("op_7915"), val = tensor([0, 2, 1])]; + tensor attn_output_129_cast_fp16 = transpose(perm = var_7915, x = var_7911_cast_fp16)[name = string("transpose_92")]; + tensor hidden_states_129_cast_fp16 = add(x = hidden_states_121_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; + int32 var_7928 = const()[name = string("op_7928"), val = int32(-1)]; + fp16 const_386_promoted_to_fp16 = const()[name = string("const_386_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7930_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = const_386_promoted_to_fp16)[name = string("op_7930_cast_fp16")]; + bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; + tensor input_227_cast_fp16 = concat(axis = var_7928, interleave = input_227_interleave_0, values = (hidden_states_129_cast_fp16, var_7930_cast_fp16))[name = string("input_227_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_7925_to_fp16 = const()[name = string("op_7925_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_7925_to_fp16, x = input_227_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor normed_207_begin_0 = const()[name = string("normed_207_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_207_end_0 = const()[name = string("normed_207_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_207_end_mask_0 = const()[name = string("normed_207_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_207_cast_fp16 = slice_by_index(begin = normed_207_begin_0, end = normed_207_end_0, end_mask = normed_207_end_mask_0, x = normed_205_cast_fp16)[name = string("normed_207_cast_fp16")]; + tensor const_389_promoted_to_fp16 = const()[name = string("const_389_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434260480)))]; + tensor x_205_cast_fp16 = mul(x = normed_207_cast_fp16, y = const_389_promoted_to_fp16)[name = string("x_205_cast_fp16")]; + tensor var_7955 = const()[name = string("op_7955"), val = tensor([0, 2, 1])]; + tensor input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor([2])]; + tensor var_7956 = transpose(perm = var_7955, x = x_205_cast_fp16)[name = string("transpose_91")]; + tensor input_229 = expand_dims(axes = input_229_axes_0, x = var_7956)[name = string("input_229")]; + string input_231_pad_type_0 = const()[name = string("input_231_pad_type_0"), val = string("valid")]; + tensor input_231_strides_0 = const()[name = string("input_231_strides_0"), val = tensor([1, 1])]; + tensor input_231_pad_0 = const()[name = string("input_231_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_231_dilations_0 = const()[name = string("input_231_dilations_0"), val = tensor([1, 1])]; + int32 input_231_groups_0 = const()[name = string("input_231_groups_0"), val = int32(1)]; + tensor input_231 = conv(dilations = input_231_dilations_0, groups = input_231_groups_0, pad = input_231_pad_0, pad_type = input_231_pad_type_0, strides = input_231_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_229)[name = string("input_231")]; + string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; + tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; + tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; + int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; + tensor b_25 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_229)[name = string("b_25")]; + tensor c_25 = silu(x = input_231)[name = string("c_25")]; + tensor input_233 = mul(x = c_25, y = b_25)[name = string("input_233")]; + string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; + tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; + tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; + int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; + tensor e_25 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_233)[name = string("e_25")]; + tensor var_7978_axes_0 = const()[name = string("op_7978_axes_0"), val = tensor([2])]; + tensor var_7978 = squeeze(axes = var_7978_axes_0, x = e_25)[name = string("op_7978")]; + tensor var_7979 = const()[name = string("op_7979"), val = tensor([0, 2, 1])]; + tensor var_7980 = transpose(perm = var_7979, x = var_7978)[name = string("transpose_90")]; + tensor hidden_states_131_cast_fp16 = add(x = hidden_states_129_cast_fp16, y = var_7980)[name = string("hidden_states_131_cast_fp16")]; + int32 var_7992 = const()[name = string("op_7992"), val = int32(-1)]; + fp16 const_390_promoted_to_fp16 = const()[name = string("const_390_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7994_cast_fp16 = mul(x = hidden_states_131_cast_fp16, y = const_390_promoted_to_fp16)[name = string("op_7994_cast_fp16")]; + bool input_235_interleave_0 = const()[name = string("input_235_interleave_0"), val = bool(false)]; + tensor input_235_cast_fp16 = concat(axis = var_7992, interleave = input_235_interleave_0, values = (hidden_states_131_cast_fp16, var_7994_cast_fp16))[name = string("input_235_cast_fp16")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_7989_to_fp16 = const()[name = string("op_7989_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_7989_to_fp16, x = input_235_cast_fp16)[name = string("normed_209_cast_fp16")]; + tensor normed_211_begin_0 = const()[name = string("normed_211_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_211_end_0 = const()[name = string("normed_211_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_211_end_mask_0 = const()[name = string("normed_211_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_211_cast_fp16 = slice_by_index(begin = normed_211_begin_0, end = normed_211_end_0, end_mask = normed_211_end_mask_0, x = normed_209_cast_fp16)[name = string("normed_211_cast_fp16")]; + tensor const_393_promoted_to_fp16 = const()[name = string("const_393_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434262592)))]; + tensor hidden_states_133_cast_fp16 = mul(x = normed_211_cast_fp16, y = const_393_promoted_to_fp16)[name = string("hidden_states_133_cast_fp16")]; + tensor var_8011 = const()[name = string("op_8011"), val = tensor([0, 2, 1])]; + tensor var_8014_axes_0 = const()[name = string("op_8014_axes_0"), val = tensor([2])]; + tensor var_8012_cast_fp16 = transpose(perm = var_8011, x = hidden_states_133_cast_fp16)[name = string("transpose_89")]; + tensor var_8014_cast_fp16 = expand_dims(axes = var_8014_axes_0, x = var_8012_cast_fp16)[name = string("op_8014_cast_fp16")]; + string var_8030_pad_type_0 = const()[name = string("op_8030_pad_type_0"), val = string("valid")]; + tensor var_8030_strides_0 = const()[name = string("op_8030_strides_0"), val = tensor([1, 1])]; + tensor var_8030_pad_0 = const()[name = string("op_8030_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8030_dilations_0 = const()[name = string("op_8030_dilations_0"), val = tensor([1, 1])]; + int32 var_8030_groups_0 = const()[name = string("op_8030_groups_0"), val = int32(1)]; + tensor var_8030 = conv(dilations = var_8030_dilations_0, groups = var_8030_groups_0, pad = var_8030_pad_0, pad_type = var_8030_pad_type_0, strides = var_8030_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_8014_cast_fp16)[name = string("op_8030")]; + tensor var_8035 = const()[name = string("op_8035"), val = tensor([1, 16, 1, 128])]; + tensor var_8036 = reshape(shape = var_8035, x = var_8030)[name = string("op_8036")]; + string var_8052_pad_type_0 = const()[name = string("op_8052_pad_type_0"), val = string("valid")]; + tensor var_8052_strides_0 = const()[name = string("op_8052_strides_0"), val = tensor([1, 1])]; + tensor var_8052_pad_0 = const()[name = string("op_8052_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8052_dilations_0 = const()[name = string("op_8052_dilations_0"), val = tensor([1, 1])]; + int32 var_8052_groups_0 = const()[name = string("op_8052_groups_0"), val = int32(1)]; + tensor var_8052 = conv(dilations = var_8052_dilations_0, groups = var_8052_groups_0, pad = var_8052_pad_0, pad_type = var_8052_pad_type_0, strides = var_8052_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_8014_cast_fp16)[name = string("op_8052")]; + tensor var_8057 = const()[name = string("op_8057"), val = tensor([1, 8, 1, 128])]; + tensor var_8058 = reshape(shape = var_8057, x = var_8052)[name = string("op_8058")]; + string var_8074_pad_type_0 = const()[name = string("op_8074_pad_type_0"), val = string("valid")]; + tensor var_8074_strides_0 = const()[name = string("op_8074_strides_0"), val = tensor([1, 1])]; + tensor var_8074_pad_0 = const()[name = string("op_8074_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8074_dilations_0 = const()[name = string("op_8074_dilations_0"), val = tensor([1, 1])]; + int32 var_8074_groups_0 = const()[name = string("op_8074_groups_0"), val = int32(1)]; + tensor var_8074 = conv(dilations = var_8074_dilations_0, groups = var_8074_groups_0, pad = var_8074_pad_0, pad_type = var_8074_pad_type_0, strides = var_8074_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_8014_cast_fp16)[name = string("op_8074")]; + tensor var_8079 = const()[name = string("op_8079"), val = tensor([1, 8, 1, 128])]; + tensor var_8080 = reshape(shape = var_8079, x = var_8074)[name = string("op_8080")]; + int32 var_8095 = const()[name = string("op_8095"), val = int32(-1)]; + fp16 const_394_promoted = const()[name = string("const_394_promoted"), val = fp16(-0x1p+0)]; + tensor var_8097 = mul(x = var_8036, y = const_394_promoted)[name = string("op_8097")]; + bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; + tensor input_239 = concat(axis = var_8095, interleave = input_239_interleave_0, values = (var_8036, var_8097))[name = string("input_239")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_8092_to_fp16 = const()[name = string("op_8092_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_8092_to_fp16, x = input_239)[name = string("normed_213_cast_fp16")]; + tensor normed_215_begin_0 = const()[name = string("normed_215_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_215_end_0 = const()[name = string("normed_215_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_215_end_mask_0 = const()[name = string("normed_215_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_215 = slice_by_index(begin = normed_215_begin_0, end = normed_215_end_0, end_mask = normed_215_end_mask_0, x = normed_213_cast_fp16)[name = string("normed_215")]; + tensor const_397 = const()[name = string("const_397"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434264704)))]; + tensor q_27 = mul(x = normed_215, y = const_397)[name = string("q_27")]; + int32 var_8120 = const()[name = string("op_8120"), val = int32(-1)]; + fp16 const_398_promoted = const()[name = string("const_398_promoted"), val = fp16(-0x1p+0)]; + tensor var_8122 = mul(x = var_8058, y = const_398_promoted)[name = string("op_8122")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241 = concat(axis = var_8120, interleave = input_241_interleave_0, values = (var_8058, var_8122))[name = string("input_241")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_8117_to_fp16 = const()[name = string("op_8117_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_8117_to_fp16, x = input_241)[name = string("normed_217_cast_fp16")]; + tensor normed_219_begin_0 = const()[name = string("normed_219_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_219_end_0 = const()[name = string("normed_219_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_219_end_mask_0 = const()[name = string("normed_219_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_219 = slice_by_index(begin = normed_219_begin_0, end = normed_219_end_0, end_mask = normed_219_end_mask_0, x = normed_217_cast_fp16)[name = string("normed_219")]; + tensor const_401 = const()[name = string("const_401"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434265024)))]; + tensor k_27 = mul(x = normed_219, y = const_401)[name = string("k_27")]; + tensor var_8136 = mul(x = q_27, y = cos_1_cast_fp16)[name = string("op_8136")]; + tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27)[name = string("x1_53")]; + tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27)[name = string("x2_53")]; + fp16 const_404_promoted = const()[name = string("const_404_promoted"), val = fp16(-0x1p+0)]; + tensor var_8157 = mul(x = x2_53, y = const_404_promoted)[name = string("op_8157")]; + int32 var_8159 = const()[name = string("op_8159"), val = int32(-1)]; + bool var_8160_interleave_0 = const()[name = string("op_8160_interleave_0"), val = bool(false)]; + tensor var_8160 = concat(axis = var_8159, interleave = var_8160_interleave_0, values = (var_8157, x1_53))[name = string("op_8160")]; + tensor var_8161 = mul(x = var_8160, y = sin_1_cast_fp16)[name = string("op_8161")]; + tensor query_states_53 = add(x = var_8136, y = var_8161)[name = string("query_states_53")]; + tensor var_8164 = mul(x = k_27, y = cos_1_cast_fp16)[name = string("op_8164")]; + tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27)[name = string("x1_55")]; + tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27)[name = string("x2_55")]; + fp16 const_407_promoted = const()[name = string("const_407_promoted"), val = fp16(-0x1p+0)]; + tensor var_8185 = mul(x = x2_55, y = const_407_promoted)[name = string("op_8185")]; + int32 var_8187 = const()[name = string("op_8187"), val = int32(-1)]; + bool var_8188_interleave_0 = const()[name = string("op_8188_interleave_0"), val = bool(false)]; + tensor var_8188 = concat(axis = var_8187, interleave = var_8188_interleave_0, values = (var_8185, x1_55))[name = string("op_8188")]; + tensor var_8189 = mul(x = var_8188, y = sin_1_cast_fp16)[name = string("op_8189")]; + tensor key_states_53 = add(x = var_8164, y = var_8189)[name = string("key_states_53")]; + tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([13])]; + tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; + tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; + tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([14])]; + int32 concat_106_axis_0 = const()[name = string("concat_106_axis_0"), val = int32(0)]; + bool concat_106_interleave_0 = const()[name = string("concat_106_interleave_0"), val = bool(false)]; + tensor concat_106 = concat(axis = concat_106_axis_0, interleave = concat_106_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_106")]; + tensor concat_107_values1_0 = const()[name = string("concat_107_values1_0"), val = tensor([0])]; + tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; + int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; + bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; + tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_160, concat_107_values1_0, var_1746, concat_107_values3_0))[name = string("concat_107")]; + tensor model_model_kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_106, begin_mask = model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_107, end_mask = model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_27_stride_0, update = key_states_53, x = coreml_update_state_81)[name = string("model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; + tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; + tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([41])]; + tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; + tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; + tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([42])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_110")]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_166, concat_111_values1_0, var_1746, concat_111_values3_0))[name = string("concat_111")]; + tensor model_model_kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_28_stride_0, update = var_8080, x = coreml_update_state_82)[name = string("model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; + tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; + tensor var_8244_begin_0 = const()[name = string("op_8244_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor var_8244_end_0 = const()[name = string("op_8244_end_0"), val = tensor([14, 8, 4096, 128])]; + tensor var_8244_end_mask_0 = const()[name = string("op_8244_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8244_cast_fp16 = slice_by_index(begin = var_8244_begin_0, end = var_8244_end_0, end_mask = var_8244_end_mask_0, x = coreml_update_state_83)[name = string("op_8244_cast_fp16")]; + tensor K_layer_cache_27_axes_0 = const()[name = string("K_layer_cache_27_axes_0"), val = tensor([0])]; + tensor K_layer_cache_27_cast_fp16 = squeeze(axes = K_layer_cache_27_axes_0, x = var_8244_cast_fp16)[name = string("K_layer_cache_27_cast_fp16")]; + tensor var_8251_begin_0 = const()[name = string("op_8251_begin_0"), val = tensor([41, 0, 0, 0])]; + tensor var_8251_end_0 = const()[name = string("op_8251_end_0"), val = tensor([42, 8, 4096, 128])]; + tensor var_8251_end_mask_0 = const()[name = string("op_8251_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8251_cast_fp16 = slice_by_index(begin = var_8251_begin_0, end = var_8251_end_0, end_mask = var_8251_end_mask_0, x = coreml_update_state_83)[name = string("op_8251_cast_fp16")]; + tensor V_layer_cache_27_axes_0 = const()[name = string("V_layer_cache_27_axes_0"), val = tensor([0])]; + tensor V_layer_cache_27_cast_fp16 = squeeze(axes = V_layer_cache_27_axes_0, x = var_8251_cast_fp16)[name = string("V_layer_cache_27_cast_fp16")]; + tensor x_211_axes_0 = const()[name = string("x_211_axes_0"), val = tensor([1])]; + tensor x_211_cast_fp16 = expand_dims(axes = x_211_axes_0, x = K_layer_cache_27_cast_fp16)[name = string("x_211_cast_fp16")]; + tensor var_8288 = const()[name = string("op_8288"), val = tensor([1, 2, 1, 1])]; + tensor x_213_cast_fp16 = tile(reps = var_8288, x = x_211_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_8300 = const()[name = string("op_8300"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_55_cast_fp16 = reshape(shape = var_8300, x = x_213_cast_fp16)[name = string("key_states_55_cast_fp16")]; + tensor x_217_axes_0 = const()[name = string("x_217_axes_0"), val = tensor([1])]; + tensor x_217_cast_fp16 = expand_dims(axes = x_217_axes_0, x = V_layer_cache_27_cast_fp16)[name = string("x_217_cast_fp16")]; + tensor var_8308 = const()[name = string("op_8308"), val = tensor([1, 2, 1, 1])]; + tensor x_219_cast_fp16 = tile(reps = var_8308, x = x_217_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor var_8320 = const()[name = string("op_8320"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_81_cast_fp16 = reshape(shape = var_8320, x = x_219_cast_fp16)[name = string("value_states_81_cast_fp16")]; + bool var_8335_transpose_x_1 = const()[name = string("op_8335_transpose_x_1"), val = bool(false)]; + bool var_8335_transpose_y_1 = const()[name = string("op_8335_transpose_y_1"), val = bool(true)]; + tensor var_8335 = matmul(transpose_x = var_8335_transpose_x_1, transpose_y = var_8335_transpose_y_1, x = query_states_53, y = key_states_55_cast_fp16)[name = string("op_8335")]; + fp16 var_8336_to_fp16 = const()[name = string("op_8336_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_79_cast_fp16 = mul(x = var_8335, y = var_8336_to_fp16)[name = string("attn_weights_79_cast_fp16")]; + tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = causal_mask)[name = string("attn_weights_81_cast_fp16")]; + int32 var_8371 = const()[name = string("op_8371"), val = int32(-1)]; + tensor attn_weights_83_cast_fp16 = softmax(axis = var_8371, x = attn_weights_81_cast_fp16)[name = string("attn_weights_83_cast_fp16")]; + bool attn_output_131_transpose_x_0 = const()[name = string("attn_output_131_transpose_x_0"), val = bool(false)]; + bool attn_output_131_transpose_y_0 = const()[name = string("attn_output_131_transpose_y_0"), val = bool(false)]; + tensor attn_output_131_cast_fp16 = matmul(transpose_x = attn_output_131_transpose_x_0, transpose_y = attn_output_131_transpose_y_0, x = attn_weights_83_cast_fp16, y = value_states_81_cast_fp16)[name = string("attn_output_131_cast_fp16")]; + tensor var_8382_perm_0 = const()[name = string("op_8382_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_8386 = const()[name = string("op_8386"), val = tensor([1, 1, 2048])]; + tensor var_8382_cast_fp16 = transpose(perm = var_8382_perm_0, x = attn_output_131_cast_fp16)[name = string("transpose_88")]; + tensor attn_output_135_cast_fp16 = reshape(shape = var_8386, x = var_8382_cast_fp16)[name = string("attn_output_135_cast_fp16")]; + tensor var_8391 = const()[name = string("op_8391"), val = tensor([0, 2, 1])]; + string var_8407_pad_type_0 = const()[name = string("op_8407_pad_type_0"), val = string("valid")]; + int32 var_8407_groups_0 = const()[name = string("op_8407_groups_0"), val = int32(1)]; + tensor var_8407_strides_0 = const()[name = string("op_8407_strides_0"), val = tensor([1])]; + tensor var_8407_pad_0 = const()[name = string("op_8407_pad_0"), val = tensor([0, 0])]; + tensor var_8407_dilations_0 = const()[name = string("op_8407_dilations_0"), val = tensor([1])]; + tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434265344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436362560))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8392_cast_fp16 = transpose(perm = var_8391, x = attn_output_135_cast_fp16)[name = string("transpose_87")]; + tensor var_8407_cast_fp16 = conv(dilations = var_8407_dilations_0, groups = var_8407_groups_0, pad = var_8407_pad_0, pad_type = var_8407_pad_type_0, strides = var_8407_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_8392_cast_fp16)[name = string("op_8407_cast_fp16")]; + tensor var_8411 = const()[name = string("op_8411"), val = tensor([0, 2, 1])]; + tensor attn_output_139_cast_fp16 = transpose(perm = var_8411, x = var_8407_cast_fp16)[name = string("transpose_86")]; + tensor hidden_states_139_cast_fp16 = add(x = hidden_states_131_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; + int32 var_8424 = const()[name = string("op_8424"), val = int32(-1)]; + fp16 const_416_promoted_to_fp16 = const()[name = string("const_416_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8426_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = const_416_promoted_to_fp16)[name = string("op_8426_cast_fp16")]; + bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; + tensor input_245_cast_fp16 = concat(axis = var_8424, interleave = input_245_interleave_0, values = (hidden_states_139_cast_fp16, var_8426_cast_fp16))[name = string("input_245_cast_fp16")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_8421_to_fp16 = const()[name = string("op_8421_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_8421_to_fp16, x = input_245_cast_fp16)[name = string("normed_221_cast_fp16")]; + tensor normed_223_begin_0 = const()[name = string("normed_223_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_223_end_0 = const()[name = string("normed_223_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_223_end_mask_0 = const()[name = string("normed_223_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_223_cast_fp16 = slice_by_index(begin = normed_223_begin_0, end = normed_223_end_0, end_mask = normed_223_end_mask_0, x = normed_221_cast_fp16)[name = string("normed_223_cast_fp16")]; + tensor const_419_promoted_to_fp16 = const()[name = string("const_419_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436428160)))]; + tensor x_221_cast_fp16 = mul(x = normed_223_cast_fp16, y = const_419_promoted_to_fp16)[name = string("x_221_cast_fp16")]; + tensor var_8451 = const()[name = string("op_8451"), val = tensor([0, 2, 1])]; + tensor input_247_axes_0 = const()[name = string("input_247_axes_0"), val = tensor([2])]; + tensor var_8452 = transpose(perm = var_8451, x = x_221_cast_fp16)[name = string("transpose_85")]; + tensor input_247 = expand_dims(axes = input_247_axes_0, x = var_8452)[name = string("input_247")]; + string input_249_pad_type_0 = const()[name = string("input_249_pad_type_0"), val = string("valid")]; + tensor input_249_strides_0 = const()[name = string("input_249_strides_0"), val = tensor([1, 1])]; + tensor input_249_pad_0 = const()[name = string("input_249_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_249_dilations_0 = const()[name = string("input_249_dilations_0"), val = tensor([1, 1])]; + int32 input_249_groups_0 = const()[name = string("input_249_groups_0"), val = int32(1)]; + tensor input_249 = conv(dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_247)[name = string("input_249")]; + string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; + tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; + tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; + int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; + tensor b_27 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_247)[name = string("b_27")]; + tensor c_27 = silu(x = input_249)[name = string("c_27")]; + tensor input_251 = mul(x = c_27, y = b_27)[name = string("input_251")]; + string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; + tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; + tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; + int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; + tensor e_27 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_251)[name = string("e_27")]; + tensor var_8474_axes_0 = const()[name = string("op_8474_axes_0"), val = tensor([2])]; + tensor var_8474 = squeeze(axes = var_8474_axes_0, x = e_27)[name = string("op_8474")]; + tensor var_8475 = const()[name = string("op_8475"), val = tensor([0, 2, 1])]; + tensor var_8476 = transpose(perm = var_8475, x = var_8474)[name = string("transpose_84")]; + tensor hidden_states_141_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = var_8476)[name = string("hidden_states_141_cast_fp16")]; + int32 var_8488 = const()[name = string("op_8488"), val = int32(-1)]; + fp16 const_420_promoted_to_fp16 = const()[name = string("const_420_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8490_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_420_promoted_to_fp16)[name = string("op_8490_cast_fp16")]; + bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; + tensor input_253_cast_fp16 = concat(axis = var_8488, interleave = input_253_interleave_0, values = (hidden_states_141_cast_fp16, var_8490_cast_fp16))[name = string("input_253_cast_fp16")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_8485_to_fp16 = const()[name = string("op_8485_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_8485_to_fp16, x = input_253_cast_fp16)[name = string("normed_225_cast_fp16")]; + tensor normed_227_begin_0 = const()[name = string("normed_227_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_227_end_0 = const()[name = string("normed_227_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_227_end_mask_0 = const()[name = string("normed_227_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_227_cast_fp16 = slice_by_index(begin = normed_227_begin_0, end = normed_227_end_0, end_mask = normed_227_end_mask_0, x = normed_225_cast_fp16)[name = string("normed_227_cast_fp16")]; + tensor const_423_promoted_to_fp16 = const()[name = string("const_423_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436430272)))]; + tensor hidden_states_143_cast_fp16 = mul(x = normed_227_cast_fp16, y = const_423_promoted_to_fp16)[name = string("hidden_states_143_cast_fp16")]; + tensor var_8507 = const()[name = string("op_8507"), val = tensor([0, 2, 1])]; + tensor var_8510_axes_0 = const()[name = string("op_8510_axes_0"), val = tensor([2])]; + tensor var_8508_cast_fp16 = transpose(perm = var_8507, x = hidden_states_143_cast_fp16)[name = string("transpose_83")]; + tensor var_8510_cast_fp16 = expand_dims(axes = var_8510_axes_0, x = var_8508_cast_fp16)[name = string("op_8510_cast_fp16")]; + string var_8526_pad_type_0 = const()[name = string("op_8526_pad_type_0"), val = string("valid")]; + tensor var_8526_strides_0 = const()[name = string("op_8526_strides_0"), val = tensor([1, 1])]; + tensor var_8526_pad_0 = const()[name = string("op_8526_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8526_dilations_0 = const()[name = string("op_8526_dilations_0"), val = tensor([1, 1])]; + int32 var_8526_groups_0 = const()[name = string("op_8526_groups_0"), val = int32(1)]; + tensor var_8526 = conv(dilations = var_8526_dilations_0, groups = var_8526_groups_0, pad = var_8526_pad_0, pad_type = var_8526_pad_type_0, strides = var_8526_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_8510_cast_fp16)[name = string("op_8526")]; + tensor var_8531 = const()[name = string("op_8531"), val = tensor([1, 16, 1, 128])]; + tensor var_8532 = reshape(shape = var_8531, x = var_8526)[name = string("op_8532")]; + string var_8548_pad_type_0 = const()[name = string("op_8548_pad_type_0"), val = string("valid")]; + tensor var_8548_strides_0 = const()[name = string("op_8548_strides_0"), val = tensor([1, 1])]; + tensor var_8548_pad_0 = const()[name = string("op_8548_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8548_dilations_0 = const()[name = string("op_8548_dilations_0"), val = tensor([1, 1])]; + int32 var_8548_groups_0 = const()[name = string("op_8548_groups_0"), val = int32(1)]; + tensor var_8548 = conv(dilations = var_8548_dilations_0, groups = var_8548_groups_0, pad = var_8548_pad_0, pad_type = var_8548_pad_type_0, strides = var_8548_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_8510_cast_fp16)[name = string("op_8548")]; + tensor var_8553 = const()[name = string("op_8553"), val = tensor([1, 8, 1, 128])]; + tensor var_8554 = reshape(shape = var_8553, x = var_8548)[name = string("op_8554")]; + string var_8570_pad_type_0 = const()[name = string("op_8570_pad_type_0"), val = string("valid")]; + tensor var_8570_strides_0 = const()[name = string("op_8570_strides_0"), val = tensor([1, 1])]; + tensor var_8570_pad_0 = const()[name = string("op_8570_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8570_dilations_0 = const()[name = string("op_8570_dilations_0"), val = tensor([1, 1])]; + int32 var_8570_groups_0 = const()[name = string("op_8570_groups_0"), val = int32(1)]; + tensor var_8570 = conv(dilations = var_8570_dilations_0, groups = var_8570_groups_0, pad = var_8570_pad_0, pad_type = var_8570_pad_type_0, strides = var_8570_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_8510_cast_fp16)[name = string("op_8570")]; + tensor var_8575 = const()[name = string("op_8575"), val = tensor([1, 8, 1, 128])]; + tensor var_8576 = reshape(shape = var_8575, x = var_8570)[name = string("op_8576")]; + int32 var_8591 = const()[name = string("op_8591"), val = int32(-1)]; + fp16 const_424_promoted = const()[name = string("const_424_promoted"), val = fp16(-0x1p+0)]; + tensor var_8593 = mul(x = var_8532, y = const_424_promoted)[name = string("op_8593")]; + bool input_257_interleave_0 = const()[name = string("input_257_interleave_0"), val = bool(false)]; + tensor input_257 = concat(axis = var_8591, interleave = input_257_interleave_0, values = (var_8532, var_8593))[name = string("input_257")]; + tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_8588_to_fp16 = const()[name = string("op_8588_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_8588_to_fp16, x = input_257)[name = string("normed_229_cast_fp16")]; + tensor normed_231_begin_0 = const()[name = string("normed_231_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_231_end_0 = const()[name = string("normed_231_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_231_end_mask_0 = const()[name = string("normed_231_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_231 = slice_by_index(begin = normed_231_begin_0, end = normed_231_end_0, end_mask = normed_231_end_mask_0, x = normed_229_cast_fp16)[name = string("normed_231")]; + tensor const_427 = const()[name = string("const_427"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436432384)))]; + tensor q_29 = mul(x = normed_231, y = const_427)[name = string("q_29")]; + int32 var_8616 = const()[name = string("op_8616"), val = int32(-1)]; + fp16 const_428_promoted = const()[name = string("const_428_promoted"), val = fp16(-0x1p+0)]; + tensor var_8618 = mul(x = var_8554, y = const_428_promoted)[name = string("op_8618")]; + bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; + tensor input_259 = concat(axis = var_8616, interleave = input_259_interleave_0, values = (var_8554, var_8618))[name = string("input_259")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_8613_to_fp16 = const()[name = string("op_8613_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_8613_to_fp16, x = input_259)[name = string("normed_233_cast_fp16")]; + tensor normed_235_begin_0 = const()[name = string("normed_235_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_235_end_0 = const()[name = string("normed_235_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_235_end_mask_0 = const()[name = string("normed_235_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_235 = slice_by_index(begin = normed_235_begin_0, end = normed_235_end_0, end_mask = normed_235_end_mask_0, x = normed_233_cast_fp16)[name = string("normed_235")]; + tensor const_431 = const()[name = string("const_431"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436432704)))]; + tensor k_29 = mul(x = normed_235, y = const_431)[name = string("k_29")]; + tensor var_8632 = mul(x = q_29, y = cos_1_cast_fp16)[name = string("op_8632")]; + tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29)[name = string("x1_57")]; + tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29)[name = string("x2_57")]; + fp16 const_434_promoted = const()[name = string("const_434_promoted"), val = fp16(-0x1p+0)]; + tensor var_8653 = mul(x = x2_57, y = const_434_promoted)[name = string("op_8653")]; + int32 var_8655 = const()[name = string("op_8655"), val = int32(-1)]; + bool var_8656_interleave_0 = const()[name = string("op_8656_interleave_0"), val = bool(false)]; + tensor var_8656 = concat(axis = var_8655, interleave = var_8656_interleave_0, values = (var_8653, x1_57))[name = string("op_8656")]; + tensor var_8657 = mul(x = var_8656, y = sin_1_cast_fp16)[name = string("op_8657")]; + tensor query_states_57 = add(x = var_8632, y = var_8657)[name = string("query_states_57")]; + tensor var_8660 = mul(x = k_29, y = cos_1_cast_fp16)[name = string("op_8660")]; + tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29)[name = string("x1_59")]; + tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29)[name = string("x2_59")]; + fp16 const_437_promoted = const()[name = string("const_437_promoted"), val = fp16(-0x1p+0)]; + tensor var_8681 = mul(x = x2_59, y = const_437_promoted)[name = string("op_8681")]; + int32 var_8683 = const()[name = string("op_8683"), val = int32(-1)]; + bool var_8684_interleave_0 = const()[name = string("op_8684_interleave_0"), val = bool(false)]; + tensor var_8684 = concat(axis = var_8683, interleave = var_8684_interleave_0, values = (var_8681, x1_59))[name = string("op_8684")]; + tensor var_8685 = mul(x = var_8684, y = sin_1_cast_fp16)[name = string("op_8685")]; + tensor key_states_57 = add(x = var_8660, y = var_8685)[name = string("key_states_57")]; + tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([14])]; + tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; + tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; + tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([15])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_114")]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_172, concat_115_values1_0, var_1746, concat_115_values3_0))[name = string("concat_115")]; + tensor model_model_kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_29_stride_0, update = key_states_57, x = coreml_update_state_83)[name = string("model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; + tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; + tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([42])]; + tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; + tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; + tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([43])]; + int32 concat_118_axis_0 = const()[name = string("concat_118_axis_0"), val = int32(0)]; + bool concat_118_interleave_0 = const()[name = string("concat_118_interleave_0"), val = bool(false)]; + tensor concat_118 = concat(axis = concat_118_axis_0, interleave = concat_118_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_118")]; + tensor concat_119_values1_0 = const()[name = string("concat_119_values1_0"), val = tensor([0])]; + tensor concat_119_values3_0 = const()[name = string("concat_119_values3_0"), val = tensor([0])]; + int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; + bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; + tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (expand_dims_178, concat_119_values1_0, var_1746, concat_119_values3_0))[name = string("concat_119")]; + tensor model_model_kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_118, begin_mask = model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_119, end_mask = model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_30_stride_0, update = var_8576, x = coreml_update_state_84)[name = string("model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; + tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; + tensor var_8740_begin_0 = const()[name = string("op_8740_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor var_8740_end_0 = const()[name = string("op_8740_end_0"), val = tensor([15, 8, 4096, 128])]; + tensor var_8740_end_mask_0 = const()[name = string("op_8740_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8740_cast_fp16 = slice_by_index(begin = var_8740_begin_0, end = var_8740_end_0, end_mask = var_8740_end_mask_0, x = coreml_update_state_85)[name = string("op_8740_cast_fp16")]; + tensor K_layer_cache_29_axes_0 = const()[name = string("K_layer_cache_29_axes_0"), val = tensor([0])]; + tensor K_layer_cache_29_cast_fp16 = squeeze(axes = K_layer_cache_29_axes_0, x = var_8740_cast_fp16)[name = string("K_layer_cache_29_cast_fp16")]; + tensor var_8747_begin_0 = const()[name = string("op_8747_begin_0"), val = tensor([42, 0, 0, 0])]; + tensor var_8747_end_0 = const()[name = string("op_8747_end_0"), val = tensor([43, 8, 4096, 128])]; + tensor var_8747_end_mask_0 = const()[name = string("op_8747_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8747_cast_fp16 = slice_by_index(begin = var_8747_begin_0, end = var_8747_end_0, end_mask = var_8747_end_mask_0, x = coreml_update_state_85)[name = string("op_8747_cast_fp16")]; + tensor V_layer_cache_29_axes_0 = const()[name = string("V_layer_cache_29_axes_0"), val = tensor([0])]; + tensor V_layer_cache_29_cast_fp16 = squeeze(axes = V_layer_cache_29_axes_0, x = var_8747_cast_fp16)[name = string("V_layer_cache_29_cast_fp16")]; + tensor x_227_axes_0 = const()[name = string("x_227_axes_0"), val = tensor([1])]; + tensor x_227_cast_fp16 = expand_dims(axes = x_227_axes_0, x = K_layer_cache_29_cast_fp16)[name = string("x_227_cast_fp16")]; + tensor var_8784 = const()[name = string("op_8784"), val = tensor([1, 2, 1, 1])]; + tensor x_229_cast_fp16 = tile(reps = var_8784, x = x_227_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_8796 = const()[name = string("op_8796"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_59_cast_fp16 = reshape(shape = var_8796, x = x_229_cast_fp16)[name = string("key_states_59_cast_fp16")]; + tensor x_233_axes_0 = const()[name = string("x_233_axes_0"), val = tensor([1])]; + tensor x_233_cast_fp16 = expand_dims(axes = x_233_axes_0, x = V_layer_cache_29_cast_fp16)[name = string("x_233_cast_fp16")]; + tensor var_8804 = const()[name = string("op_8804"), val = tensor([1, 2, 1, 1])]; + tensor x_235_cast_fp16 = tile(reps = var_8804, x = x_233_cast_fp16)[name = string("x_235_cast_fp16")]; + tensor var_8816 = const()[name = string("op_8816"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_87_cast_fp16 = reshape(shape = var_8816, x = x_235_cast_fp16)[name = string("value_states_87_cast_fp16")]; + bool var_8831_transpose_x_1 = const()[name = string("op_8831_transpose_x_1"), val = bool(false)]; + bool var_8831_transpose_y_1 = const()[name = string("op_8831_transpose_y_1"), val = bool(true)]; + tensor var_8831 = matmul(transpose_x = var_8831_transpose_x_1, transpose_y = var_8831_transpose_y_1, x = query_states_57, y = key_states_59_cast_fp16)[name = string("op_8831")]; + fp16 var_8832_to_fp16 = const()[name = string("op_8832_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_85_cast_fp16 = mul(x = var_8831, y = var_8832_to_fp16)[name = string("attn_weights_85_cast_fp16")]; + tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = causal_mask)[name = string("attn_weights_87_cast_fp16")]; + int32 var_8867 = const()[name = string("op_8867"), val = int32(-1)]; + tensor attn_weights_89_cast_fp16 = softmax(axis = var_8867, x = attn_weights_87_cast_fp16)[name = string("attn_weights_89_cast_fp16")]; + bool attn_output_141_transpose_x_0 = const()[name = string("attn_output_141_transpose_x_0"), val = bool(false)]; + bool attn_output_141_transpose_y_0 = const()[name = string("attn_output_141_transpose_y_0"), val = bool(false)]; + tensor attn_output_141_cast_fp16 = matmul(transpose_x = attn_output_141_transpose_x_0, transpose_y = attn_output_141_transpose_y_0, x = attn_weights_89_cast_fp16, y = value_states_87_cast_fp16)[name = string("attn_output_141_cast_fp16")]; + tensor var_8878_perm_0 = const()[name = string("op_8878_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_8882 = const()[name = string("op_8882"), val = tensor([1, 1, 2048])]; + tensor var_8878_cast_fp16 = transpose(perm = var_8878_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_82")]; + tensor attn_output_145_cast_fp16 = reshape(shape = var_8882, x = var_8878_cast_fp16)[name = string("attn_output_145_cast_fp16")]; + tensor var_8887 = const()[name = string("op_8887"), val = tensor([0, 2, 1])]; + string var_8903_pad_type_0 = const()[name = string("op_8903_pad_type_0"), val = string("valid")]; + int32 var_8903_groups_0 = const()[name = string("op_8903_groups_0"), val = int32(1)]; + tensor var_8903_strides_0 = const()[name = string("op_8903_strides_0"), val = tensor([1])]; + tensor var_8903_pad_0 = const()[name = string("op_8903_pad_0"), val = tensor([0, 0])]; + tensor var_8903_dilations_0 = const()[name = string("op_8903_dilations_0"), val = tensor([1])]; + tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436433024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438530240))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8888_cast_fp16 = transpose(perm = var_8887, x = attn_output_145_cast_fp16)[name = string("transpose_81")]; + tensor var_8903_cast_fp16 = conv(dilations = var_8903_dilations_0, groups = var_8903_groups_0, pad = var_8903_pad_0, pad_type = var_8903_pad_type_0, strides = var_8903_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_8888_cast_fp16)[name = string("op_8903_cast_fp16")]; + tensor var_8907 = const()[name = string("op_8907"), val = tensor([0, 2, 1])]; + tensor attn_output_149_cast_fp16 = transpose(perm = var_8907, x = var_8903_cast_fp16)[name = string("transpose_80")]; + tensor hidden_states_149_cast_fp16 = add(x = hidden_states_141_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + int32 var_8920 = const()[name = string("op_8920"), val = int32(-1)]; + fp16 const_446_promoted_to_fp16 = const()[name = string("const_446_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8922_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = const_446_promoted_to_fp16)[name = string("op_8922_cast_fp16")]; + bool input_263_interleave_0 = const()[name = string("input_263_interleave_0"), val = bool(false)]; + tensor input_263_cast_fp16 = concat(axis = var_8920, interleave = input_263_interleave_0, values = (hidden_states_149_cast_fp16, var_8922_cast_fp16))[name = string("input_263_cast_fp16")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_8917_to_fp16 = const()[name = string("op_8917_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_8917_to_fp16, x = input_263_cast_fp16)[name = string("normed_237_cast_fp16")]; + tensor normed_239_begin_0 = const()[name = string("normed_239_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_239_end_0 = const()[name = string("normed_239_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_239_end_mask_0 = const()[name = string("normed_239_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_239_cast_fp16 = slice_by_index(begin = normed_239_begin_0, end = normed_239_end_0, end_mask = normed_239_end_mask_0, x = normed_237_cast_fp16)[name = string("normed_239_cast_fp16")]; + tensor const_449_promoted_to_fp16 = const()[name = string("const_449_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438595840)))]; + tensor x_237_cast_fp16 = mul(x = normed_239_cast_fp16, y = const_449_promoted_to_fp16)[name = string("x_237_cast_fp16")]; + tensor var_8947 = const()[name = string("op_8947"), val = tensor([0, 2, 1])]; + tensor input_265_axes_0 = const()[name = string("input_265_axes_0"), val = tensor([2])]; + tensor var_8948 = transpose(perm = var_8947, x = x_237_cast_fp16)[name = string("transpose_79")]; + tensor input_265 = expand_dims(axes = input_265_axes_0, x = var_8948)[name = string("input_265")]; + string input_267_pad_type_0 = const()[name = string("input_267_pad_type_0"), val = string("valid")]; + tensor input_267_strides_0 = const()[name = string("input_267_strides_0"), val = tensor([1, 1])]; + tensor input_267_pad_0 = const()[name = string("input_267_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_267_dilations_0 = const()[name = string("input_267_dilations_0"), val = tensor([1, 1])]; + int32 input_267_groups_0 = const()[name = string("input_267_groups_0"), val = int32(1)]; + tensor input_267 = conv(dilations = input_267_dilations_0, groups = input_267_groups_0, pad = input_267_pad_0, pad_type = input_267_pad_type_0, strides = input_267_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_265)[name = string("input_267")]; + string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; + tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; + tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; + int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; + tensor b_29 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_265)[name = string("b_29")]; + tensor c_29 = silu(x = input_267)[name = string("c_29")]; + tensor input_269 = mul(x = c_29, y = b_29)[name = string("input_269")]; + string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; + tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; + tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; + int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; + tensor e_29 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_269)[name = string("e_29")]; + tensor var_8970_axes_0 = const()[name = string("op_8970_axes_0"), val = tensor([2])]; + tensor var_8970 = squeeze(axes = var_8970_axes_0, x = e_29)[name = string("op_8970")]; + tensor var_8971 = const()[name = string("op_8971"), val = tensor([0, 2, 1])]; + tensor var_8972 = transpose(perm = var_8971, x = var_8970)[name = string("transpose_78")]; + tensor hidden_states_151_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = var_8972)[name = string("hidden_states_151_cast_fp16")]; + int32 var_8984 = const()[name = string("op_8984"), val = int32(-1)]; + fp16 const_450_promoted_to_fp16 = const()[name = string("const_450_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8986_cast_fp16 = mul(x = hidden_states_151_cast_fp16, y = const_450_promoted_to_fp16)[name = string("op_8986_cast_fp16")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + tensor input_271_cast_fp16 = concat(axis = var_8984, interleave = input_271_interleave_0, values = (hidden_states_151_cast_fp16, var_8986_cast_fp16))[name = string("input_271_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_8981_to_fp16 = const()[name = string("op_8981_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_8981_to_fp16, x = input_271_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor normed_243_begin_0 = const()[name = string("normed_243_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_243_end_0 = const()[name = string("normed_243_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_243_end_mask_0 = const()[name = string("normed_243_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_243_cast_fp16 = slice_by_index(begin = normed_243_begin_0, end = normed_243_end_0, end_mask = normed_243_end_mask_0, x = normed_241_cast_fp16)[name = string("normed_243_cast_fp16")]; + tensor const_453_promoted_to_fp16 = const()[name = string("const_453_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438597952)))]; + tensor hidden_states_153_cast_fp16 = mul(x = normed_243_cast_fp16, y = const_453_promoted_to_fp16)[name = string("hidden_states_153_cast_fp16")]; + tensor var_9003 = const()[name = string("op_9003"), val = tensor([0, 2, 1])]; + tensor var_9006_axes_0 = const()[name = string("op_9006_axes_0"), val = tensor([2])]; + tensor var_9004_cast_fp16 = transpose(perm = var_9003, x = hidden_states_153_cast_fp16)[name = string("transpose_77")]; + tensor var_9006_cast_fp16 = expand_dims(axes = var_9006_axes_0, x = var_9004_cast_fp16)[name = string("op_9006_cast_fp16")]; + string var_9022_pad_type_0 = const()[name = string("op_9022_pad_type_0"), val = string("valid")]; + tensor var_9022_strides_0 = const()[name = string("op_9022_strides_0"), val = tensor([1, 1])]; + tensor var_9022_pad_0 = const()[name = string("op_9022_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9022_dilations_0 = const()[name = string("op_9022_dilations_0"), val = tensor([1, 1])]; + int32 var_9022_groups_0 = const()[name = string("op_9022_groups_0"), val = int32(1)]; + tensor var_9022 = conv(dilations = var_9022_dilations_0, groups = var_9022_groups_0, pad = var_9022_pad_0, pad_type = var_9022_pad_type_0, strides = var_9022_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_9006_cast_fp16)[name = string("op_9022")]; + tensor var_9027 = const()[name = string("op_9027"), val = tensor([1, 16, 1, 128])]; + tensor var_9028 = reshape(shape = var_9027, x = var_9022)[name = string("op_9028")]; + string var_9044_pad_type_0 = const()[name = string("op_9044_pad_type_0"), val = string("valid")]; + tensor var_9044_strides_0 = const()[name = string("op_9044_strides_0"), val = tensor([1, 1])]; + tensor var_9044_pad_0 = const()[name = string("op_9044_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9044_dilations_0 = const()[name = string("op_9044_dilations_0"), val = tensor([1, 1])]; + int32 var_9044_groups_0 = const()[name = string("op_9044_groups_0"), val = int32(1)]; + tensor var_9044 = conv(dilations = var_9044_dilations_0, groups = var_9044_groups_0, pad = var_9044_pad_0, pad_type = var_9044_pad_type_0, strides = var_9044_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_9006_cast_fp16)[name = string("op_9044")]; + tensor var_9049 = const()[name = string("op_9049"), val = tensor([1, 8, 1, 128])]; + tensor var_9050 = reshape(shape = var_9049, x = var_9044)[name = string("op_9050")]; + string var_9066_pad_type_0 = const()[name = string("op_9066_pad_type_0"), val = string("valid")]; + tensor var_9066_strides_0 = const()[name = string("op_9066_strides_0"), val = tensor([1, 1])]; + tensor var_9066_pad_0 = const()[name = string("op_9066_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9066_dilations_0 = const()[name = string("op_9066_dilations_0"), val = tensor([1, 1])]; + int32 var_9066_groups_0 = const()[name = string("op_9066_groups_0"), val = int32(1)]; + tensor var_9066 = conv(dilations = var_9066_dilations_0, groups = var_9066_groups_0, pad = var_9066_pad_0, pad_type = var_9066_pad_type_0, strides = var_9066_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_9006_cast_fp16)[name = string("op_9066")]; + tensor var_9071 = const()[name = string("op_9071"), val = tensor([1, 8, 1, 128])]; + tensor var_9072 = reshape(shape = var_9071, x = var_9066)[name = string("op_9072")]; + int32 var_9087 = const()[name = string("op_9087"), val = int32(-1)]; + fp16 const_454_promoted = const()[name = string("const_454_promoted"), val = fp16(-0x1p+0)]; + tensor var_9089 = mul(x = var_9028, y = const_454_promoted)[name = string("op_9089")]; + bool input_275_interleave_0 = const()[name = string("input_275_interleave_0"), val = bool(false)]; + tensor input_275 = concat(axis = var_9087, interleave = input_275_interleave_0, values = (var_9028, var_9089))[name = string("input_275")]; + tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_9084_to_fp16 = const()[name = string("op_9084_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_9084_to_fp16, x = input_275)[name = string("normed_245_cast_fp16")]; + tensor normed_247_begin_0 = const()[name = string("normed_247_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_247_end_0 = const()[name = string("normed_247_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_247_end_mask_0 = const()[name = string("normed_247_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_247 = slice_by_index(begin = normed_247_begin_0, end = normed_247_end_0, end_mask = normed_247_end_mask_0, x = normed_245_cast_fp16)[name = string("normed_247")]; + tensor const_457 = const()[name = string("const_457"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438600064)))]; + tensor q_31 = mul(x = normed_247, y = const_457)[name = string("q_31")]; + int32 var_9112 = const()[name = string("op_9112"), val = int32(-1)]; + fp16 const_458_promoted = const()[name = string("const_458_promoted"), val = fp16(-0x1p+0)]; + tensor var_9114 = mul(x = var_9050, y = const_458_promoted)[name = string("op_9114")]; + bool input_277_interleave_0 = const()[name = string("input_277_interleave_0"), val = bool(false)]; + tensor input_277 = concat(axis = var_9112, interleave = input_277_interleave_0, values = (var_9050, var_9114))[name = string("input_277")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_9109_to_fp16 = const()[name = string("op_9109_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_9109_to_fp16, x = input_277)[name = string("normed_249_cast_fp16")]; + tensor normed_251_begin_0 = const()[name = string("normed_251_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_251_end_0 = const()[name = string("normed_251_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_251_end_mask_0 = const()[name = string("normed_251_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_251 = slice_by_index(begin = normed_251_begin_0, end = normed_251_end_0, end_mask = normed_251_end_mask_0, x = normed_249_cast_fp16)[name = string("normed_251")]; + tensor const_461 = const()[name = string("const_461"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438600384)))]; + tensor k_31 = mul(x = normed_251, y = const_461)[name = string("k_31")]; + tensor var_9128 = mul(x = q_31, y = cos_1_cast_fp16)[name = string("op_9128")]; + tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31)[name = string("x1_61")]; + tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31)[name = string("x2_61")]; + fp16 const_464_promoted = const()[name = string("const_464_promoted"), val = fp16(-0x1p+0)]; + tensor var_9149 = mul(x = x2_61, y = const_464_promoted)[name = string("op_9149")]; + int32 var_9151 = const()[name = string("op_9151"), val = int32(-1)]; + bool var_9152_interleave_0 = const()[name = string("op_9152_interleave_0"), val = bool(false)]; + tensor var_9152 = concat(axis = var_9151, interleave = var_9152_interleave_0, values = (var_9149, x1_61))[name = string("op_9152")]; + tensor var_9153 = mul(x = var_9152, y = sin_1_cast_fp16)[name = string("op_9153")]; + tensor query_states_61 = add(x = var_9128, y = var_9153)[name = string("query_states_61")]; + tensor var_9156 = mul(x = k_31, y = cos_1_cast_fp16)[name = string("op_9156")]; + tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_63 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31)[name = string("x1_63")]; + tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_63 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31)[name = string("x2_63")]; + fp16 const_467_promoted = const()[name = string("const_467_promoted"), val = fp16(-0x1p+0)]; + tensor var_9177 = mul(x = x2_63, y = const_467_promoted)[name = string("op_9177")]; + int32 var_9179 = const()[name = string("op_9179"), val = int32(-1)]; + bool var_9180_interleave_0 = const()[name = string("op_9180_interleave_0"), val = bool(false)]; + tensor var_9180 = concat(axis = var_9179, interleave = var_9180_interleave_0, values = (var_9177, x1_63))[name = string("op_9180")]; + tensor var_9181 = mul(x = var_9180, y = sin_1_cast_fp16)[name = string("op_9181")]; + tensor key_states_61 = add(x = var_9156, y = var_9181)[name = string("key_states_61")]; + tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([15])]; + tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; + tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; + tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([16])]; + int32 concat_122_axis_0 = const()[name = string("concat_122_axis_0"), val = int32(0)]; + bool concat_122_interleave_0 = const()[name = string("concat_122_interleave_0"), val = bool(false)]; + tensor concat_122 = concat(axis = concat_122_axis_0, interleave = concat_122_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_122")]; + tensor concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor([0])]; + tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; + int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; + bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; + tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_184, concat_123_values1_0, var_1746, concat_123_values3_0))[name = string("concat_123")]; + tensor model_model_kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_122, begin_mask = model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_123, end_mask = model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_31_stride_0, update = key_states_61, x = coreml_update_state_85)[name = string("model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; + tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; + tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([43])]; + tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; + tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; + tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([44])]; + int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; + bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; + tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_126")]; + tensor concat_127_values1_0 = const()[name = string("concat_127_values1_0"), val = tensor([0])]; + tensor concat_127_values3_0 = const()[name = string("concat_127_values3_0"), val = tensor([0])]; + int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; + bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; + tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (expand_dims_190, concat_127_values1_0, var_1746, concat_127_values3_0))[name = string("concat_127")]; + tensor model_model_kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_126, begin_mask = model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_127, end_mask = model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_32_stride_0, update = var_9072, x = coreml_update_state_86)[name = string("model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; + tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; + tensor var_9236_begin_0 = const()[name = string("op_9236_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor var_9236_end_0 = const()[name = string("op_9236_end_0"), val = tensor([16, 8, 4096, 128])]; + tensor var_9236_end_mask_0 = const()[name = string("op_9236_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9236_cast_fp16 = slice_by_index(begin = var_9236_begin_0, end = var_9236_end_0, end_mask = var_9236_end_mask_0, x = coreml_update_state_87)[name = string("op_9236_cast_fp16")]; + tensor K_layer_cache_31_axes_0 = const()[name = string("K_layer_cache_31_axes_0"), val = tensor([0])]; + tensor K_layer_cache_31_cast_fp16 = squeeze(axes = K_layer_cache_31_axes_0, x = var_9236_cast_fp16)[name = string("K_layer_cache_31_cast_fp16")]; + tensor var_9243_begin_0 = const()[name = string("op_9243_begin_0"), val = tensor([43, 0, 0, 0])]; + tensor var_9243_end_0 = const()[name = string("op_9243_end_0"), val = tensor([44, 8, 4096, 128])]; + tensor var_9243_end_mask_0 = const()[name = string("op_9243_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9243_cast_fp16 = slice_by_index(begin = var_9243_begin_0, end = var_9243_end_0, end_mask = var_9243_end_mask_0, x = coreml_update_state_87)[name = string("op_9243_cast_fp16")]; + tensor V_layer_cache_31_axes_0 = const()[name = string("V_layer_cache_31_axes_0"), val = tensor([0])]; + tensor V_layer_cache_31_cast_fp16 = squeeze(axes = V_layer_cache_31_axes_0, x = var_9243_cast_fp16)[name = string("V_layer_cache_31_cast_fp16")]; + tensor x_243_axes_0 = const()[name = string("x_243_axes_0"), val = tensor([1])]; + tensor x_243_cast_fp16 = expand_dims(axes = x_243_axes_0, x = K_layer_cache_31_cast_fp16)[name = string("x_243_cast_fp16")]; + tensor var_9280 = const()[name = string("op_9280"), val = tensor([1, 2, 1, 1])]; + tensor x_245_cast_fp16 = tile(reps = var_9280, x = x_243_cast_fp16)[name = string("x_245_cast_fp16")]; + tensor var_9292 = const()[name = string("op_9292"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_63_cast_fp16 = reshape(shape = var_9292, x = x_245_cast_fp16)[name = string("key_states_63_cast_fp16")]; + tensor x_249_axes_0 = const()[name = string("x_249_axes_0"), val = tensor([1])]; + tensor x_249_cast_fp16 = expand_dims(axes = x_249_axes_0, x = V_layer_cache_31_cast_fp16)[name = string("x_249_cast_fp16")]; + tensor var_9300 = const()[name = string("op_9300"), val = tensor([1, 2, 1, 1])]; + tensor x_251_cast_fp16 = tile(reps = var_9300, x = x_249_cast_fp16)[name = string("x_251_cast_fp16")]; + tensor var_9312 = const()[name = string("op_9312"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_93_cast_fp16 = reshape(shape = var_9312, x = x_251_cast_fp16)[name = string("value_states_93_cast_fp16")]; + bool var_9327_transpose_x_1 = const()[name = string("op_9327_transpose_x_1"), val = bool(false)]; + bool var_9327_transpose_y_1 = const()[name = string("op_9327_transpose_y_1"), val = bool(true)]; + tensor var_9327 = matmul(transpose_x = var_9327_transpose_x_1, transpose_y = var_9327_transpose_y_1, x = query_states_61, y = key_states_63_cast_fp16)[name = string("op_9327")]; + fp16 var_9328_to_fp16 = const()[name = string("op_9328_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_91_cast_fp16 = mul(x = var_9327, y = var_9328_to_fp16)[name = string("attn_weights_91_cast_fp16")]; + tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = causal_mask)[name = string("attn_weights_93_cast_fp16")]; + int32 var_9363 = const()[name = string("op_9363"), val = int32(-1)]; + tensor attn_weights_95_cast_fp16 = softmax(axis = var_9363, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; + bool attn_output_151_transpose_x_0 = const()[name = string("attn_output_151_transpose_x_0"), val = bool(false)]; + bool attn_output_151_transpose_y_0 = const()[name = string("attn_output_151_transpose_y_0"), val = bool(false)]; + tensor attn_output_151_cast_fp16 = matmul(transpose_x = attn_output_151_transpose_x_0, transpose_y = attn_output_151_transpose_y_0, x = attn_weights_95_cast_fp16, y = value_states_93_cast_fp16)[name = string("attn_output_151_cast_fp16")]; + tensor var_9374_perm_0 = const()[name = string("op_9374_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_9378 = const()[name = string("op_9378"), val = tensor([1, 1, 2048])]; + tensor var_9374_cast_fp16 = transpose(perm = var_9374_perm_0, x = attn_output_151_cast_fp16)[name = string("transpose_76")]; + tensor attn_output_155_cast_fp16 = reshape(shape = var_9378, x = var_9374_cast_fp16)[name = string("attn_output_155_cast_fp16")]; + tensor var_9383 = const()[name = string("op_9383"), val = tensor([0, 2, 1])]; + string var_9399_pad_type_0 = const()[name = string("op_9399_pad_type_0"), val = string("valid")]; + int32 var_9399_groups_0 = const()[name = string("op_9399_groups_0"), val = int32(1)]; + tensor var_9399_strides_0 = const()[name = string("op_9399_strides_0"), val = tensor([1])]; + tensor var_9399_pad_0 = const()[name = string("op_9399_pad_0"), val = tensor([0, 0])]; + tensor var_9399_dilations_0 = const()[name = string("op_9399_dilations_0"), val = tensor([1])]; + tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438600704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440697920))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9384_cast_fp16 = transpose(perm = var_9383, x = attn_output_155_cast_fp16)[name = string("transpose_75")]; + tensor var_9399_cast_fp16 = conv(dilations = var_9399_dilations_0, groups = var_9399_groups_0, pad = var_9399_pad_0, pad_type = var_9399_pad_type_0, strides = var_9399_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_9384_cast_fp16)[name = string("op_9399_cast_fp16")]; + tensor var_9403 = const()[name = string("op_9403"), val = tensor([0, 2, 1])]; + tensor attn_output_159_cast_fp16 = transpose(perm = var_9403, x = var_9399_cast_fp16)[name = string("transpose_74")]; + tensor hidden_states_159_cast_fp16 = add(x = hidden_states_151_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_159_cast_fp16")]; + int32 var_9416 = const()[name = string("op_9416"), val = int32(-1)]; + fp16 const_476_promoted_to_fp16 = const()[name = string("const_476_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9418_cast_fp16 = mul(x = hidden_states_159_cast_fp16, y = const_476_promoted_to_fp16)[name = string("op_9418_cast_fp16")]; + bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; + tensor input_281_cast_fp16 = concat(axis = var_9416, interleave = input_281_interleave_0, values = (hidden_states_159_cast_fp16, var_9418_cast_fp16))[name = string("input_281_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_9413_to_fp16 = const()[name = string("op_9413_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_9413_to_fp16, x = input_281_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor normed_255_begin_0 = const()[name = string("normed_255_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_255_end_0 = const()[name = string("normed_255_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_255_end_mask_0 = const()[name = string("normed_255_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_255_cast_fp16 = slice_by_index(begin = normed_255_begin_0, end = normed_255_end_0, end_mask = normed_255_end_mask_0, x = normed_253_cast_fp16)[name = string("normed_255_cast_fp16")]; + tensor const_479_promoted_to_fp16 = const()[name = string("const_479_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440763520)))]; + tensor x_253_cast_fp16 = mul(x = normed_255_cast_fp16, y = const_479_promoted_to_fp16)[name = string("x_253_cast_fp16")]; + tensor var_9443 = const()[name = string("op_9443"), val = tensor([0, 2, 1])]; + tensor input_283_axes_0 = const()[name = string("input_283_axes_0"), val = tensor([2])]; + tensor var_9444 = transpose(perm = var_9443, x = x_253_cast_fp16)[name = string("transpose_73")]; + tensor input_283 = expand_dims(axes = input_283_axes_0, x = var_9444)[name = string("input_283")]; + string input_285_pad_type_0 = const()[name = string("input_285_pad_type_0"), val = string("valid")]; + tensor input_285_strides_0 = const()[name = string("input_285_strides_0"), val = tensor([1, 1])]; + tensor input_285_pad_0 = const()[name = string("input_285_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_285_dilations_0 = const()[name = string("input_285_dilations_0"), val = tensor([1, 1])]; + int32 input_285_groups_0 = const()[name = string("input_285_groups_0"), val = int32(1)]; + tensor input_285 = conv(dilations = input_285_dilations_0, groups = input_285_groups_0, pad = input_285_pad_0, pad_type = input_285_pad_type_0, strides = input_285_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_283)[name = string("input_285")]; + string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; + tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; + tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; + int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; + tensor b_31 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_283)[name = string("b_31")]; + tensor c_31 = silu(x = input_285)[name = string("c_31")]; + tensor input_287 = mul(x = c_31, y = b_31)[name = string("input_287")]; + string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; + tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; + tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; + int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; + tensor e_31 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_287)[name = string("e_31")]; + tensor var_9466_axes_0 = const()[name = string("op_9466_axes_0"), val = tensor([2])]; + tensor var_9466 = squeeze(axes = var_9466_axes_0, x = e_31)[name = string("op_9466")]; + tensor var_9467 = const()[name = string("op_9467"), val = tensor([0, 2, 1])]; + tensor var_9468 = transpose(perm = var_9467, x = var_9466)[name = string("transpose_72")]; + tensor hidden_states_161_cast_fp16 = add(x = hidden_states_159_cast_fp16, y = var_9468)[name = string("hidden_states_161_cast_fp16")]; + int32 var_9480 = const()[name = string("op_9480"), val = int32(-1)]; + fp16 const_480_promoted_to_fp16 = const()[name = string("const_480_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9482_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_480_promoted_to_fp16)[name = string("op_9482_cast_fp16")]; + bool input_289_interleave_0 = const()[name = string("input_289_interleave_0"), val = bool(false)]; + tensor input_289_cast_fp16 = concat(axis = var_9480, interleave = input_289_interleave_0, values = (hidden_states_161_cast_fp16, var_9482_cast_fp16))[name = string("input_289_cast_fp16")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_9477_to_fp16 = const()[name = string("op_9477_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_9477_to_fp16, x = input_289_cast_fp16)[name = string("normed_257_cast_fp16")]; + tensor normed_259_begin_0 = const()[name = string("normed_259_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_259_end_0 = const()[name = string("normed_259_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_259_end_mask_0 = const()[name = string("normed_259_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_259_cast_fp16 = slice_by_index(begin = normed_259_begin_0, end = normed_259_end_0, end_mask = normed_259_end_mask_0, x = normed_257_cast_fp16)[name = string("normed_259_cast_fp16")]; + tensor const_483_promoted_to_fp16 = const()[name = string("const_483_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440765632)))]; + tensor hidden_states_163_cast_fp16 = mul(x = normed_259_cast_fp16, y = const_483_promoted_to_fp16)[name = string("hidden_states_163_cast_fp16")]; + tensor var_9499 = const()[name = string("op_9499"), val = tensor([0, 2, 1])]; + tensor var_9502_axes_0 = const()[name = string("op_9502_axes_0"), val = tensor([2])]; + tensor var_9500_cast_fp16 = transpose(perm = var_9499, x = hidden_states_163_cast_fp16)[name = string("transpose_71")]; + tensor var_9502_cast_fp16 = expand_dims(axes = var_9502_axes_0, x = var_9500_cast_fp16)[name = string("op_9502_cast_fp16")]; + string var_9518_pad_type_0 = const()[name = string("op_9518_pad_type_0"), val = string("valid")]; + tensor var_9518_strides_0 = const()[name = string("op_9518_strides_0"), val = tensor([1, 1])]; + tensor var_9518_pad_0 = const()[name = string("op_9518_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9518_dilations_0 = const()[name = string("op_9518_dilations_0"), val = tensor([1, 1])]; + int32 var_9518_groups_0 = const()[name = string("op_9518_groups_0"), val = int32(1)]; + tensor var_9518 = conv(dilations = var_9518_dilations_0, groups = var_9518_groups_0, pad = var_9518_pad_0, pad_type = var_9518_pad_type_0, strides = var_9518_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_9502_cast_fp16)[name = string("op_9518")]; + tensor var_9523 = const()[name = string("op_9523"), val = tensor([1, 16, 1, 128])]; + tensor var_9524 = reshape(shape = var_9523, x = var_9518)[name = string("op_9524")]; + string var_9540_pad_type_0 = const()[name = string("op_9540_pad_type_0"), val = string("valid")]; + tensor var_9540_strides_0 = const()[name = string("op_9540_strides_0"), val = tensor([1, 1])]; + tensor var_9540_pad_0 = const()[name = string("op_9540_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9540_dilations_0 = const()[name = string("op_9540_dilations_0"), val = tensor([1, 1])]; + int32 var_9540_groups_0 = const()[name = string("op_9540_groups_0"), val = int32(1)]; + tensor var_9540 = conv(dilations = var_9540_dilations_0, groups = var_9540_groups_0, pad = var_9540_pad_0, pad_type = var_9540_pad_type_0, strides = var_9540_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_9502_cast_fp16)[name = string("op_9540")]; + tensor var_9545 = const()[name = string("op_9545"), val = tensor([1, 8, 1, 128])]; + tensor var_9546 = reshape(shape = var_9545, x = var_9540)[name = string("op_9546")]; + string var_9562_pad_type_0 = const()[name = string("op_9562_pad_type_0"), val = string("valid")]; + tensor var_9562_strides_0 = const()[name = string("op_9562_strides_0"), val = tensor([1, 1])]; + tensor var_9562_pad_0 = const()[name = string("op_9562_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9562_dilations_0 = const()[name = string("op_9562_dilations_0"), val = tensor([1, 1])]; + int32 var_9562_groups_0 = const()[name = string("op_9562_groups_0"), val = int32(1)]; + tensor var_9562 = conv(dilations = var_9562_dilations_0, groups = var_9562_groups_0, pad = var_9562_pad_0, pad_type = var_9562_pad_type_0, strides = var_9562_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_9502_cast_fp16)[name = string("op_9562")]; + tensor var_9567 = const()[name = string("op_9567"), val = tensor([1, 8, 1, 128])]; + tensor var_9568 = reshape(shape = var_9567, x = var_9562)[name = string("op_9568")]; + int32 var_9583 = const()[name = string("op_9583"), val = int32(-1)]; + fp16 const_484_promoted = const()[name = string("const_484_promoted"), val = fp16(-0x1p+0)]; + tensor var_9585 = mul(x = var_9524, y = const_484_promoted)[name = string("op_9585")]; + bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; + tensor input_293 = concat(axis = var_9583, interleave = input_293_interleave_0, values = (var_9524, var_9585))[name = string("input_293")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_9580_to_fp16 = const()[name = string("op_9580_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_9580_to_fp16, x = input_293)[name = string("normed_261_cast_fp16")]; + tensor normed_263_begin_0 = const()[name = string("normed_263_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_263_end_0 = const()[name = string("normed_263_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_263_end_mask_0 = const()[name = string("normed_263_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_263 = slice_by_index(begin = normed_263_begin_0, end = normed_263_end_0, end_mask = normed_263_end_mask_0, x = normed_261_cast_fp16)[name = string("normed_263")]; + tensor const_487 = const()[name = string("const_487"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440767744)))]; + tensor q_33 = mul(x = normed_263, y = const_487)[name = string("q_33")]; + int32 var_9608 = const()[name = string("op_9608"), val = int32(-1)]; + fp16 const_488_promoted = const()[name = string("const_488_promoted"), val = fp16(-0x1p+0)]; + tensor var_9610 = mul(x = var_9546, y = const_488_promoted)[name = string("op_9610")]; + bool input_295_interleave_0 = const()[name = string("input_295_interleave_0"), val = bool(false)]; + tensor input_295 = concat(axis = var_9608, interleave = input_295_interleave_0, values = (var_9546, var_9610))[name = string("input_295")]; + tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_9605_to_fp16 = const()[name = string("op_9605_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_9605_to_fp16, x = input_295)[name = string("normed_265_cast_fp16")]; + tensor normed_267_begin_0 = const()[name = string("normed_267_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_267_end_0 = const()[name = string("normed_267_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_267_end_mask_0 = const()[name = string("normed_267_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_267 = slice_by_index(begin = normed_267_begin_0, end = normed_267_end_0, end_mask = normed_267_end_mask_0, x = normed_265_cast_fp16)[name = string("normed_267")]; + tensor const_491 = const()[name = string("const_491"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440768064)))]; + tensor k_33 = mul(x = normed_267, y = const_491)[name = string("k_33")]; + tensor var_9624 = mul(x = q_33, y = cos_1_cast_fp16)[name = string("op_9624")]; + tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_65 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33)[name = string("x1_65")]; + tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_65 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33)[name = string("x2_65")]; + fp16 const_494_promoted = const()[name = string("const_494_promoted"), val = fp16(-0x1p+0)]; + tensor var_9645 = mul(x = x2_65, y = const_494_promoted)[name = string("op_9645")]; + int32 var_9647 = const()[name = string("op_9647"), val = int32(-1)]; + bool var_9648_interleave_0 = const()[name = string("op_9648_interleave_0"), val = bool(false)]; + tensor var_9648 = concat(axis = var_9647, interleave = var_9648_interleave_0, values = (var_9645, x1_65))[name = string("op_9648")]; + tensor var_9649 = mul(x = var_9648, y = sin_1_cast_fp16)[name = string("op_9649")]; + tensor query_states_65 = add(x = var_9624, y = var_9649)[name = string("query_states_65")]; + tensor var_9652 = mul(x = k_33, y = cos_1_cast_fp16)[name = string("op_9652")]; + tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_67 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33)[name = string("x1_67")]; + tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_67 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33)[name = string("x2_67")]; + fp16 const_497_promoted = const()[name = string("const_497_promoted"), val = fp16(-0x1p+0)]; + tensor var_9673 = mul(x = x2_67, y = const_497_promoted)[name = string("op_9673")]; + int32 var_9675 = const()[name = string("op_9675"), val = int32(-1)]; + bool var_9676_interleave_0 = const()[name = string("op_9676_interleave_0"), val = bool(false)]; + tensor var_9676 = concat(axis = var_9675, interleave = var_9676_interleave_0, values = (var_9673, x1_67))[name = string("op_9676")]; + tensor var_9677 = mul(x = var_9676, y = sin_1_cast_fp16)[name = string("op_9677")]; + tensor key_states_65 = add(x = var_9652, y = var_9677)[name = string("key_states_65")]; + tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([16])]; + tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; + tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; + tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([17])]; + int32 concat_130_axis_0 = const()[name = string("concat_130_axis_0"), val = int32(0)]; + bool concat_130_interleave_0 = const()[name = string("concat_130_interleave_0"), val = bool(false)]; + tensor concat_130 = concat(axis = concat_130_axis_0, interleave = concat_130_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_130")]; + tensor concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = tensor([0])]; + tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; + int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; + bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; + tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_196, concat_131_values1_0, var_1746, concat_131_values3_0))[name = string("concat_131")]; + tensor model_model_kv_cache_0_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_130, begin_mask = model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0, end = concat_131, end_mask = model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_33_stride_0, update = key_states_65, x = coreml_update_state_87)[name = string("model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_32_write_state")]; + tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_32")]; + tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([44])]; + tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; + tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; + tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([45])]; + int32 concat_134_axis_0 = const()[name = string("concat_134_axis_0"), val = int32(0)]; + bool concat_134_interleave_0 = const()[name = string("concat_134_interleave_0"), val = bool(false)]; + tensor concat_134 = concat(axis = concat_134_axis_0, interleave = concat_134_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_134")]; + tensor concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor([0])]; + tensor concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor([0])]; + int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)]; + bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)]; + tensor concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (expand_dims_202, concat_135_values1_0, var_1746, concat_135_values3_0))[name = string("concat_135")]; + tensor model_model_kv_cache_0_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_134, begin_mask = model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0, end = concat_135, end_mask = model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_34_stride_0, update = var_9568, x = coreml_update_state_88)[name = string("model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_33_write_state")]; + tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_33")]; + tensor var_9732_begin_0 = const()[name = string("op_9732_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor var_9732_end_0 = const()[name = string("op_9732_end_0"), val = tensor([17, 8, 4096, 128])]; + tensor var_9732_end_mask_0 = const()[name = string("op_9732_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9732_cast_fp16 = slice_by_index(begin = var_9732_begin_0, end = var_9732_end_0, end_mask = var_9732_end_mask_0, x = coreml_update_state_89)[name = string("op_9732_cast_fp16")]; + tensor K_layer_cache_33_axes_0 = const()[name = string("K_layer_cache_33_axes_0"), val = tensor([0])]; + tensor K_layer_cache_33_cast_fp16 = squeeze(axes = K_layer_cache_33_axes_0, x = var_9732_cast_fp16)[name = string("K_layer_cache_33_cast_fp16")]; + tensor var_9739_begin_0 = const()[name = string("op_9739_begin_0"), val = tensor([44, 0, 0, 0])]; + tensor var_9739_end_0 = const()[name = string("op_9739_end_0"), val = tensor([45, 8, 4096, 128])]; + tensor var_9739_end_mask_0 = const()[name = string("op_9739_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9739_cast_fp16 = slice_by_index(begin = var_9739_begin_0, end = var_9739_end_0, end_mask = var_9739_end_mask_0, x = coreml_update_state_89)[name = string("op_9739_cast_fp16")]; + tensor V_layer_cache_33_axes_0 = const()[name = string("V_layer_cache_33_axes_0"), val = tensor([0])]; + tensor V_layer_cache_33_cast_fp16 = squeeze(axes = V_layer_cache_33_axes_0, x = var_9739_cast_fp16)[name = string("V_layer_cache_33_cast_fp16")]; + tensor x_259_axes_0 = const()[name = string("x_259_axes_0"), val = tensor([1])]; + tensor x_259_cast_fp16 = expand_dims(axes = x_259_axes_0, x = K_layer_cache_33_cast_fp16)[name = string("x_259_cast_fp16")]; + tensor var_9776 = const()[name = string("op_9776"), val = tensor([1, 2, 1, 1])]; + tensor x_261_cast_fp16 = tile(reps = var_9776, x = x_259_cast_fp16)[name = string("x_261_cast_fp16")]; + tensor var_9788 = const()[name = string("op_9788"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_67_cast_fp16 = reshape(shape = var_9788, x = x_261_cast_fp16)[name = string("key_states_67_cast_fp16")]; + tensor x_265_axes_0 = const()[name = string("x_265_axes_0"), val = tensor([1])]; + tensor x_265_cast_fp16 = expand_dims(axes = x_265_axes_0, x = V_layer_cache_33_cast_fp16)[name = string("x_265_cast_fp16")]; + tensor var_9796 = const()[name = string("op_9796"), val = tensor([1, 2, 1, 1])]; + tensor x_267_cast_fp16 = tile(reps = var_9796, x = x_265_cast_fp16)[name = string("x_267_cast_fp16")]; + tensor var_9808 = const()[name = string("op_9808"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_99_cast_fp16 = reshape(shape = var_9808, x = x_267_cast_fp16)[name = string("value_states_99_cast_fp16")]; + bool var_9823_transpose_x_1 = const()[name = string("op_9823_transpose_x_1"), val = bool(false)]; + bool var_9823_transpose_y_1 = const()[name = string("op_9823_transpose_y_1"), val = bool(true)]; + tensor var_9823 = matmul(transpose_x = var_9823_transpose_x_1, transpose_y = var_9823_transpose_y_1, x = query_states_65, y = key_states_67_cast_fp16)[name = string("op_9823")]; + fp16 var_9824_to_fp16 = const()[name = string("op_9824_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_97_cast_fp16 = mul(x = var_9823, y = var_9824_to_fp16)[name = string("attn_weights_97_cast_fp16")]; + tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = causal_mask)[name = string("attn_weights_99_cast_fp16")]; + int32 var_9859 = const()[name = string("op_9859"), val = int32(-1)]; + tensor attn_weights_101_cast_fp16 = softmax(axis = var_9859, x = attn_weights_99_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; + bool attn_output_161_transpose_x_0 = const()[name = string("attn_output_161_transpose_x_0"), val = bool(false)]; + bool attn_output_161_transpose_y_0 = const()[name = string("attn_output_161_transpose_y_0"), val = bool(false)]; + tensor attn_output_161_cast_fp16 = matmul(transpose_x = attn_output_161_transpose_x_0, transpose_y = attn_output_161_transpose_y_0, x = attn_weights_101_cast_fp16, y = value_states_99_cast_fp16)[name = string("attn_output_161_cast_fp16")]; + tensor var_9870_perm_0 = const()[name = string("op_9870_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_9874 = const()[name = string("op_9874"), val = tensor([1, 1, 2048])]; + tensor var_9870_cast_fp16 = transpose(perm = var_9870_perm_0, x = attn_output_161_cast_fp16)[name = string("transpose_70")]; + tensor attn_output_165_cast_fp16 = reshape(shape = var_9874, x = var_9870_cast_fp16)[name = string("attn_output_165_cast_fp16")]; + tensor var_9879 = const()[name = string("op_9879"), val = tensor([0, 2, 1])]; + string var_9895_pad_type_0 = const()[name = string("op_9895_pad_type_0"), val = string("valid")]; + int32 var_9895_groups_0 = const()[name = string("op_9895_groups_0"), val = int32(1)]; + tensor var_9895_strides_0 = const()[name = string("op_9895_strides_0"), val = tensor([1])]; + tensor var_9895_pad_0 = const()[name = string("op_9895_pad_0"), val = tensor([0, 0])]; + tensor var_9895_dilations_0 = const()[name = string("op_9895_dilations_0"), val = tensor([1])]; + tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440768384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442865600))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9880_cast_fp16 = transpose(perm = var_9879, x = attn_output_165_cast_fp16)[name = string("transpose_69")]; + tensor var_9895_cast_fp16 = conv(dilations = var_9895_dilations_0, groups = var_9895_groups_0, pad = var_9895_pad_0, pad_type = var_9895_pad_type_0, strides = var_9895_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_9880_cast_fp16)[name = string("op_9895_cast_fp16")]; + tensor var_9899 = const()[name = string("op_9899"), val = tensor([0, 2, 1])]; + tensor attn_output_169_cast_fp16 = transpose(perm = var_9899, x = var_9895_cast_fp16)[name = string("transpose_68")]; + tensor hidden_states_169_cast_fp16 = add(x = hidden_states_161_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_169_cast_fp16")]; + int32 var_9912 = const()[name = string("op_9912"), val = int32(-1)]; + fp16 const_506_promoted_to_fp16 = const()[name = string("const_506_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9914_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = const_506_promoted_to_fp16)[name = string("op_9914_cast_fp16")]; + bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; + tensor input_299_cast_fp16 = concat(axis = var_9912, interleave = input_299_interleave_0, values = (hidden_states_169_cast_fp16, var_9914_cast_fp16))[name = string("input_299_cast_fp16")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_9909_to_fp16 = const()[name = string("op_9909_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_9909_to_fp16, x = input_299_cast_fp16)[name = string("normed_269_cast_fp16")]; + tensor normed_271_begin_0 = const()[name = string("normed_271_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_271_end_0 = const()[name = string("normed_271_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_271_end_mask_0 = const()[name = string("normed_271_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_271_cast_fp16 = slice_by_index(begin = normed_271_begin_0, end = normed_271_end_0, end_mask = normed_271_end_mask_0, x = normed_269_cast_fp16)[name = string("normed_271_cast_fp16")]; + tensor const_509_promoted_to_fp16 = const()[name = string("const_509_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442931200)))]; + tensor x_269_cast_fp16 = mul(x = normed_271_cast_fp16, y = const_509_promoted_to_fp16)[name = string("x_269_cast_fp16")]; + tensor var_9939 = const()[name = string("op_9939"), val = tensor([0, 2, 1])]; + tensor input_301_axes_0 = const()[name = string("input_301_axes_0"), val = tensor([2])]; + tensor var_9940 = transpose(perm = var_9939, x = x_269_cast_fp16)[name = string("transpose_67")]; + tensor input_301 = expand_dims(axes = input_301_axes_0, x = var_9940)[name = string("input_301")]; + string input_303_pad_type_0 = const()[name = string("input_303_pad_type_0"), val = string("valid")]; + tensor input_303_strides_0 = const()[name = string("input_303_strides_0"), val = tensor([1, 1])]; + tensor input_303_pad_0 = const()[name = string("input_303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_303_dilations_0 = const()[name = string("input_303_dilations_0"), val = tensor([1, 1])]; + int32 input_303_groups_0 = const()[name = string("input_303_groups_0"), val = int32(1)]; + tensor input_303 = conv(dilations = input_303_dilations_0, groups = input_303_groups_0, pad = input_303_pad_0, pad_type = input_303_pad_type_0, strides = input_303_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_palettized, x = input_301)[name = string("input_303")]; + string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; + tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; + tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; + int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; + tensor b_33 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_palettized, x = input_301)[name = string("b_33")]; + tensor c_33 = silu(x = input_303)[name = string("c_33")]; + tensor input_305 = mul(x = c_33, y = b_33)[name = string("input_305")]; + string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; + tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; + tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; + int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; + tensor e_33 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_palettized, x = input_305)[name = string("e_33")]; + tensor var_9962_axes_0 = const()[name = string("op_9962_axes_0"), val = tensor([2])]; + tensor var_9962 = squeeze(axes = var_9962_axes_0, x = e_33)[name = string("op_9962")]; + tensor var_9963 = const()[name = string("op_9963"), val = tensor([0, 2, 1])]; + tensor var_9964 = transpose(perm = var_9963, x = var_9962)[name = string("transpose_66")]; + tensor hidden_states_171_cast_fp16 = add(x = hidden_states_169_cast_fp16, y = var_9964)[name = string("hidden_states_171_cast_fp16")]; + int32 var_9976 = const()[name = string("op_9976"), val = int32(-1)]; + fp16 const_510_promoted_to_fp16 = const()[name = string("const_510_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9978_cast_fp16 = mul(x = hidden_states_171_cast_fp16, y = const_510_promoted_to_fp16)[name = string("op_9978_cast_fp16")]; + bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; + tensor input_307_cast_fp16 = concat(axis = var_9976, interleave = input_307_interleave_0, values = (hidden_states_171_cast_fp16, var_9978_cast_fp16))[name = string("input_307_cast_fp16")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_9973_to_fp16 = const()[name = string("op_9973_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_9973_to_fp16, x = input_307_cast_fp16)[name = string("normed_273_cast_fp16")]; + tensor normed_275_begin_0 = const()[name = string("normed_275_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_275_end_0 = const()[name = string("normed_275_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_275_end_mask_0 = const()[name = string("normed_275_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_275_cast_fp16 = slice_by_index(begin = normed_275_begin_0, end = normed_275_end_0, end_mask = normed_275_end_mask_0, x = normed_273_cast_fp16)[name = string("normed_275_cast_fp16")]; + tensor const_513_promoted_to_fp16 = const()[name = string("const_513_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442933312)))]; + tensor hidden_states_173_cast_fp16 = mul(x = normed_275_cast_fp16, y = const_513_promoted_to_fp16)[name = string("hidden_states_173_cast_fp16")]; + tensor var_9995 = const()[name = string("op_9995"), val = tensor([0, 2, 1])]; + tensor var_9998_axes_0 = const()[name = string("op_9998_axes_0"), val = tensor([2])]; + tensor var_9996_cast_fp16 = transpose(perm = var_9995, x = hidden_states_173_cast_fp16)[name = string("transpose_65")]; + tensor var_9998_cast_fp16 = expand_dims(axes = var_9998_axes_0, x = var_9996_cast_fp16)[name = string("op_9998_cast_fp16")]; + string var_10014_pad_type_0 = const()[name = string("op_10014_pad_type_0"), val = string("valid")]; + tensor var_10014_strides_0 = const()[name = string("op_10014_strides_0"), val = tensor([1, 1])]; + tensor var_10014_pad_0 = const()[name = string("op_10014_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10014_dilations_0 = const()[name = string("op_10014_dilations_0"), val = tensor([1, 1])]; + int32 var_10014_groups_0 = const()[name = string("op_10014_groups_0"), val = int32(1)]; + tensor var_10014 = conv(dilations = var_10014_dilations_0, groups = var_10014_groups_0, pad = var_10014_pad_0, pad_type = var_10014_pad_type_0, strides = var_10014_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_9998_cast_fp16)[name = string("op_10014")]; + tensor var_10019 = const()[name = string("op_10019"), val = tensor([1, 16, 1, 128])]; + tensor var_10020 = reshape(shape = var_10019, x = var_10014)[name = string("op_10020")]; + string var_10036_pad_type_0 = const()[name = string("op_10036_pad_type_0"), val = string("valid")]; + tensor var_10036_strides_0 = const()[name = string("op_10036_strides_0"), val = tensor([1, 1])]; + tensor var_10036_pad_0 = const()[name = string("op_10036_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10036_dilations_0 = const()[name = string("op_10036_dilations_0"), val = tensor([1, 1])]; + int32 var_10036_groups_0 = const()[name = string("op_10036_groups_0"), val = int32(1)]; + tensor var_10036 = conv(dilations = var_10036_dilations_0, groups = var_10036_groups_0, pad = var_10036_pad_0, pad_type = var_10036_pad_type_0, strides = var_10036_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_9998_cast_fp16)[name = string("op_10036")]; + tensor var_10041 = const()[name = string("op_10041"), val = tensor([1, 8, 1, 128])]; + tensor var_10042 = reshape(shape = var_10041, x = var_10036)[name = string("op_10042")]; + string var_10058_pad_type_0 = const()[name = string("op_10058_pad_type_0"), val = string("valid")]; + tensor var_10058_strides_0 = const()[name = string("op_10058_strides_0"), val = tensor([1, 1])]; + tensor var_10058_pad_0 = const()[name = string("op_10058_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10058_dilations_0 = const()[name = string("op_10058_dilations_0"), val = tensor([1, 1])]; + int32 var_10058_groups_0 = const()[name = string("op_10058_groups_0"), val = int32(1)]; + tensor var_10058 = conv(dilations = var_10058_dilations_0, groups = var_10058_groups_0, pad = var_10058_pad_0, pad_type = var_10058_pad_type_0, strides = var_10058_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_9998_cast_fp16)[name = string("op_10058")]; + tensor var_10063 = const()[name = string("op_10063"), val = tensor([1, 8, 1, 128])]; + tensor var_10064 = reshape(shape = var_10063, x = var_10058)[name = string("op_10064")]; + int32 var_10079 = const()[name = string("op_10079"), val = int32(-1)]; + fp16 const_514_promoted = const()[name = string("const_514_promoted"), val = fp16(-0x1p+0)]; + tensor var_10081 = mul(x = var_10020, y = const_514_promoted)[name = string("op_10081")]; + bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; + tensor input_311 = concat(axis = var_10079, interleave = input_311_interleave_0, values = (var_10020, var_10081))[name = string("input_311")]; + tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_10076_to_fp16 = const()[name = string("op_10076_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_10076_to_fp16, x = input_311)[name = string("normed_277_cast_fp16")]; + tensor normed_279_begin_0 = const()[name = string("normed_279_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_279_end_0 = const()[name = string("normed_279_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_279_end_mask_0 = const()[name = string("normed_279_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_279 = slice_by_index(begin = normed_279_begin_0, end = normed_279_end_0, end_mask = normed_279_end_mask_0, x = normed_277_cast_fp16)[name = string("normed_279")]; + tensor const_517 = const()[name = string("const_517"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442935424)))]; + tensor q_35 = mul(x = normed_279, y = const_517)[name = string("q_35")]; + int32 var_10104 = const()[name = string("op_10104"), val = int32(-1)]; + fp16 const_518_promoted = const()[name = string("const_518_promoted"), val = fp16(-0x1p+0)]; + tensor var_10106 = mul(x = var_10042, y = const_518_promoted)[name = string("op_10106")]; + bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; + tensor input_313 = concat(axis = var_10104, interleave = input_313_interleave_0, values = (var_10042, var_10106))[name = string("input_313")]; + tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_10101_to_fp16 = const()[name = string("op_10101_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_10101_to_fp16, x = input_313)[name = string("normed_281_cast_fp16")]; + tensor normed_283_begin_0 = const()[name = string("normed_283_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_283_end_0 = const()[name = string("normed_283_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_283_end_mask_0 = const()[name = string("normed_283_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_283 = slice_by_index(begin = normed_283_begin_0, end = normed_283_end_0, end_mask = normed_283_end_mask_0, x = normed_281_cast_fp16)[name = string("normed_283")]; + tensor const_521 = const()[name = string("const_521"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442935744)))]; + tensor k_35 = mul(x = normed_283, y = const_521)[name = string("k_35")]; + tensor var_10120 = mul(x = q_35, y = cos_1_cast_fp16)[name = string("op_10120")]; + tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_69 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35)[name = string("x1_69")]; + tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_69 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35)[name = string("x2_69")]; + fp16 const_524_promoted = const()[name = string("const_524_promoted"), val = fp16(-0x1p+0)]; + tensor var_10141 = mul(x = x2_69, y = const_524_promoted)[name = string("op_10141")]; + int32 var_10143 = const()[name = string("op_10143"), val = int32(-1)]; + bool var_10144_interleave_0 = const()[name = string("op_10144_interleave_0"), val = bool(false)]; + tensor var_10144 = concat(axis = var_10143, interleave = var_10144_interleave_0, values = (var_10141, x1_69))[name = string("op_10144")]; + tensor var_10145 = mul(x = var_10144, y = sin_1_cast_fp16)[name = string("op_10145")]; + tensor query_states_69 = add(x = var_10120, y = var_10145)[name = string("query_states_69")]; + tensor var_10148 = mul(x = k_35, y = cos_1_cast_fp16)[name = string("op_10148")]; + tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_71 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35)[name = string("x1_71")]; + tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_71 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35)[name = string("x2_71")]; + fp16 const_527_promoted = const()[name = string("const_527_promoted"), val = fp16(-0x1p+0)]; + tensor var_10169 = mul(x = x2_71, y = const_527_promoted)[name = string("op_10169")]; + int32 var_10171 = const()[name = string("op_10171"), val = int32(-1)]; + bool var_10172_interleave_0 = const()[name = string("op_10172_interleave_0"), val = bool(false)]; + tensor var_10172 = concat(axis = var_10171, interleave = var_10172_interleave_0, values = (var_10169, x1_71))[name = string("op_10172")]; + tensor var_10173 = mul(x = var_10172, y = sin_1_cast_fp16)[name = string("op_10173")]; + tensor key_states_69 = add(x = var_10148, y = var_10173)[name = string("key_states_69")]; + tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([17])]; + tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; + tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; + tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([18])]; + int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; + bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; + tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (expand_dims_204, expand_dims_205, current_pos, expand_dims_207))[name = string("concat_138")]; + tensor concat_139_values1_0 = const()[name = string("concat_139_values1_0"), val = tensor([0])]; + tensor concat_139_values3_0 = const()[name = string("concat_139_values3_0"), val = tensor([0])]; + int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; + bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; + tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (expand_dims_208, concat_139_values1_0, var_1746, concat_139_values3_0))[name = string("concat_139")]; + tensor model_model_kv_cache_0_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_138, begin_mask = model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0, end = concat_139, end_mask = model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_35_stride_0, update = key_states_69, x = coreml_update_state_89)[name = string("model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_34_write_state")]; + tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_34")]; + tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([45])]; + tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([0])]; + tensor expand_dims_213 = const()[name = string("expand_dims_213"), val = tensor([0])]; + tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([46])]; + int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; + bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; + tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (expand_dims_210, expand_dims_211, current_pos, expand_dims_213))[name = string("concat_142")]; + tensor concat_143_values1_0 = const()[name = string("concat_143_values1_0"), val = tensor([0])]; + tensor concat_143_values3_0 = const()[name = string("concat_143_values3_0"), val = tensor([0])]; + int32 concat_143_axis_0 = const()[name = string("concat_143_axis_0"), val = int32(0)]; + bool concat_143_interleave_0 = const()[name = string("concat_143_interleave_0"), val = bool(false)]; + tensor concat_143 = concat(axis = concat_143_axis_0, interleave = concat_143_interleave_0, values = (expand_dims_214, concat_143_values1_0, var_1746, concat_143_values3_0))[name = string("concat_143")]; + tensor model_model_kv_cache_0_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_142, begin_mask = model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0, end = concat_143, end_mask = model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_36_stride_0, update = var_10064, x = coreml_update_state_90)[name = string("model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_35_write_state")]; + tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_35")]; + tensor var_10228_begin_0 = const()[name = string("op_10228_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor var_10228_end_0 = const()[name = string("op_10228_end_0"), val = tensor([18, 8, 4096, 128])]; + tensor var_10228_end_mask_0 = const()[name = string("op_10228_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10228_cast_fp16 = slice_by_index(begin = var_10228_begin_0, end = var_10228_end_0, end_mask = var_10228_end_mask_0, x = coreml_update_state_91)[name = string("op_10228_cast_fp16")]; + tensor K_layer_cache_35_axes_0 = const()[name = string("K_layer_cache_35_axes_0"), val = tensor([0])]; + tensor K_layer_cache_35_cast_fp16 = squeeze(axes = K_layer_cache_35_axes_0, x = var_10228_cast_fp16)[name = string("K_layer_cache_35_cast_fp16")]; + tensor var_10235_begin_0 = const()[name = string("op_10235_begin_0"), val = tensor([45, 0, 0, 0])]; + tensor var_10235_end_0 = const()[name = string("op_10235_end_0"), val = tensor([46, 8, 4096, 128])]; + tensor var_10235_end_mask_0 = const()[name = string("op_10235_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10235_cast_fp16 = slice_by_index(begin = var_10235_begin_0, end = var_10235_end_0, end_mask = var_10235_end_mask_0, x = coreml_update_state_91)[name = string("op_10235_cast_fp16")]; + tensor V_layer_cache_35_axes_0 = const()[name = string("V_layer_cache_35_axes_0"), val = tensor([0])]; + tensor V_layer_cache_35_cast_fp16 = squeeze(axes = V_layer_cache_35_axes_0, x = var_10235_cast_fp16)[name = string("V_layer_cache_35_cast_fp16")]; + tensor x_275_axes_0 = const()[name = string("x_275_axes_0"), val = tensor([1])]; + tensor x_275_cast_fp16 = expand_dims(axes = x_275_axes_0, x = K_layer_cache_35_cast_fp16)[name = string("x_275_cast_fp16")]; + tensor var_10272 = const()[name = string("op_10272"), val = tensor([1, 2, 1, 1])]; + tensor x_277_cast_fp16 = tile(reps = var_10272, x = x_275_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_10284 = const()[name = string("op_10284"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_71_cast_fp16 = reshape(shape = var_10284, x = x_277_cast_fp16)[name = string("key_states_71_cast_fp16")]; + tensor x_281_axes_0 = const()[name = string("x_281_axes_0"), val = tensor([1])]; + tensor x_281_cast_fp16 = expand_dims(axes = x_281_axes_0, x = V_layer_cache_35_cast_fp16)[name = string("x_281_cast_fp16")]; + tensor var_10292 = const()[name = string("op_10292"), val = tensor([1, 2, 1, 1])]; + tensor x_283_cast_fp16 = tile(reps = var_10292, x = x_281_cast_fp16)[name = string("x_283_cast_fp16")]; + tensor var_10304 = const()[name = string("op_10304"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_105_cast_fp16 = reshape(shape = var_10304, x = x_283_cast_fp16)[name = string("value_states_105_cast_fp16")]; + bool var_10319_transpose_x_1 = const()[name = string("op_10319_transpose_x_1"), val = bool(false)]; + bool var_10319_transpose_y_1 = const()[name = string("op_10319_transpose_y_1"), val = bool(true)]; + tensor var_10319 = matmul(transpose_x = var_10319_transpose_x_1, transpose_y = var_10319_transpose_y_1, x = query_states_69, y = key_states_71_cast_fp16)[name = string("op_10319")]; + fp16 var_10320_to_fp16 = const()[name = string("op_10320_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_103_cast_fp16 = mul(x = var_10319, y = var_10320_to_fp16)[name = string("attn_weights_103_cast_fp16")]; + tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; + int32 var_10355 = const()[name = string("op_10355"), val = int32(-1)]; + tensor attn_weights_107_cast_fp16 = softmax(axis = var_10355, x = attn_weights_105_cast_fp16)[name = string("attn_weights_107_cast_fp16")]; + bool attn_output_171_transpose_x_0 = const()[name = string("attn_output_171_transpose_x_0"), val = bool(false)]; + bool attn_output_171_transpose_y_0 = const()[name = string("attn_output_171_transpose_y_0"), val = bool(false)]; + tensor attn_output_171_cast_fp16 = matmul(transpose_x = attn_output_171_transpose_x_0, transpose_y = attn_output_171_transpose_y_0, x = attn_weights_107_cast_fp16, y = value_states_105_cast_fp16)[name = string("attn_output_171_cast_fp16")]; + tensor var_10366_perm_0 = const()[name = string("op_10366_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_10370 = const()[name = string("op_10370"), val = tensor([1, 1, 2048])]; + tensor var_10366_cast_fp16 = transpose(perm = var_10366_perm_0, x = attn_output_171_cast_fp16)[name = string("transpose_64")]; + tensor attn_output_175_cast_fp16 = reshape(shape = var_10370, x = var_10366_cast_fp16)[name = string("attn_output_175_cast_fp16")]; + tensor var_10375 = const()[name = string("op_10375"), val = tensor([0, 2, 1])]; + string var_10391_pad_type_0 = const()[name = string("op_10391_pad_type_0"), val = string("valid")]; + int32 var_10391_groups_0 = const()[name = string("op_10391_groups_0"), val = int32(1)]; + tensor var_10391_strides_0 = const()[name = string("op_10391_strides_0"), val = tensor([1])]; + tensor var_10391_pad_0 = const()[name = string("op_10391_pad_0"), val = tensor([0, 0])]; + tensor var_10391_dilations_0 = const()[name = string("op_10391_dilations_0"), val = tensor([1])]; + tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442936064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445033280))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10376_cast_fp16 = transpose(perm = var_10375, x = attn_output_175_cast_fp16)[name = string("transpose_63")]; + tensor var_10391_cast_fp16 = conv(dilations = var_10391_dilations_0, groups = var_10391_groups_0, pad = var_10391_pad_0, pad_type = var_10391_pad_type_0, strides = var_10391_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_10376_cast_fp16)[name = string("op_10391_cast_fp16")]; + tensor var_10395 = const()[name = string("op_10395"), val = tensor([0, 2, 1])]; + tensor attn_output_179_cast_fp16 = transpose(perm = var_10395, x = var_10391_cast_fp16)[name = string("transpose_62")]; + tensor hidden_states_179_cast_fp16 = add(x = hidden_states_171_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_179_cast_fp16")]; + int32 var_10408 = const()[name = string("op_10408"), val = int32(-1)]; + fp16 const_536_promoted_to_fp16 = const()[name = string("const_536_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10410_cast_fp16 = mul(x = hidden_states_179_cast_fp16, y = const_536_promoted_to_fp16)[name = string("op_10410_cast_fp16")]; + bool input_317_interleave_0 = const()[name = string("input_317_interleave_0"), val = bool(false)]; + tensor input_317_cast_fp16 = concat(axis = var_10408, interleave = input_317_interleave_0, values = (hidden_states_179_cast_fp16, var_10410_cast_fp16))[name = string("input_317_cast_fp16")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_10405_to_fp16 = const()[name = string("op_10405_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_10405_to_fp16, x = input_317_cast_fp16)[name = string("normed_285_cast_fp16")]; + tensor normed_287_begin_0 = const()[name = string("normed_287_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_287_end_0 = const()[name = string("normed_287_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_287_end_mask_0 = const()[name = string("normed_287_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_287_cast_fp16 = slice_by_index(begin = normed_287_begin_0, end = normed_287_end_0, end_mask = normed_287_end_mask_0, x = normed_285_cast_fp16)[name = string("normed_287_cast_fp16")]; + tensor const_539_promoted_to_fp16 = const()[name = string("const_539_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445098880)))]; + tensor x_285_cast_fp16 = mul(x = normed_287_cast_fp16, y = const_539_promoted_to_fp16)[name = string("x_285_cast_fp16")]; + tensor var_10435 = const()[name = string("op_10435"), val = tensor([0, 2, 1])]; + tensor input_319_axes_0 = const()[name = string("input_319_axes_0"), val = tensor([2])]; + tensor var_10436 = transpose(perm = var_10435, x = x_285_cast_fp16)[name = string("transpose_61")]; + tensor input_319 = expand_dims(axes = input_319_axes_0, x = var_10436)[name = string("input_319")]; + string input_321_pad_type_0 = const()[name = string("input_321_pad_type_0"), val = string("valid")]; + tensor input_321_strides_0 = const()[name = string("input_321_strides_0"), val = tensor([1, 1])]; + tensor input_321_pad_0 = const()[name = string("input_321_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_321_dilations_0 = const()[name = string("input_321_dilations_0"), val = tensor([1, 1])]; + int32 input_321_groups_0 = const()[name = string("input_321_groups_0"), val = int32(1)]; + tensor input_321 = conv(dilations = input_321_dilations_0, groups = input_321_groups_0, pad = input_321_pad_0, pad_type = input_321_pad_type_0, strides = input_321_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_palettized, x = input_319)[name = string("input_321")]; + string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; + tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; + tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; + int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; + tensor b_35 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_palettized, x = input_319)[name = string("b_35")]; + tensor c_35 = silu(x = input_321)[name = string("c_35")]; + tensor input_323 = mul(x = c_35, y = b_35)[name = string("input_323")]; + string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; + tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; + tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; + int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; + tensor e_35 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_palettized, x = input_323)[name = string("e_35")]; + tensor var_10458_axes_0 = const()[name = string("op_10458_axes_0"), val = tensor([2])]; + tensor var_10458 = squeeze(axes = var_10458_axes_0, x = e_35)[name = string("op_10458")]; + tensor var_10459 = const()[name = string("op_10459"), val = tensor([0, 2, 1])]; + tensor var_10460 = transpose(perm = var_10459, x = var_10458)[name = string("transpose_60")]; + tensor hidden_states_181_cast_fp16 = add(x = hidden_states_179_cast_fp16, y = var_10460)[name = string("hidden_states_181_cast_fp16")]; + int32 var_10472 = const()[name = string("op_10472"), val = int32(-1)]; + fp16 const_540_promoted_to_fp16 = const()[name = string("const_540_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10474_cast_fp16 = mul(x = hidden_states_181_cast_fp16, y = const_540_promoted_to_fp16)[name = string("op_10474_cast_fp16")]; + bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; + tensor input_325_cast_fp16 = concat(axis = var_10472, interleave = input_325_interleave_0, values = (hidden_states_181_cast_fp16, var_10474_cast_fp16))[name = string("input_325_cast_fp16")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_10469_to_fp16 = const()[name = string("op_10469_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_10469_to_fp16, x = input_325_cast_fp16)[name = string("normed_289_cast_fp16")]; + tensor normed_291_begin_0 = const()[name = string("normed_291_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_291_end_0 = const()[name = string("normed_291_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_291_end_mask_0 = const()[name = string("normed_291_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_291_cast_fp16 = slice_by_index(begin = normed_291_begin_0, end = normed_291_end_0, end_mask = normed_291_end_mask_0, x = normed_289_cast_fp16)[name = string("normed_291_cast_fp16")]; + tensor const_543_promoted_to_fp16 = const()[name = string("const_543_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445100992)))]; + tensor hidden_states_183_cast_fp16 = mul(x = normed_291_cast_fp16, y = const_543_promoted_to_fp16)[name = string("hidden_states_183_cast_fp16")]; + tensor var_10491 = const()[name = string("op_10491"), val = tensor([0, 2, 1])]; + tensor var_10494_axes_0 = const()[name = string("op_10494_axes_0"), val = tensor([2])]; + tensor var_10492_cast_fp16 = transpose(perm = var_10491, x = hidden_states_183_cast_fp16)[name = string("transpose_59")]; + tensor var_10494_cast_fp16 = expand_dims(axes = var_10494_axes_0, x = var_10492_cast_fp16)[name = string("op_10494_cast_fp16")]; + string var_10510_pad_type_0 = const()[name = string("op_10510_pad_type_0"), val = string("valid")]; + tensor var_10510_strides_0 = const()[name = string("op_10510_strides_0"), val = tensor([1, 1])]; + tensor var_10510_pad_0 = const()[name = string("op_10510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10510_dilations_0 = const()[name = string("op_10510_dilations_0"), val = tensor([1, 1])]; + int32 var_10510_groups_0 = const()[name = string("op_10510_groups_0"), val = int32(1)]; + tensor var_10510 = conv(dilations = var_10510_dilations_0, groups = var_10510_groups_0, pad = var_10510_pad_0, pad_type = var_10510_pad_type_0, strides = var_10510_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_10494_cast_fp16)[name = string("op_10510")]; + tensor var_10515 = const()[name = string("op_10515"), val = tensor([1, 16, 1, 128])]; + tensor var_10516 = reshape(shape = var_10515, x = var_10510)[name = string("op_10516")]; + string var_10532_pad_type_0 = const()[name = string("op_10532_pad_type_0"), val = string("valid")]; + tensor var_10532_strides_0 = const()[name = string("op_10532_strides_0"), val = tensor([1, 1])]; + tensor var_10532_pad_0 = const()[name = string("op_10532_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10532_dilations_0 = const()[name = string("op_10532_dilations_0"), val = tensor([1, 1])]; + int32 var_10532_groups_0 = const()[name = string("op_10532_groups_0"), val = int32(1)]; + tensor var_10532 = conv(dilations = var_10532_dilations_0, groups = var_10532_groups_0, pad = var_10532_pad_0, pad_type = var_10532_pad_type_0, strides = var_10532_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_10494_cast_fp16)[name = string("op_10532")]; + tensor var_10537 = const()[name = string("op_10537"), val = tensor([1, 8, 1, 128])]; + tensor var_10538 = reshape(shape = var_10537, x = var_10532)[name = string("op_10538")]; + string var_10554_pad_type_0 = const()[name = string("op_10554_pad_type_0"), val = string("valid")]; + tensor var_10554_strides_0 = const()[name = string("op_10554_strides_0"), val = tensor([1, 1])]; + tensor var_10554_pad_0 = const()[name = string("op_10554_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10554_dilations_0 = const()[name = string("op_10554_dilations_0"), val = tensor([1, 1])]; + int32 var_10554_groups_0 = const()[name = string("op_10554_groups_0"), val = int32(1)]; + tensor var_10554 = conv(dilations = var_10554_dilations_0, groups = var_10554_groups_0, pad = var_10554_pad_0, pad_type = var_10554_pad_type_0, strides = var_10554_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_10494_cast_fp16)[name = string("op_10554")]; + tensor var_10559 = const()[name = string("op_10559"), val = tensor([1, 8, 1, 128])]; + tensor var_10560 = reshape(shape = var_10559, x = var_10554)[name = string("op_10560")]; + int32 var_10575 = const()[name = string("op_10575"), val = int32(-1)]; + fp16 const_544_promoted = const()[name = string("const_544_promoted"), val = fp16(-0x1p+0)]; + tensor var_10577 = mul(x = var_10516, y = const_544_promoted)[name = string("op_10577")]; + bool input_329_interleave_0 = const()[name = string("input_329_interleave_0"), val = bool(false)]; + tensor input_329 = concat(axis = var_10575, interleave = input_329_interleave_0, values = (var_10516, var_10577))[name = string("input_329")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_10572_to_fp16 = const()[name = string("op_10572_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_10572_to_fp16, x = input_329)[name = string("normed_293_cast_fp16")]; + tensor normed_295_begin_0 = const()[name = string("normed_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_295_end_0 = const()[name = string("normed_295_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_295_end_mask_0 = const()[name = string("normed_295_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_295 = slice_by_index(begin = normed_295_begin_0, end = normed_295_end_0, end_mask = normed_295_end_mask_0, x = normed_293_cast_fp16)[name = string("normed_295")]; + tensor const_547 = const()[name = string("const_547"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445103104)))]; + tensor q_37 = mul(x = normed_295, y = const_547)[name = string("q_37")]; + int32 var_10600 = const()[name = string("op_10600"), val = int32(-1)]; + fp16 const_548_promoted = const()[name = string("const_548_promoted"), val = fp16(-0x1p+0)]; + tensor var_10602 = mul(x = var_10538, y = const_548_promoted)[name = string("op_10602")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331 = concat(axis = var_10600, interleave = input_331_interleave_0, values = (var_10538, var_10602))[name = string("input_331")]; + tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_10597_to_fp16 = const()[name = string("op_10597_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_10597_to_fp16, x = input_331)[name = string("normed_297_cast_fp16")]; + tensor normed_299_begin_0 = const()[name = string("normed_299_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_299_end_0 = const()[name = string("normed_299_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_299_end_mask_0 = const()[name = string("normed_299_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_299 = slice_by_index(begin = normed_299_begin_0, end = normed_299_end_0, end_mask = normed_299_end_mask_0, x = normed_297_cast_fp16)[name = string("normed_299")]; + tensor const_551 = const()[name = string("const_551"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445103424)))]; + tensor k_37 = mul(x = normed_299, y = const_551)[name = string("k_37")]; + tensor var_10616 = mul(x = q_37, y = cos_1_cast_fp16)[name = string("op_10616")]; + tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_73 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37)[name = string("x1_73")]; + tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_73 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37)[name = string("x2_73")]; + fp16 const_554_promoted = const()[name = string("const_554_promoted"), val = fp16(-0x1p+0)]; + tensor var_10637 = mul(x = x2_73, y = const_554_promoted)[name = string("op_10637")]; + int32 var_10639 = const()[name = string("op_10639"), val = int32(-1)]; + bool var_10640_interleave_0 = const()[name = string("op_10640_interleave_0"), val = bool(false)]; + tensor var_10640 = concat(axis = var_10639, interleave = var_10640_interleave_0, values = (var_10637, x1_73))[name = string("op_10640")]; + tensor var_10641 = mul(x = var_10640, y = sin_1_cast_fp16)[name = string("op_10641")]; + tensor query_states_73 = add(x = var_10616, y = var_10641)[name = string("query_states_73")]; + tensor var_10644 = mul(x = k_37, y = cos_1_cast_fp16)[name = string("op_10644")]; + tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_75 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37)[name = string("x1_75")]; + tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_75 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37)[name = string("x2_75")]; + fp16 const_557_promoted = const()[name = string("const_557_promoted"), val = fp16(-0x1p+0)]; + tensor var_10665 = mul(x = x2_75, y = const_557_promoted)[name = string("op_10665")]; + int32 var_10667 = const()[name = string("op_10667"), val = int32(-1)]; + bool var_10668_interleave_0 = const()[name = string("op_10668_interleave_0"), val = bool(false)]; + tensor var_10668 = concat(axis = var_10667, interleave = var_10668_interleave_0, values = (var_10665, x1_75))[name = string("op_10668")]; + tensor var_10669 = mul(x = var_10668, y = sin_1_cast_fp16)[name = string("op_10669")]; + tensor key_states_73 = add(x = var_10644, y = var_10669)[name = string("key_states_73")]; + tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([18])]; + tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; + tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; + tensor expand_dims_220 = const()[name = string("expand_dims_220"), val = tensor([19])]; + int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; + bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; + tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_216, expand_dims_217, current_pos, expand_dims_219))[name = string("concat_146")]; + tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; + tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_220, concat_147_values1_0, var_1746, concat_147_values3_0))[name = string("concat_147")]; + tensor model_model_kv_cache_0_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_37_stride_0, update = key_states_73, x = coreml_update_state_91)[name = string("model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_36_write_state")]; + tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_36")]; + tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([46])]; + tensor expand_dims_223 = const()[name = string("expand_dims_223"), val = tensor([0])]; + tensor expand_dims_225 = const()[name = string("expand_dims_225"), val = tensor([0])]; + tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([47])]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_222, expand_dims_223, current_pos, expand_dims_225))[name = string("concat_150")]; + tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; + tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; + int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; + bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; + tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_226, concat_151_values1_0, var_1746, concat_151_values3_0))[name = string("concat_151")]; + tensor model_model_kv_cache_0_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_38_stride_0, update = var_10560, x = coreml_update_state_92)[name = string("model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_37_write_state")]; + tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_37")]; + tensor var_10724_begin_0 = const()[name = string("op_10724_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor var_10724_end_0 = const()[name = string("op_10724_end_0"), val = tensor([19, 8, 4096, 128])]; + tensor var_10724_end_mask_0 = const()[name = string("op_10724_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10724_cast_fp16 = slice_by_index(begin = var_10724_begin_0, end = var_10724_end_0, end_mask = var_10724_end_mask_0, x = coreml_update_state_93)[name = string("op_10724_cast_fp16")]; + tensor K_layer_cache_37_axes_0 = const()[name = string("K_layer_cache_37_axes_0"), val = tensor([0])]; + tensor K_layer_cache_37_cast_fp16 = squeeze(axes = K_layer_cache_37_axes_0, x = var_10724_cast_fp16)[name = string("K_layer_cache_37_cast_fp16")]; + tensor var_10731_begin_0 = const()[name = string("op_10731_begin_0"), val = tensor([46, 0, 0, 0])]; + tensor var_10731_end_0 = const()[name = string("op_10731_end_0"), val = tensor([47, 8, 4096, 128])]; + tensor var_10731_end_mask_0 = const()[name = string("op_10731_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10731_cast_fp16 = slice_by_index(begin = var_10731_begin_0, end = var_10731_end_0, end_mask = var_10731_end_mask_0, x = coreml_update_state_93)[name = string("op_10731_cast_fp16")]; + tensor V_layer_cache_37_axes_0 = const()[name = string("V_layer_cache_37_axes_0"), val = tensor([0])]; + tensor V_layer_cache_37_cast_fp16 = squeeze(axes = V_layer_cache_37_axes_0, x = var_10731_cast_fp16)[name = string("V_layer_cache_37_cast_fp16")]; + tensor x_291_axes_0 = const()[name = string("x_291_axes_0"), val = tensor([1])]; + tensor x_291_cast_fp16 = expand_dims(axes = x_291_axes_0, x = K_layer_cache_37_cast_fp16)[name = string("x_291_cast_fp16")]; + tensor var_10768 = const()[name = string("op_10768"), val = tensor([1, 2, 1, 1])]; + tensor x_293_cast_fp16 = tile(reps = var_10768, x = x_291_cast_fp16)[name = string("x_293_cast_fp16")]; + tensor var_10780 = const()[name = string("op_10780"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_75_cast_fp16 = reshape(shape = var_10780, x = x_293_cast_fp16)[name = string("key_states_75_cast_fp16")]; + tensor x_297_axes_0 = const()[name = string("x_297_axes_0"), val = tensor([1])]; + tensor x_297_cast_fp16 = expand_dims(axes = x_297_axes_0, x = V_layer_cache_37_cast_fp16)[name = string("x_297_cast_fp16")]; + tensor var_10788 = const()[name = string("op_10788"), val = tensor([1, 2, 1, 1])]; + tensor x_299_cast_fp16 = tile(reps = var_10788, x = x_297_cast_fp16)[name = string("x_299_cast_fp16")]; + tensor var_10800 = const()[name = string("op_10800"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_111_cast_fp16 = reshape(shape = var_10800, x = x_299_cast_fp16)[name = string("value_states_111_cast_fp16")]; + bool var_10815_transpose_x_1 = const()[name = string("op_10815_transpose_x_1"), val = bool(false)]; + bool var_10815_transpose_y_1 = const()[name = string("op_10815_transpose_y_1"), val = bool(true)]; + tensor var_10815 = matmul(transpose_x = var_10815_transpose_x_1, transpose_y = var_10815_transpose_y_1, x = query_states_73, y = key_states_75_cast_fp16)[name = string("op_10815")]; + fp16 var_10816_to_fp16 = const()[name = string("op_10816_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_109_cast_fp16 = mul(x = var_10815, y = var_10816_to_fp16)[name = string("attn_weights_109_cast_fp16")]; + tensor attn_weights_111_cast_fp16 = add(x = attn_weights_109_cast_fp16, y = causal_mask)[name = string("attn_weights_111_cast_fp16")]; + int32 var_10851 = const()[name = string("op_10851"), val = int32(-1)]; + tensor attn_weights_113_cast_fp16 = softmax(axis = var_10851, x = attn_weights_111_cast_fp16)[name = string("attn_weights_113_cast_fp16")]; + bool attn_output_181_transpose_x_0 = const()[name = string("attn_output_181_transpose_x_0"), val = bool(false)]; + bool attn_output_181_transpose_y_0 = const()[name = string("attn_output_181_transpose_y_0"), val = bool(false)]; + tensor attn_output_181_cast_fp16 = matmul(transpose_x = attn_output_181_transpose_x_0, transpose_y = attn_output_181_transpose_y_0, x = attn_weights_113_cast_fp16, y = value_states_111_cast_fp16)[name = string("attn_output_181_cast_fp16")]; + tensor var_10862_perm_0 = const()[name = string("op_10862_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_10866 = const()[name = string("op_10866"), val = tensor([1, 1, 2048])]; + tensor var_10862_cast_fp16 = transpose(perm = var_10862_perm_0, x = attn_output_181_cast_fp16)[name = string("transpose_58")]; + tensor attn_output_185_cast_fp16 = reshape(shape = var_10866, x = var_10862_cast_fp16)[name = string("attn_output_185_cast_fp16")]; + tensor var_10871 = const()[name = string("op_10871"), val = tensor([0, 2, 1])]; + string var_10887_pad_type_0 = const()[name = string("op_10887_pad_type_0"), val = string("valid")]; + int32 var_10887_groups_0 = const()[name = string("op_10887_groups_0"), val = int32(1)]; + tensor var_10887_strides_0 = const()[name = string("op_10887_strides_0"), val = tensor([1])]; + tensor var_10887_pad_0 = const()[name = string("op_10887_pad_0"), val = tensor([0, 0])]; + tensor var_10887_dilations_0 = const()[name = string("op_10887_dilations_0"), val = tensor([1])]; + tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445103744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447200960))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10872_cast_fp16 = transpose(perm = var_10871, x = attn_output_185_cast_fp16)[name = string("transpose_57")]; + tensor var_10887_cast_fp16 = conv(dilations = var_10887_dilations_0, groups = var_10887_groups_0, pad = var_10887_pad_0, pad_type = var_10887_pad_type_0, strides = var_10887_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_10872_cast_fp16)[name = string("op_10887_cast_fp16")]; + tensor var_10891 = const()[name = string("op_10891"), val = tensor([0, 2, 1])]; + tensor attn_output_189_cast_fp16 = transpose(perm = var_10891, x = var_10887_cast_fp16)[name = string("transpose_56")]; + tensor hidden_states_189_cast_fp16 = add(x = hidden_states_181_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_189_cast_fp16")]; + int32 var_10904 = const()[name = string("op_10904"), val = int32(-1)]; + fp16 const_566_promoted_to_fp16 = const()[name = string("const_566_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10906_cast_fp16 = mul(x = hidden_states_189_cast_fp16, y = const_566_promoted_to_fp16)[name = string("op_10906_cast_fp16")]; + bool input_335_interleave_0 = const()[name = string("input_335_interleave_0"), val = bool(false)]; + tensor input_335_cast_fp16 = concat(axis = var_10904, interleave = input_335_interleave_0, values = (hidden_states_189_cast_fp16, var_10906_cast_fp16))[name = string("input_335_cast_fp16")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_10901_to_fp16 = const()[name = string("op_10901_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_10901_to_fp16, x = input_335_cast_fp16)[name = string("normed_301_cast_fp16")]; + tensor normed_303_begin_0 = const()[name = string("normed_303_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_303_end_0 = const()[name = string("normed_303_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_303_end_mask_0 = const()[name = string("normed_303_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_303_cast_fp16 = slice_by_index(begin = normed_303_begin_0, end = normed_303_end_0, end_mask = normed_303_end_mask_0, x = normed_301_cast_fp16)[name = string("normed_303_cast_fp16")]; + tensor const_569_promoted_to_fp16 = const()[name = string("const_569_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447266560)))]; + tensor x_301_cast_fp16 = mul(x = normed_303_cast_fp16, y = const_569_promoted_to_fp16)[name = string("x_301_cast_fp16")]; + tensor var_10931 = const()[name = string("op_10931"), val = tensor([0, 2, 1])]; + tensor input_337_axes_0 = const()[name = string("input_337_axes_0"), val = tensor([2])]; + tensor var_10932 = transpose(perm = var_10931, x = x_301_cast_fp16)[name = string("transpose_55")]; + tensor input_337 = expand_dims(axes = input_337_axes_0, x = var_10932)[name = string("input_337")]; + string input_339_pad_type_0 = const()[name = string("input_339_pad_type_0"), val = string("valid")]; + tensor input_339_strides_0 = const()[name = string("input_339_strides_0"), val = tensor([1, 1])]; + tensor input_339_pad_0 = const()[name = string("input_339_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_339_dilations_0 = const()[name = string("input_339_dilations_0"), val = tensor([1, 1])]; + int32 input_339_groups_0 = const()[name = string("input_339_groups_0"), val = int32(1)]; + tensor input_339 = conv(dilations = input_339_dilations_0, groups = input_339_groups_0, pad = input_339_pad_0, pad_type = input_339_pad_type_0, strides = input_339_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_palettized, x = input_337)[name = string("input_339")]; + string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; + tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; + tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; + int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; + tensor b_37 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_palettized, x = input_337)[name = string("b_37")]; + tensor c_37 = silu(x = input_339)[name = string("c_37")]; + tensor input_341 = mul(x = c_37, y = b_37)[name = string("input_341")]; + string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; + tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; + tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; + int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; + tensor e_37 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_palettized, x = input_341)[name = string("e_37")]; + tensor var_10954_axes_0 = const()[name = string("op_10954_axes_0"), val = tensor([2])]; + tensor var_10954 = squeeze(axes = var_10954_axes_0, x = e_37)[name = string("op_10954")]; + tensor var_10955 = const()[name = string("op_10955"), val = tensor([0, 2, 1])]; + tensor var_10956 = transpose(perm = var_10955, x = var_10954)[name = string("transpose_54")]; + tensor hidden_states_191_cast_fp16 = add(x = hidden_states_189_cast_fp16, y = var_10956)[name = string("hidden_states_191_cast_fp16")]; + int32 var_10968 = const()[name = string("op_10968"), val = int32(-1)]; + fp16 const_570_promoted_to_fp16 = const()[name = string("const_570_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10970_cast_fp16 = mul(x = hidden_states_191_cast_fp16, y = const_570_promoted_to_fp16)[name = string("op_10970_cast_fp16")]; + bool input_343_interleave_0 = const()[name = string("input_343_interleave_0"), val = bool(false)]; + tensor input_343_cast_fp16 = concat(axis = var_10968, interleave = input_343_interleave_0, values = (hidden_states_191_cast_fp16, var_10970_cast_fp16))[name = string("input_343_cast_fp16")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_10965_to_fp16 = const()[name = string("op_10965_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_10965_to_fp16, x = input_343_cast_fp16)[name = string("normed_305_cast_fp16")]; + tensor normed_307_begin_0 = const()[name = string("normed_307_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_307_end_0 = const()[name = string("normed_307_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_307_end_mask_0 = const()[name = string("normed_307_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_307_cast_fp16 = slice_by_index(begin = normed_307_begin_0, end = normed_307_end_0, end_mask = normed_307_end_mask_0, x = normed_305_cast_fp16)[name = string("normed_307_cast_fp16")]; + tensor const_573_promoted_to_fp16 = const()[name = string("const_573_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447268672)))]; + tensor hidden_states_193_cast_fp16 = mul(x = normed_307_cast_fp16, y = const_573_promoted_to_fp16)[name = string("hidden_states_193_cast_fp16")]; + tensor var_10987 = const()[name = string("op_10987"), val = tensor([0, 2, 1])]; + tensor var_10990_axes_0 = const()[name = string("op_10990_axes_0"), val = tensor([2])]; + tensor var_10988_cast_fp16 = transpose(perm = var_10987, x = hidden_states_193_cast_fp16)[name = string("transpose_53")]; + tensor var_10990_cast_fp16 = expand_dims(axes = var_10990_axes_0, x = var_10988_cast_fp16)[name = string("op_10990_cast_fp16")]; + string var_11006_pad_type_0 = const()[name = string("op_11006_pad_type_0"), val = string("valid")]; + tensor var_11006_strides_0 = const()[name = string("op_11006_strides_0"), val = tensor([1, 1])]; + tensor var_11006_pad_0 = const()[name = string("op_11006_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11006_dilations_0 = const()[name = string("op_11006_dilations_0"), val = tensor([1, 1])]; + int32 var_11006_groups_0 = const()[name = string("op_11006_groups_0"), val = int32(1)]; + tensor var_11006 = conv(dilations = var_11006_dilations_0, groups = var_11006_groups_0, pad = var_11006_pad_0, pad_type = var_11006_pad_type_0, strides = var_11006_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_10990_cast_fp16)[name = string("op_11006")]; + tensor var_11011 = const()[name = string("op_11011"), val = tensor([1, 16, 1, 128])]; + tensor var_11012 = reshape(shape = var_11011, x = var_11006)[name = string("op_11012")]; + string var_11028_pad_type_0 = const()[name = string("op_11028_pad_type_0"), val = string("valid")]; + tensor var_11028_strides_0 = const()[name = string("op_11028_strides_0"), val = tensor([1, 1])]; + tensor var_11028_pad_0 = const()[name = string("op_11028_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11028_dilations_0 = const()[name = string("op_11028_dilations_0"), val = tensor([1, 1])]; + int32 var_11028_groups_0 = const()[name = string("op_11028_groups_0"), val = int32(1)]; + tensor var_11028 = conv(dilations = var_11028_dilations_0, groups = var_11028_groups_0, pad = var_11028_pad_0, pad_type = var_11028_pad_type_0, strides = var_11028_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_10990_cast_fp16)[name = string("op_11028")]; + tensor var_11033 = const()[name = string("op_11033"), val = tensor([1, 8, 1, 128])]; + tensor var_11034 = reshape(shape = var_11033, x = var_11028)[name = string("op_11034")]; + string var_11050_pad_type_0 = const()[name = string("op_11050_pad_type_0"), val = string("valid")]; + tensor var_11050_strides_0 = const()[name = string("op_11050_strides_0"), val = tensor([1, 1])]; + tensor var_11050_pad_0 = const()[name = string("op_11050_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11050_dilations_0 = const()[name = string("op_11050_dilations_0"), val = tensor([1, 1])]; + int32 var_11050_groups_0 = const()[name = string("op_11050_groups_0"), val = int32(1)]; + tensor var_11050 = conv(dilations = var_11050_dilations_0, groups = var_11050_groups_0, pad = var_11050_pad_0, pad_type = var_11050_pad_type_0, strides = var_11050_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_10990_cast_fp16)[name = string("op_11050")]; + tensor var_11055 = const()[name = string("op_11055"), val = tensor([1, 8, 1, 128])]; + tensor var_11056 = reshape(shape = var_11055, x = var_11050)[name = string("op_11056")]; + int32 var_11071 = const()[name = string("op_11071"), val = int32(-1)]; + fp16 const_574_promoted = const()[name = string("const_574_promoted"), val = fp16(-0x1p+0)]; + tensor var_11073 = mul(x = var_11012, y = const_574_promoted)[name = string("op_11073")]; + bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; + tensor input_347 = concat(axis = var_11071, interleave = input_347_interleave_0, values = (var_11012, var_11073))[name = string("input_347")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_11068_to_fp16 = const()[name = string("op_11068_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_11068_to_fp16, x = input_347)[name = string("normed_309_cast_fp16")]; + tensor normed_311_begin_0 = const()[name = string("normed_311_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_311_end_0 = const()[name = string("normed_311_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_311_end_mask_0 = const()[name = string("normed_311_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_311 = slice_by_index(begin = normed_311_begin_0, end = normed_311_end_0, end_mask = normed_311_end_mask_0, x = normed_309_cast_fp16)[name = string("normed_311")]; + tensor const_577 = const()[name = string("const_577"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447270784)))]; + tensor q_39 = mul(x = normed_311, y = const_577)[name = string("q_39")]; + int32 var_11096 = const()[name = string("op_11096"), val = int32(-1)]; + fp16 const_578_promoted = const()[name = string("const_578_promoted"), val = fp16(-0x1p+0)]; + tensor var_11098 = mul(x = var_11034, y = const_578_promoted)[name = string("op_11098")]; + bool input_349_interleave_0 = const()[name = string("input_349_interleave_0"), val = bool(false)]; + tensor input_349 = concat(axis = var_11096, interleave = input_349_interleave_0, values = (var_11034, var_11098))[name = string("input_349")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_11093_to_fp16 = const()[name = string("op_11093_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_11093_to_fp16, x = input_349)[name = string("normed_313_cast_fp16")]; + tensor normed_315_begin_0 = const()[name = string("normed_315_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_315_end_0 = const()[name = string("normed_315_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_315_end_mask_0 = const()[name = string("normed_315_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_315 = slice_by_index(begin = normed_315_begin_0, end = normed_315_end_0, end_mask = normed_315_end_mask_0, x = normed_313_cast_fp16)[name = string("normed_315")]; + tensor const_581 = const()[name = string("const_581"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447271104)))]; + tensor k_39 = mul(x = normed_315, y = const_581)[name = string("k_39")]; + tensor var_11112 = mul(x = q_39, y = cos_1_cast_fp16)[name = string("op_11112")]; + tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_77 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39)[name = string("x1_77")]; + tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_77 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39)[name = string("x2_77")]; + fp16 const_584_promoted = const()[name = string("const_584_promoted"), val = fp16(-0x1p+0)]; + tensor var_11133 = mul(x = x2_77, y = const_584_promoted)[name = string("op_11133")]; + int32 var_11135 = const()[name = string("op_11135"), val = int32(-1)]; + bool var_11136_interleave_0 = const()[name = string("op_11136_interleave_0"), val = bool(false)]; + tensor var_11136 = concat(axis = var_11135, interleave = var_11136_interleave_0, values = (var_11133, x1_77))[name = string("op_11136")]; + tensor var_11137 = mul(x = var_11136, y = sin_1_cast_fp16)[name = string("op_11137")]; + tensor query_states_77 = add(x = var_11112, y = var_11137)[name = string("query_states_77")]; + tensor var_11140 = mul(x = k_39, y = cos_1_cast_fp16)[name = string("op_11140")]; + tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_79 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39)[name = string("x1_79")]; + tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_79 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39)[name = string("x2_79")]; + fp16 const_587_promoted = const()[name = string("const_587_promoted"), val = fp16(-0x1p+0)]; + tensor var_11161 = mul(x = x2_79, y = const_587_promoted)[name = string("op_11161")]; + int32 var_11163 = const()[name = string("op_11163"), val = int32(-1)]; + bool var_11164_interleave_0 = const()[name = string("op_11164_interleave_0"), val = bool(false)]; + tensor var_11164 = concat(axis = var_11163, interleave = var_11164_interleave_0, values = (var_11161, x1_79))[name = string("op_11164")]; + tensor var_11165 = mul(x = var_11164, y = sin_1_cast_fp16)[name = string("op_11165")]; + tensor key_states_77 = add(x = var_11140, y = var_11165)[name = string("key_states_77")]; + tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([19])]; + tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; + tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; + tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([20])]; + int32 concat_154_axis_0 = const()[name = string("concat_154_axis_0"), val = int32(0)]; + bool concat_154_interleave_0 = const()[name = string("concat_154_interleave_0"), val = bool(false)]; + tensor concat_154 = concat(axis = concat_154_axis_0, interleave = concat_154_interleave_0, values = (expand_dims_228, expand_dims_229, current_pos, expand_dims_231))[name = string("concat_154")]; + tensor concat_155_values1_0 = const()[name = string("concat_155_values1_0"), val = tensor([0])]; + tensor concat_155_values3_0 = const()[name = string("concat_155_values3_0"), val = tensor([0])]; + int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; + bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; + tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (expand_dims_232, concat_155_values1_0, var_1746, concat_155_values3_0))[name = string("concat_155")]; + tensor model_model_kv_cache_0_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_154, begin_mask = model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0, end = concat_155, end_mask = model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_39_stride_0, update = key_states_77, x = coreml_update_state_93)[name = string("model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_38_write_state")]; + tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_38")]; + tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([47])]; + tensor expand_dims_235 = const()[name = string("expand_dims_235"), val = tensor([0])]; + tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; + tensor expand_dims_238 = const()[name = string("expand_dims_238"), val = tensor([48])]; + int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; + bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; + tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (expand_dims_234, expand_dims_235, current_pos, expand_dims_237))[name = string("concat_158")]; + tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; + tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; + int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; + bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; + tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (expand_dims_238, concat_159_values1_0, var_1746, concat_159_values3_0))[name = string("concat_159")]; + tensor model_model_kv_cache_0_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_158, begin_mask = model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0, end = concat_159, end_mask = model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_40_stride_0, update = var_11056, x = coreml_update_state_94)[name = string("model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_39_write_state")]; + tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_39")]; + tensor var_11220_begin_0 = const()[name = string("op_11220_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor var_11220_end_0 = const()[name = string("op_11220_end_0"), val = tensor([20, 8, 4096, 128])]; + tensor var_11220_end_mask_0 = const()[name = string("op_11220_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11220_cast_fp16 = slice_by_index(begin = var_11220_begin_0, end = var_11220_end_0, end_mask = var_11220_end_mask_0, x = coreml_update_state_95)[name = string("op_11220_cast_fp16")]; + tensor K_layer_cache_39_axes_0 = const()[name = string("K_layer_cache_39_axes_0"), val = tensor([0])]; + tensor K_layer_cache_39_cast_fp16 = squeeze(axes = K_layer_cache_39_axes_0, x = var_11220_cast_fp16)[name = string("K_layer_cache_39_cast_fp16")]; + tensor var_11227_begin_0 = const()[name = string("op_11227_begin_0"), val = tensor([47, 0, 0, 0])]; + tensor var_11227_end_0 = const()[name = string("op_11227_end_0"), val = tensor([48, 8, 4096, 128])]; + tensor var_11227_end_mask_0 = const()[name = string("op_11227_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11227_cast_fp16 = slice_by_index(begin = var_11227_begin_0, end = var_11227_end_0, end_mask = var_11227_end_mask_0, x = coreml_update_state_95)[name = string("op_11227_cast_fp16")]; + tensor V_layer_cache_39_axes_0 = const()[name = string("V_layer_cache_39_axes_0"), val = tensor([0])]; + tensor V_layer_cache_39_cast_fp16 = squeeze(axes = V_layer_cache_39_axes_0, x = var_11227_cast_fp16)[name = string("V_layer_cache_39_cast_fp16")]; + tensor x_307_axes_0 = const()[name = string("x_307_axes_0"), val = tensor([1])]; + tensor x_307_cast_fp16 = expand_dims(axes = x_307_axes_0, x = K_layer_cache_39_cast_fp16)[name = string("x_307_cast_fp16")]; + tensor var_11264 = const()[name = string("op_11264"), val = tensor([1, 2, 1, 1])]; + tensor x_309_cast_fp16 = tile(reps = var_11264, x = x_307_cast_fp16)[name = string("x_309_cast_fp16")]; + tensor var_11276 = const()[name = string("op_11276"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_79_cast_fp16 = reshape(shape = var_11276, x = x_309_cast_fp16)[name = string("key_states_79_cast_fp16")]; + tensor x_313_axes_0 = const()[name = string("x_313_axes_0"), val = tensor([1])]; + tensor x_313_cast_fp16 = expand_dims(axes = x_313_axes_0, x = V_layer_cache_39_cast_fp16)[name = string("x_313_cast_fp16")]; + tensor var_11284 = const()[name = string("op_11284"), val = tensor([1, 2, 1, 1])]; + tensor x_315_cast_fp16 = tile(reps = var_11284, x = x_313_cast_fp16)[name = string("x_315_cast_fp16")]; + tensor var_11296 = const()[name = string("op_11296"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_117_cast_fp16 = reshape(shape = var_11296, x = x_315_cast_fp16)[name = string("value_states_117_cast_fp16")]; + bool var_11311_transpose_x_1 = const()[name = string("op_11311_transpose_x_1"), val = bool(false)]; + bool var_11311_transpose_y_1 = const()[name = string("op_11311_transpose_y_1"), val = bool(true)]; + tensor var_11311 = matmul(transpose_x = var_11311_transpose_x_1, transpose_y = var_11311_transpose_y_1, x = query_states_77, y = key_states_79_cast_fp16)[name = string("op_11311")]; + fp16 var_11312_to_fp16 = const()[name = string("op_11312_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_115_cast_fp16 = mul(x = var_11311, y = var_11312_to_fp16)[name = string("attn_weights_115_cast_fp16")]; + tensor attn_weights_117_cast_fp16 = add(x = attn_weights_115_cast_fp16, y = causal_mask)[name = string("attn_weights_117_cast_fp16")]; + int32 var_11347 = const()[name = string("op_11347"), val = int32(-1)]; + tensor attn_weights_119_cast_fp16 = softmax(axis = var_11347, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; + bool attn_output_191_transpose_x_0 = const()[name = string("attn_output_191_transpose_x_0"), val = bool(false)]; + bool attn_output_191_transpose_y_0 = const()[name = string("attn_output_191_transpose_y_0"), val = bool(false)]; + tensor attn_output_191_cast_fp16 = matmul(transpose_x = attn_output_191_transpose_x_0, transpose_y = attn_output_191_transpose_y_0, x = attn_weights_119_cast_fp16, y = value_states_117_cast_fp16)[name = string("attn_output_191_cast_fp16")]; + tensor var_11358_perm_0 = const()[name = string("op_11358_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11362 = const()[name = string("op_11362"), val = tensor([1, 1, 2048])]; + tensor var_11358_cast_fp16 = transpose(perm = var_11358_perm_0, x = attn_output_191_cast_fp16)[name = string("transpose_52")]; + tensor attn_output_195_cast_fp16 = reshape(shape = var_11362, x = var_11358_cast_fp16)[name = string("attn_output_195_cast_fp16")]; + tensor var_11367 = const()[name = string("op_11367"), val = tensor([0, 2, 1])]; + string var_11383_pad_type_0 = const()[name = string("op_11383_pad_type_0"), val = string("valid")]; + int32 var_11383_groups_0 = const()[name = string("op_11383_groups_0"), val = int32(1)]; + tensor var_11383_strides_0 = const()[name = string("op_11383_strides_0"), val = tensor([1])]; + tensor var_11383_pad_0 = const()[name = string("op_11383_pad_0"), val = tensor([0, 0])]; + tensor var_11383_dilations_0 = const()[name = string("op_11383_dilations_0"), val = tensor([1])]; + tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447271424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449368640))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11368_cast_fp16 = transpose(perm = var_11367, x = attn_output_195_cast_fp16)[name = string("transpose_51")]; + tensor var_11383_cast_fp16 = conv(dilations = var_11383_dilations_0, groups = var_11383_groups_0, pad = var_11383_pad_0, pad_type = var_11383_pad_type_0, strides = var_11383_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_11368_cast_fp16)[name = string("op_11383_cast_fp16")]; + tensor var_11387 = const()[name = string("op_11387"), val = tensor([0, 2, 1])]; + tensor attn_output_199_cast_fp16 = transpose(perm = var_11387, x = var_11383_cast_fp16)[name = string("transpose_50")]; + tensor hidden_states_199_cast_fp16 = add(x = hidden_states_191_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_199_cast_fp16")]; + int32 var_11400 = const()[name = string("op_11400"), val = int32(-1)]; + fp16 const_596_promoted_to_fp16 = const()[name = string("const_596_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11402_cast_fp16 = mul(x = hidden_states_199_cast_fp16, y = const_596_promoted_to_fp16)[name = string("op_11402_cast_fp16")]; + bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; + tensor input_353_cast_fp16 = concat(axis = var_11400, interleave = input_353_interleave_0, values = (hidden_states_199_cast_fp16, var_11402_cast_fp16))[name = string("input_353_cast_fp16")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_11397_to_fp16 = const()[name = string("op_11397_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_11397_to_fp16, x = input_353_cast_fp16)[name = string("normed_317_cast_fp16")]; + tensor normed_319_begin_0 = const()[name = string("normed_319_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_319_end_0 = const()[name = string("normed_319_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_319_end_mask_0 = const()[name = string("normed_319_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_319_cast_fp16 = slice_by_index(begin = normed_319_begin_0, end = normed_319_end_0, end_mask = normed_319_end_mask_0, x = normed_317_cast_fp16)[name = string("normed_319_cast_fp16")]; + tensor const_599_promoted_to_fp16 = const()[name = string("const_599_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449434240)))]; + tensor x_317_cast_fp16 = mul(x = normed_319_cast_fp16, y = const_599_promoted_to_fp16)[name = string("x_317_cast_fp16")]; + tensor var_11427 = const()[name = string("op_11427"), val = tensor([0, 2, 1])]; + tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; + tensor var_11428 = transpose(perm = var_11427, x = x_317_cast_fp16)[name = string("transpose_49")]; + tensor input_355 = expand_dims(axes = input_355_axes_0, x = var_11428)[name = string("input_355")]; + string input_357_pad_type_0 = const()[name = string("input_357_pad_type_0"), val = string("valid")]; + tensor input_357_strides_0 = const()[name = string("input_357_strides_0"), val = tensor([1, 1])]; + tensor input_357_pad_0 = const()[name = string("input_357_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_357_dilations_0 = const()[name = string("input_357_dilations_0"), val = tensor([1, 1])]; + int32 input_357_groups_0 = const()[name = string("input_357_groups_0"), val = int32(1)]; + tensor input_357 = conv(dilations = input_357_dilations_0, groups = input_357_groups_0, pad = input_357_pad_0, pad_type = input_357_pad_type_0, strides = input_357_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_palettized, x = input_355)[name = string("input_357")]; + string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; + tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; + tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; + int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; + tensor b_39 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_palettized, x = input_355)[name = string("b_39")]; + tensor c_39 = silu(x = input_357)[name = string("c_39")]; + tensor input_359 = mul(x = c_39, y = b_39)[name = string("input_359")]; + string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; + tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; + tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; + int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; + tensor e_39 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_palettized, x = input_359)[name = string("e_39")]; + tensor var_11450_axes_0 = const()[name = string("op_11450_axes_0"), val = tensor([2])]; + tensor var_11450 = squeeze(axes = var_11450_axes_0, x = e_39)[name = string("op_11450")]; + tensor var_11451 = const()[name = string("op_11451"), val = tensor([0, 2, 1])]; + tensor var_11452 = transpose(perm = var_11451, x = var_11450)[name = string("transpose_48")]; + tensor hidden_states_201_cast_fp16 = add(x = hidden_states_199_cast_fp16, y = var_11452)[name = string("hidden_states_201_cast_fp16")]; + int32 var_11464 = const()[name = string("op_11464"), val = int32(-1)]; + fp16 const_600_promoted_to_fp16 = const()[name = string("const_600_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11466_cast_fp16 = mul(x = hidden_states_201_cast_fp16, y = const_600_promoted_to_fp16)[name = string("op_11466_cast_fp16")]; + bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; + tensor input_361_cast_fp16 = concat(axis = var_11464, interleave = input_361_interleave_0, values = (hidden_states_201_cast_fp16, var_11466_cast_fp16))[name = string("input_361_cast_fp16")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_11461_to_fp16 = const()[name = string("op_11461_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_11461_to_fp16, x = input_361_cast_fp16)[name = string("normed_321_cast_fp16")]; + tensor normed_323_begin_0 = const()[name = string("normed_323_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_323_end_0 = const()[name = string("normed_323_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_323_end_mask_0 = const()[name = string("normed_323_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_323_cast_fp16 = slice_by_index(begin = normed_323_begin_0, end = normed_323_end_0, end_mask = normed_323_end_mask_0, x = normed_321_cast_fp16)[name = string("normed_323_cast_fp16")]; + tensor const_603_promoted_to_fp16 = const()[name = string("const_603_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449436352)))]; + tensor hidden_states_203_cast_fp16 = mul(x = normed_323_cast_fp16, y = const_603_promoted_to_fp16)[name = string("hidden_states_203_cast_fp16")]; + tensor var_11483 = const()[name = string("op_11483"), val = tensor([0, 2, 1])]; + tensor var_11486_axes_0 = const()[name = string("op_11486_axes_0"), val = tensor([2])]; + tensor var_11484_cast_fp16 = transpose(perm = var_11483, x = hidden_states_203_cast_fp16)[name = string("transpose_47")]; + tensor var_11486_cast_fp16 = expand_dims(axes = var_11486_axes_0, x = var_11484_cast_fp16)[name = string("op_11486_cast_fp16")]; + string var_11502_pad_type_0 = const()[name = string("op_11502_pad_type_0"), val = string("valid")]; + tensor var_11502_strides_0 = const()[name = string("op_11502_strides_0"), val = tensor([1, 1])]; + tensor var_11502_pad_0 = const()[name = string("op_11502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11502_dilations_0 = const()[name = string("op_11502_dilations_0"), val = tensor([1, 1])]; + int32 var_11502_groups_0 = const()[name = string("op_11502_groups_0"), val = int32(1)]; + tensor var_11502 = conv(dilations = var_11502_dilations_0, groups = var_11502_groups_0, pad = var_11502_pad_0, pad_type = var_11502_pad_type_0, strides = var_11502_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_11486_cast_fp16)[name = string("op_11502")]; + tensor var_11507 = const()[name = string("op_11507"), val = tensor([1, 16, 1, 128])]; + tensor var_11508 = reshape(shape = var_11507, x = var_11502)[name = string("op_11508")]; + string var_11524_pad_type_0 = const()[name = string("op_11524_pad_type_0"), val = string("valid")]; + tensor var_11524_strides_0 = const()[name = string("op_11524_strides_0"), val = tensor([1, 1])]; + tensor var_11524_pad_0 = const()[name = string("op_11524_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11524_dilations_0 = const()[name = string("op_11524_dilations_0"), val = tensor([1, 1])]; + int32 var_11524_groups_0 = const()[name = string("op_11524_groups_0"), val = int32(1)]; + tensor var_11524 = conv(dilations = var_11524_dilations_0, groups = var_11524_groups_0, pad = var_11524_pad_0, pad_type = var_11524_pad_type_0, strides = var_11524_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_11486_cast_fp16)[name = string("op_11524")]; + tensor var_11529 = const()[name = string("op_11529"), val = tensor([1, 8, 1, 128])]; + tensor var_11530 = reshape(shape = var_11529, x = var_11524)[name = string("op_11530")]; + string var_11546_pad_type_0 = const()[name = string("op_11546_pad_type_0"), val = string("valid")]; + tensor var_11546_strides_0 = const()[name = string("op_11546_strides_0"), val = tensor([1, 1])]; + tensor var_11546_pad_0 = const()[name = string("op_11546_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11546_dilations_0 = const()[name = string("op_11546_dilations_0"), val = tensor([1, 1])]; + int32 var_11546_groups_0 = const()[name = string("op_11546_groups_0"), val = int32(1)]; + tensor var_11546 = conv(dilations = var_11546_dilations_0, groups = var_11546_groups_0, pad = var_11546_pad_0, pad_type = var_11546_pad_type_0, strides = var_11546_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_11486_cast_fp16)[name = string("op_11546")]; + tensor var_11551 = const()[name = string("op_11551"), val = tensor([1, 8, 1, 128])]; + tensor var_11552 = reshape(shape = var_11551, x = var_11546)[name = string("op_11552")]; + int32 var_11567 = const()[name = string("op_11567"), val = int32(-1)]; + fp16 const_604_promoted = const()[name = string("const_604_promoted"), val = fp16(-0x1p+0)]; + tensor var_11569 = mul(x = var_11508, y = const_604_promoted)[name = string("op_11569")]; + bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; + tensor input_365 = concat(axis = var_11567, interleave = input_365_interleave_0, values = (var_11508, var_11569))[name = string("input_365")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_11564_to_fp16 = const()[name = string("op_11564_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_11564_to_fp16, x = input_365)[name = string("normed_325_cast_fp16")]; + tensor normed_327_begin_0 = const()[name = string("normed_327_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_327_end_0 = const()[name = string("normed_327_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_327_end_mask_0 = const()[name = string("normed_327_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_327 = slice_by_index(begin = normed_327_begin_0, end = normed_327_end_0, end_mask = normed_327_end_mask_0, x = normed_325_cast_fp16)[name = string("normed_327")]; + tensor const_607 = const()[name = string("const_607"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449438464)))]; + tensor q_41 = mul(x = normed_327, y = const_607)[name = string("q_41")]; + int32 var_11592 = const()[name = string("op_11592"), val = int32(-1)]; + fp16 const_608_promoted = const()[name = string("const_608_promoted"), val = fp16(-0x1p+0)]; + tensor var_11594 = mul(x = var_11530, y = const_608_promoted)[name = string("op_11594")]; + bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; + tensor input_367 = concat(axis = var_11592, interleave = input_367_interleave_0, values = (var_11530, var_11594))[name = string("input_367")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_11589_to_fp16 = const()[name = string("op_11589_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_11589_to_fp16, x = input_367)[name = string("normed_329_cast_fp16")]; + tensor normed_331_begin_0 = const()[name = string("normed_331_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_331_end_0 = const()[name = string("normed_331_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_331_end_mask_0 = const()[name = string("normed_331_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_331 = slice_by_index(begin = normed_331_begin_0, end = normed_331_end_0, end_mask = normed_331_end_mask_0, x = normed_329_cast_fp16)[name = string("normed_331")]; + tensor const_611 = const()[name = string("const_611"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449438784)))]; + tensor k_41 = mul(x = normed_331, y = const_611)[name = string("k_41")]; + tensor var_11608 = mul(x = q_41, y = cos_1_cast_fp16)[name = string("op_11608")]; + tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_81 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41)[name = string("x1_81")]; + tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_81 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41)[name = string("x2_81")]; + fp16 const_614_promoted = const()[name = string("const_614_promoted"), val = fp16(-0x1p+0)]; + tensor var_11629 = mul(x = x2_81, y = const_614_promoted)[name = string("op_11629")]; + int32 var_11631 = const()[name = string("op_11631"), val = int32(-1)]; + bool var_11632_interleave_0 = const()[name = string("op_11632_interleave_0"), val = bool(false)]; + tensor var_11632 = concat(axis = var_11631, interleave = var_11632_interleave_0, values = (var_11629, x1_81))[name = string("op_11632")]; + tensor var_11633 = mul(x = var_11632, y = sin_1_cast_fp16)[name = string("op_11633")]; + tensor query_states_81 = add(x = var_11608, y = var_11633)[name = string("query_states_81")]; + tensor var_11636 = mul(x = k_41, y = cos_1_cast_fp16)[name = string("op_11636")]; + tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_83 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41)[name = string("x1_83")]; + tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_83 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41)[name = string("x2_83")]; + fp16 const_617_promoted = const()[name = string("const_617_promoted"), val = fp16(-0x1p+0)]; + tensor var_11657 = mul(x = x2_83, y = const_617_promoted)[name = string("op_11657")]; + int32 var_11659 = const()[name = string("op_11659"), val = int32(-1)]; + bool var_11660_interleave_0 = const()[name = string("op_11660_interleave_0"), val = bool(false)]; + tensor var_11660 = concat(axis = var_11659, interleave = var_11660_interleave_0, values = (var_11657, x1_83))[name = string("op_11660")]; + tensor var_11661 = mul(x = var_11660, y = sin_1_cast_fp16)[name = string("op_11661")]; + tensor key_states_81 = add(x = var_11636, y = var_11661)[name = string("key_states_81")]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([20])]; + tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; + tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; + tensor expand_dims_244 = const()[name = string("expand_dims_244"), val = tensor([21])]; + int32 concat_162_axis_0 = const()[name = string("concat_162_axis_0"), val = int32(0)]; + bool concat_162_interleave_0 = const()[name = string("concat_162_interleave_0"), val = bool(false)]; + tensor concat_162 = concat(axis = concat_162_axis_0, interleave = concat_162_interleave_0, values = (expand_dims_240, expand_dims_241, current_pos, expand_dims_243))[name = string("concat_162")]; + tensor concat_163_values1_0 = const()[name = string("concat_163_values1_0"), val = tensor([0])]; + tensor concat_163_values3_0 = const()[name = string("concat_163_values3_0"), val = tensor([0])]; + int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; + bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; + tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (expand_dims_244, concat_163_values1_0, var_1746, concat_163_values3_0))[name = string("concat_163")]; + tensor model_model_kv_cache_0_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_162, begin_mask = model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0, end = concat_163, end_mask = model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_41_stride_0, update = key_states_81, x = coreml_update_state_95)[name = string("model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_40_write_state")]; + tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_40")]; + tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([48])]; + tensor expand_dims_247 = const()[name = string("expand_dims_247"), val = tensor([0])]; + tensor expand_dims_249 = const()[name = string("expand_dims_249"), val = tensor([0])]; + tensor expand_dims_250 = const()[name = string("expand_dims_250"), val = tensor([49])]; + int32 concat_166_axis_0 = const()[name = string("concat_166_axis_0"), val = int32(0)]; + bool concat_166_interleave_0 = const()[name = string("concat_166_interleave_0"), val = bool(false)]; + tensor concat_166 = concat(axis = concat_166_axis_0, interleave = concat_166_interleave_0, values = (expand_dims_246, expand_dims_247, current_pos, expand_dims_249))[name = string("concat_166")]; + tensor concat_167_values1_0 = const()[name = string("concat_167_values1_0"), val = tensor([0])]; + tensor concat_167_values3_0 = const()[name = string("concat_167_values3_0"), val = tensor([0])]; + int32 concat_167_axis_0 = const()[name = string("concat_167_axis_0"), val = int32(0)]; + bool concat_167_interleave_0 = const()[name = string("concat_167_interleave_0"), val = bool(false)]; + tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (expand_dims_250, concat_167_values1_0, var_1746, concat_167_values3_0))[name = string("concat_167")]; + tensor model_model_kv_cache_0_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_166, begin_mask = model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0, end = concat_167, end_mask = model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_42_stride_0, update = var_11552, x = coreml_update_state_96)[name = string("model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_41_write_state")]; + tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_41")]; + tensor var_11716_begin_0 = const()[name = string("op_11716_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor var_11716_end_0 = const()[name = string("op_11716_end_0"), val = tensor([21, 8, 4096, 128])]; + tensor var_11716_end_mask_0 = const()[name = string("op_11716_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11716_cast_fp16 = slice_by_index(begin = var_11716_begin_0, end = var_11716_end_0, end_mask = var_11716_end_mask_0, x = coreml_update_state_97)[name = string("op_11716_cast_fp16")]; + tensor K_layer_cache_41_axes_0 = const()[name = string("K_layer_cache_41_axes_0"), val = tensor([0])]; + tensor K_layer_cache_41_cast_fp16 = squeeze(axes = K_layer_cache_41_axes_0, x = var_11716_cast_fp16)[name = string("K_layer_cache_41_cast_fp16")]; + tensor var_11723_begin_0 = const()[name = string("op_11723_begin_0"), val = tensor([48, 0, 0, 0])]; + tensor var_11723_end_0 = const()[name = string("op_11723_end_0"), val = tensor([49, 8, 4096, 128])]; + tensor var_11723_end_mask_0 = const()[name = string("op_11723_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11723_cast_fp16 = slice_by_index(begin = var_11723_begin_0, end = var_11723_end_0, end_mask = var_11723_end_mask_0, x = coreml_update_state_97)[name = string("op_11723_cast_fp16")]; + tensor V_layer_cache_41_axes_0 = const()[name = string("V_layer_cache_41_axes_0"), val = tensor([0])]; + tensor V_layer_cache_41_cast_fp16 = squeeze(axes = V_layer_cache_41_axes_0, x = var_11723_cast_fp16)[name = string("V_layer_cache_41_cast_fp16")]; + tensor x_323_axes_0 = const()[name = string("x_323_axes_0"), val = tensor([1])]; + tensor x_323_cast_fp16 = expand_dims(axes = x_323_axes_0, x = K_layer_cache_41_cast_fp16)[name = string("x_323_cast_fp16")]; + tensor var_11760 = const()[name = string("op_11760"), val = tensor([1, 2, 1, 1])]; + tensor x_325_cast_fp16 = tile(reps = var_11760, x = x_323_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_11772 = const()[name = string("op_11772"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_83_cast_fp16 = reshape(shape = var_11772, x = x_325_cast_fp16)[name = string("key_states_83_cast_fp16")]; + tensor x_329_axes_0 = const()[name = string("x_329_axes_0"), val = tensor([1])]; + tensor x_329_cast_fp16 = expand_dims(axes = x_329_axes_0, x = V_layer_cache_41_cast_fp16)[name = string("x_329_cast_fp16")]; + tensor var_11780 = const()[name = string("op_11780"), val = tensor([1, 2, 1, 1])]; + tensor x_331_cast_fp16 = tile(reps = var_11780, x = x_329_cast_fp16)[name = string("x_331_cast_fp16")]; + tensor var_11792 = const()[name = string("op_11792"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_123_cast_fp16 = reshape(shape = var_11792, x = x_331_cast_fp16)[name = string("value_states_123_cast_fp16")]; + bool var_11807_transpose_x_1 = const()[name = string("op_11807_transpose_x_1"), val = bool(false)]; + bool var_11807_transpose_y_1 = const()[name = string("op_11807_transpose_y_1"), val = bool(true)]; + tensor var_11807 = matmul(transpose_x = var_11807_transpose_x_1, transpose_y = var_11807_transpose_y_1, x = query_states_81, y = key_states_83_cast_fp16)[name = string("op_11807")]; + fp16 var_11808_to_fp16 = const()[name = string("op_11808_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_121_cast_fp16 = mul(x = var_11807, y = var_11808_to_fp16)[name = string("attn_weights_121_cast_fp16")]; + tensor attn_weights_123_cast_fp16 = add(x = attn_weights_121_cast_fp16, y = causal_mask)[name = string("attn_weights_123_cast_fp16")]; + int32 var_11843 = const()[name = string("op_11843"), val = int32(-1)]; + tensor attn_weights_125_cast_fp16 = softmax(axis = var_11843, x = attn_weights_123_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; + bool attn_output_201_transpose_x_0 = const()[name = string("attn_output_201_transpose_x_0"), val = bool(false)]; + bool attn_output_201_transpose_y_0 = const()[name = string("attn_output_201_transpose_y_0"), val = bool(false)]; + tensor attn_output_201_cast_fp16 = matmul(transpose_x = attn_output_201_transpose_x_0, transpose_y = attn_output_201_transpose_y_0, x = attn_weights_125_cast_fp16, y = value_states_123_cast_fp16)[name = string("attn_output_201_cast_fp16")]; + tensor var_11854_perm_0 = const()[name = string("op_11854_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11858 = const()[name = string("op_11858"), val = tensor([1, 1, 2048])]; + tensor var_11854_cast_fp16 = transpose(perm = var_11854_perm_0, x = attn_output_201_cast_fp16)[name = string("transpose_46")]; + tensor attn_output_205_cast_fp16 = reshape(shape = var_11858, x = var_11854_cast_fp16)[name = string("attn_output_205_cast_fp16")]; + tensor var_11863 = const()[name = string("op_11863"), val = tensor([0, 2, 1])]; + string var_11879_pad_type_0 = const()[name = string("op_11879_pad_type_0"), val = string("valid")]; + int32 var_11879_groups_0 = const()[name = string("op_11879_groups_0"), val = int32(1)]; + tensor var_11879_strides_0 = const()[name = string("op_11879_strides_0"), val = tensor([1])]; + tensor var_11879_pad_0 = const()[name = string("op_11879_pad_0"), val = tensor([0, 0])]; + tensor var_11879_dilations_0 = const()[name = string("op_11879_dilations_0"), val = tensor([1])]; + tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449439104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451536320))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11864_cast_fp16 = transpose(perm = var_11863, x = attn_output_205_cast_fp16)[name = string("transpose_45")]; + tensor var_11879_cast_fp16 = conv(dilations = var_11879_dilations_0, groups = var_11879_groups_0, pad = var_11879_pad_0, pad_type = var_11879_pad_type_0, strides = var_11879_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_11864_cast_fp16)[name = string("op_11879_cast_fp16")]; + tensor var_11883 = const()[name = string("op_11883"), val = tensor([0, 2, 1])]; + tensor attn_output_209_cast_fp16 = transpose(perm = var_11883, x = var_11879_cast_fp16)[name = string("transpose_44")]; + tensor hidden_states_209_cast_fp16 = add(x = hidden_states_201_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; + int32 var_11896 = const()[name = string("op_11896"), val = int32(-1)]; + fp16 const_626_promoted_to_fp16 = const()[name = string("const_626_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11898_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = const_626_promoted_to_fp16)[name = string("op_11898_cast_fp16")]; + bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; + tensor input_371_cast_fp16 = concat(axis = var_11896, interleave = input_371_interleave_0, values = (hidden_states_209_cast_fp16, var_11898_cast_fp16))[name = string("input_371_cast_fp16")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_11893_to_fp16 = const()[name = string("op_11893_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_11893_to_fp16, x = input_371_cast_fp16)[name = string("normed_333_cast_fp16")]; + tensor normed_335_begin_0 = const()[name = string("normed_335_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_335_end_0 = const()[name = string("normed_335_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_335_end_mask_0 = const()[name = string("normed_335_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_335_cast_fp16 = slice_by_index(begin = normed_335_begin_0, end = normed_335_end_0, end_mask = normed_335_end_mask_0, x = normed_333_cast_fp16)[name = string("normed_335_cast_fp16")]; + tensor const_629_promoted_to_fp16 = const()[name = string("const_629_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451601920)))]; + tensor x_333_cast_fp16 = mul(x = normed_335_cast_fp16, y = const_629_promoted_to_fp16)[name = string("x_333_cast_fp16")]; + tensor var_11923 = const()[name = string("op_11923"), val = tensor([0, 2, 1])]; + tensor input_373_axes_0 = const()[name = string("input_373_axes_0"), val = tensor([2])]; + tensor var_11924 = transpose(perm = var_11923, x = x_333_cast_fp16)[name = string("transpose_43")]; + tensor input_373 = expand_dims(axes = input_373_axes_0, x = var_11924)[name = string("input_373")]; + string input_375_pad_type_0 = const()[name = string("input_375_pad_type_0"), val = string("valid")]; + tensor input_375_strides_0 = const()[name = string("input_375_strides_0"), val = tensor([1, 1])]; + tensor input_375_pad_0 = const()[name = string("input_375_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_375_dilations_0 = const()[name = string("input_375_dilations_0"), val = tensor([1, 1])]; + int32 input_375_groups_0 = const()[name = string("input_375_groups_0"), val = int32(1)]; + tensor input_375 = conv(dilations = input_375_dilations_0, groups = input_375_groups_0, pad = input_375_pad_0, pad_type = input_375_pad_type_0, strides = input_375_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_palettized, x = input_373)[name = string("input_375")]; + string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; + tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; + tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; + int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; + tensor b_41 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_palettized, x = input_373)[name = string("b_41")]; + tensor c_41 = silu(x = input_375)[name = string("c_41")]; + tensor input_377 = mul(x = c_41, y = b_41)[name = string("input_377")]; + string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; + tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; + tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; + int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; + tensor e_41 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_palettized, x = input_377)[name = string("e_41")]; + tensor var_11946_axes_0 = const()[name = string("op_11946_axes_0"), val = tensor([2])]; + tensor var_11946 = squeeze(axes = var_11946_axes_0, x = e_41)[name = string("op_11946")]; + tensor var_11947 = const()[name = string("op_11947"), val = tensor([0, 2, 1])]; + tensor var_11948 = transpose(perm = var_11947, x = var_11946)[name = string("transpose_42")]; + tensor hidden_states_211_cast_fp16 = add(x = hidden_states_209_cast_fp16, y = var_11948)[name = string("hidden_states_211_cast_fp16")]; + int32 var_11960 = const()[name = string("op_11960"), val = int32(-1)]; + fp16 const_630_promoted_to_fp16 = const()[name = string("const_630_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11962_cast_fp16 = mul(x = hidden_states_211_cast_fp16, y = const_630_promoted_to_fp16)[name = string("op_11962_cast_fp16")]; + bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; + tensor input_379_cast_fp16 = concat(axis = var_11960, interleave = input_379_interleave_0, values = (hidden_states_211_cast_fp16, var_11962_cast_fp16))[name = string("input_379_cast_fp16")]; + tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; + fp16 var_11957_to_fp16 = const()[name = string("op_11957_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_11957_to_fp16, x = input_379_cast_fp16)[name = string("normed_337_cast_fp16")]; + tensor normed_339_begin_0 = const()[name = string("normed_339_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_339_end_0 = const()[name = string("normed_339_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_339_end_mask_0 = const()[name = string("normed_339_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_339_cast_fp16 = slice_by_index(begin = normed_339_begin_0, end = normed_339_end_0, end_mask = normed_339_end_mask_0, x = normed_337_cast_fp16)[name = string("normed_339_cast_fp16")]; + tensor const_633_promoted_to_fp16 = const()[name = string("const_633_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451604032)))]; + tensor hidden_states_213_cast_fp16 = mul(x = normed_339_cast_fp16, y = const_633_promoted_to_fp16)[name = string("hidden_states_213_cast_fp16")]; + tensor var_11979 = const()[name = string("op_11979"), val = tensor([0, 2, 1])]; + tensor var_11982_axes_0 = const()[name = string("op_11982_axes_0"), val = tensor([2])]; + tensor var_11980_cast_fp16 = transpose(perm = var_11979, x = hidden_states_213_cast_fp16)[name = string("transpose_41")]; + tensor var_11982_cast_fp16 = expand_dims(axes = var_11982_axes_0, x = var_11980_cast_fp16)[name = string("op_11982_cast_fp16")]; + string var_11998_pad_type_0 = const()[name = string("op_11998_pad_type_0"), val = string("valid")]; + tensor var_11998_strides_0 = const()[name = string("op_11998_strides_0"), val = tensor([1, 1])]; + tensor var_11998_pad_0 = const()[name = string("op_11998_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11998_dilations_0 = const()[name = string("op_11998_dilations_0"), val = tensor([1, 1])]; + int32 var_11998_groups_0 = const()[name = string("op_11998_groups_0"), val = int32(1)]; + tensor var_11998 = conv(dilations = var_11998_dilations_0, groups = var_11998_groups_0, pad = var_11998_pad_0, pad_type = var_11998_pad_type_0, strides = var_11998_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_11982_cast_fp16)[name = string("op_11998")]; + tensor var_12003 = const()[name = string("op_12003"), val = tensor([1, 16, 1, 128])]; + tensor var_12004 = reshape(shape = var_12003, x = var_11998)[name = string("op_12004")]; + string var_12020_pad_type_0 = const()[name = string("op_12020_pad_type_0"), val = string("valid")]; + tensor var_12020_strides_0 = const()[name = string("op_12020_strides_0"), val = tensor([1, 1])]; + tensor var_12020_pad_0 = const()[name = string("op_12020_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12020_dilations_0 = const()[name = string("op_12020_dilations_0"), val = tensor([1, 1])]; + int32 var_12020_groups_0 = const()[name = string("op_12020_groups_0"), val = int32(1)]; + tensor var_12020 = conv(dilations = var_12020_dilations_0, groups = var_12020_groups_0, pad = var_12020_pad_0, pad_type = var_12020_pad_type_0, strides = var_12020_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_11982_cast_fp16)[name = string("op_12020")]; + tensor var_12025 = const()[name = string("op_12025"), val = tensor([1, 8, 1, 128])]; + tensor var_12026 = reshape(shape = var_12025, x = var_12020)[name = string("op_12026")]; + string var_12042_pad_type_0 = const()[name = string("op_12042_pad_type_0"), val = string("valid")]; + tensor var_12042_strides_0 = const()[name = string("op_12042_strides_0"), val = tensor([1, 1])]; + tensor var_12042_pad_0 = const()[name = string("op_12042_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12042_dilations_0 = const()[name = string("op_12042_dilations_0"), val = tensor([1, 1])]; + int32 var_12042_groups_0 = const()[name = string("op_12042_groups_0"), val = int32(1)]; + tensor var_12042 = conv(dilations = var_12042_dilations_0, groups = var_12042_groups_0, pad = var_12042_pad_0, pad_type = var_12042_pad_type_0, strides = var_12042_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_11982_cast_fp16)[name = string("op_12042")]; + tensor var_12047 = const()[name = string("op_12047"), val = tensor([1, 8, 1, 128])]; + tensor var_12048 = reshape(shape = var_12047, x = var_12042)[name = string("op_12048")]; + int32 var_12063 = const()[name = string("op_12063"), val = int32(-1)]; + fp16 const_634_promoted = const()[name = string("const_634_promoted"), val = fp16(-0x1p+0)]; + tensor var_12065 = mul(x = var_12004, y = const_634_promoted)[name = string("op_12065")]; + bool input_383_interleave_0 = const()[name = string("input_383_interleave_0"), val = bool(false)]; + tensor input_383 = concat(axis = var_12063, interleave = input_383_interleave_0, values = (var_12004, var_12065))[name = string("input_383")]; + tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; + fp16 var_12060_to_fp16 = const()[name = string("op_12060_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_12060_to_fp16, x = input_383)[name = string("normed_341_cast_fp16")]; + tensor normed_343_begin_0 = const()[name = string("normed_343_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_343_end_0 = const()[name = string("normed_343_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_343_end_mask_0 = const()[name = string("normed_343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_343 = slice_by_index(begin = normed_343_begin_0, end = normed_343_end_0, end_mask = normed_343_end_mask_0, x = normed_341_cast_fp16)[name = string("normed_343")]; + tensor const_637 = const()[name = string("const_637"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451606144)))]; + tensor q_43 = mul(x = normed_343, y = const_637)[name = string("q_43")]; + int32 var_12088 = const()[name = string("op_12088"), val = int32(-1)]; + fp16 const_638_promoted = const()[name = string("const_638_promoted"), val = fp16(-0x1p+0)]; + tensor var_12090 = mul(x = var_12026, y = const_638_promoted)[name = string("op_12090")]; + bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; + tensor input_385 = concat(axis = var_12088, interleave = input_385_interleave_0, values = (var_12026, var_12090))[name = string("input_385")]; + tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; + fp16 var_12085_to_fp16 = const()[name = string("op_12085_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_12085_to_fp16, x = input_385)[name = string("normed_345_cast_fp16")]; + tensor normed_347_begin_0 = const()[name = string("normed_347_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_347_end_0 = const()[name = string("normed_347_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_347_end_mask_0 = const()[name = string("normed_347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_347 = slice_by_index(begin = normed_347_begin_0, end = normed_347_end_0, end_mask = normed_347_end_mask_0, x = normed_345_cast_fp16)[name = string("normed_347")]; + tensor const_641 = const()[name = string("const_641"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451606464)))]; + tensor k_43 = mul(x = normed_347, y = const_641)[name = string("k_43")]; + tensor var_12104 = mul(x = q_43, y = cos_1_cast_fp16)[name = string("op_12104")]; + tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_85 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43)[name = string("x1_85")]; + tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_85 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43)[name = string("x2_85")]; + fp16 const_644_promoted = const()[name = string("const_644_promoted"), val = fp16(-0x1p+0)]; + tensor var_12125 = mul(x = x2_85, y = const_644_promoted)[name = string("op_12125")]; + int32 var_12127 = const()[name = string("op_12127"), val = int32(-1)]; + bool var_12128_interleave_0 = const()[name = string("op_12128_interleave_0"), val = bool(false)]; + tensor var_12128 = concat(axis = var_12127, interleave = var_12128_interleave_0, values = (var_12125, x1_85))[name = string("op_12128")]; + tensor var_12129 = mul(x = var_12128, y = sin_1_cast_fp16)[name = string("op_12129")]; + tensor query_states_85 = add(x = var_12104, y = var_12129)[name = string("query_states_85")]; + tensor var_12132 = mul(x = k_43, y = cos_1_cast_fp16)[name = string("op_12132")]; + tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_87 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43)[name = string("x1_87")]; + tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_87 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43)[name = string("x2_87")]; + fp16 const_647_promoted = const()[name = string("const_647_promoted"), val = fp16(-0x1p+0)]; + tensor var_12153 = mul(x = x2_87, y = const_647_promoted)[name = string("op_12153")]; + int32 var_12155 = const()[name = string("op_12155"), val = int32(-1)]; + bool var_12156_interleave_0 = const()[name = string("op_12156_interleave_0"), val = bool(false)]; + tensor var_12156 = concat(axis = var_12155, interleave = var_12156_interleave_0, values = (var_12153, x1_87))[name = string("op_12156")]; + tensor var_12157 = mul(x = var_12156, y = sin_1_cast_fp16)[name = string("op_12157")]; + tensor key_states_85 = add(x = var_12132, y = var_12157)[name = string("key_states_85")]; + tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([21])]; + tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; + tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; + tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([22])]; + int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; + bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; + tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (expand_dims_252, expand_dims_253, current_pos, expand_dims_255))[name = string("concat_170")]; + tensor concat_171_values1_0 = const()[name = string("concat_171_values1_0"), val = tensor([0])]; + tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; + int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; + bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; + tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (expand_dims_256, concat_171_values1_0, var_1746, concat_171_values3_0))[name = string("concat_171")]; + tensor model_model_kv_cache_0_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_170, begin_mask = model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0, end = concat_171, end_mask = model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_43_stride_0, update = key_states_85, x = coreml_update_state_97)[name = string("model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_42_write_state")]; + tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_42")]; + tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([49])]; + tensor expand_dims_259 = const()[name = string("expand_dims_259"), val = tensor([0])]; + tensor expand_dims_261 = const()[name = string("expand_dims_261"), val = tensor([0])]; + tensor expand_dims_262 = const()[name = string("expand_dims_262"), val = tensor([50])]; + int32 concat_174_axis_0 = const()[name = string("concat_174_axis_0"), val = int32(0)]; + bool concat_174_interleave_0 = const()[name = string("concat_174_interleave_0"), val = bool(false)]; + tensor concat_174 = concat(axis = concat_174_axis_0, interleave = concat_174_interleave_0, values = (expand_dims_258, expand_dims_259, current_pos, expand_dims_261))[name = string("concat_174")]; + tensor concat_175_values1_0 = const()[name = string("concat_175_values1_0"), val = tensor([0])]; + tensor concat_175_values3_0 = const()[name = string("concat_175_values3_0"), val = tensor([0])]; + int32 concat_175_axis_0 = const()[name = string("concat_175_axis_0"), val = int32(0)]; + bool concat_175_interleave_0 = const()[name = string("concat_175_interleave_0"), val = bool(false)]; + tensor concat_175 = concat(axis = concat_175_axis_0, interleave = concat_175_interleave_0, values = (expand_dims_262, concat_175_values1_0, var_1746, concat_175_values3_0))[name = string("concat_175")]; + tensor model_model_kv_cache_0_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_174, begin_mask = model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0, end = concat_175, end_mask = model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_44_stride_0, update = var_12048, x = coreml_update_state_98)[name = string("model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_43_write_state")]; + tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_43")]; + tensor var_12212_begin_0 = const()[name = string("op_12212_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor var_12212_end_0 = const()[name = string("op_12212_end_0"), val = tensor([22, 8, 4096, 128])]; + tensor var_12212_end_mask_0 = const()[name = string("op_12212_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12212_cast_fp16 = slice_by_index(begin = var_12212_begin_0, end = var_12212_end_0, end_mask = var_12212_end_mask_0, x = coreml_update_state_99)[name = string("op_12212_cast_fp16")]; + tensor K_layer_cache_43_axes_0 = const()[name = string("K_layer_cache_43_axes_0"), val = tensor([0])]; + tensor K_layer_cache_43_cast_fp16 = squeeze(axes = K_layer_cache_43_axes_0, x = var_12212_cast_fp16)[name = string("K_layer_cache_43_cast_fp16")]; + tensor var_12219_begin_0 = const()[name = string("op_12219_begin_0"), val = tensor([49, 0, 0, 0])]; + tensor var_12219_end_0 = const()[name = string("op_12219_end_0"), val = tensor([50, 8, 4096, 128])]; + tensor var_12219_end_mask_0 = const()[name = string("op_12219_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12219_cast_fp16 = slice_by_index(begin = var_12219_begin_0, end = var_12219_end_0, end_mask = var_12219_end_mask_0, x = coreml_update_state_99)[name = string("op_12219_cast_fp16")]; + tensor V_layer_cache_43_axes_0 = const()[name = string("V_layer_cache_43_axes_0"), val = tensor([0])]; + tensor V_layer_cache_43_cast_fp16 = squeeze(axes = V_layer_cache_43_axes_0, x = var_12219_cast_fp16)[name = string("V_layer_cache_43_cast_fp16")]; + tensor x_339_axes_0 = const()[name = string("x_339_axes_0"), val = tensor([1])]; + tensor x_339_cast_fp16 = expand_dims(axes = x_339_axes_0, x = K_layer_cache_43_cast_fp16)[name = string("x_339_cast_fp16")]; + tensor var_12256 = const()[name = string("op_12256"), val = tensor([1, 2, 1, 1])]; + tensor x_341_cast_fp16 = tile(reps = var_12256, x = x_339_cast_fp16)[name = string("x_341_cast_fp16")]; + tensor var_12268 = const()[name = string("op_12268"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_87_cast_fp16 = reshape(shape = var_12268, x = x_341_cast_fp16)[name = string("key_states_87_cast_fp16")]; + tensor x_345_axes_0 = const()[name = string("x_345_axes_0"), val = tensor([1])]; + tensor x_345_cast_fp16 = expand_dims(axes = x_345_axes_0, x = V_layer_cache_43_cast_fp16)[name = string("x_345_cast_fp16")]; + tensor var_12276 = const()[name = string("op_12276"), val = tensor([1, 2, 1, 1])]; + tensor x_347_cast_fp16 = tile(reps = var_12276, x = x_345_cast_fp16)[name = string("x_347_cast_fp16")]; + tensor var_12288 = const()[name = string("op_12288"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_129_cast_fp16 = reshape(shape = var_12288, x = x_347_cast_fp16)[name = string("value_states_129_cast_fp16")]; + bool var_12303_transpose_x_1 = const()[name = string("op_12303_transpose_x_1"), val = bool(false)]; + bool var_12303_transpose_y_1 = const()[name = string("op_12303_transpose_y_1"), val = bool(true)]; + tensor var_12303 = matmul(transpose_x = var_12303_transpose_x_1, transpose_y = var_12303_transpose_y_1, x = query_states_85, y = key_states_87_cast_fp16)[name = string("op_12303")]; + fp16 var_12304_to_fp16 = const()[name = string("op_12304_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_127_cast_fp16 = mul(x = var_12303, y = var_12304_to_fp16)[name = string("attn_weights_127_cast_fp16")]; + tensor attn_weights_129_cast_fp16 = add(x = attn_weights_127_cast_fp16, y = causal_mask)[name = string("attn_weights_129_cast_fp16")]; + int32 var_12339 = const()[name = string("op_12339"), val = int32(-1)]; + tensor attn_weights_131_cast_fp16 = softmax(axis = var_12339, x = attn_weights_129_cast_fp16)[name = string("attn_weights_131_cast_fp16")]; + bool attn_output_211_transpose_x_0 = const()[name = string("attn_output_211_transpose_x_0"), val = bool(false)]; + bool attn_output_211_transpose_y_0 = const()[name = string("attn_output_211_transpose_y_0"), val = bool(false)]; + tensor attn_output_211_cast_fp16 = matmul(transpose_x = attn_output_211_transpose_x_0, transpose_y = attn_output_211_transpose_y_0, x = attn_weights_131_cast_fp16, y = value_states_129_cast_fp16)[name = string("attn_output_211_cast_fp16")]; + tensor var_12350_perm_0 = const()[name = string("op_12350_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_12354 = const()[name = string("op_12354"), val = tensor([1, 1, 2048])]; + tensor var_12350_cast_fp16 = transpose(perm = var_12350_perm_0, x = attn_output_211_cast_fp16)[name = string("transpose_40")]; + tensor attn_output_215_cast_fp16 = reshape(shape = var_12354, x = var_12350_cast_fp16)[name = string("attn_output_215_cast_fp16")]; + tensor var_12359 = const()[name = string("op_12359"), val = tensor([0, 2, 1])]; + string var_12375_pad_type_0 = const()[name = string("op_12375_pad_type_0"), val = string("valid")]; + int32 var_12375_groups_0 = const()[name = string("op_12375_groups_0"), val = int32(1)]; + tensor var_12375_strides_0 = const()[name = string("op_12375_strides_0"), val = tensor([1])]; + tensor var_12375_pad_0 = const()[name = string("op_12375_pad_0"), val = tensor([0, 0])]; + tensor var_12375_dilations_0 = const()[name = string("op_12375_dilations_0"), val = tensor([1])]; + tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451606784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453704000))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_12360_cast_fp16 = transpose(perm = var_12359, x = attn_output_215_cast_fp16)[name = string("transpose_39")]; + tensor var_12375_cast_fp16 = conv(dilations = var_12375_dilations_0, groups = var_12375_groups_0, pad = var_12375_pad_0, pad_type = var_12375_pad_type_0, strides = var_12375_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_12360_cast_fp16)[name = string("op_12375_cast_fp16")]; + tensor var_12379 = const()[name = string("op_12379"), val = tensor([0, 2, 1])]; + tensor attn_output_219_cast_fp16 = transpose(perm = var_12379, x = var_12375_cast_fp16)[name = string("transpose_38")]; + tensor hidden_states_219_cast_fp16 = add(x = hidden_states_211_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_219_cast_fp16")]; + int32 var_12392 = const()[name = string("op_12392"), val = int32(-1)]; + fp16 const_656_promoted_to_fp16 = const()[name = string("const_656_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12394_cast_fp16 = mul(x = hidden_states_219_cast_fp16, y = const_656_promoted_to_fp16)[name = string("op_12394_cast_fp16")]; + bool input_389_interleave_0 = const()[name = string("input_389_interleave_0"), val = bool(false)]; + tensor input_389_cast_fp16 = concat(axis = var_12392, interleave = input_389_interleave_0, values = (hidden_states_219_cast_fp16, var_12394_cast_fp16))[name = string("input_389_cast_fp16")]; + tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; + fp16 var_12389_to_fp16 = const()[name = string("op_12389_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_12389_to_fp16, x = input_389_cast_fp16)[name = string("normed_349_cast_fp16")]; + tensor normed_351_begin_0 = const()[name = string("normed_351_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_351_end_0 = const()[name = string("normed_351_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_351_end_mask_0 = const()[name = string("normed_351_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_351_cast_fp16 = slice_by_index(begin = normed_351_begin_0, end = normed_351_end_0, end_mask = normed_351_end_mask_0, x = normed_349_cast_fp16)[name = string("normed_351_cast_fp16")]; + tensor const_659_promoted_to_fp16 = const()[name = string("const_659_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453769600)))]; + tensor x_349_cast_fp16 = mul(x = normed_351_cast_fp16, y = const_659_promoted_to_fp16)[name = string("x_349_cast_fp16")]; + tensor var_12419 = const()[name = string("op_12419"), val = tensor([0, 2, 1])]; + tensor input_391_axes_0 = const()[name = string("input_391_axes_0"), val = tensor([2])]; + tensor var_12420 = transpose(perm = var_12419, x = x_349_cast_fp16)[name = string("transpose_37")]; + tensor input_391 = expand_dims(axes = input_391_axes_0, x = var_12420)[name = string("input_391")]; + string input_393_pad_type_0 = const()[name = string("input_393_pad_type_0"), val = string("valid")]; + tensor input_393_strides_0 = const()[name = string("input_393_strides_0"), val = tensor([1, 1])]; + tensor input_393_pad_0 = const()[name = string("input_393_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_393_dilations_0 = const()[name = string("input_393_dilations_0"), val = tensor([1, 1])]; + int32 input_393_groups_0 = const()[name = string("input_393_groups_0"), val = int32(1)]; + tensor input_393 = conv(dilations = input_393_dilations_0, groups = input_393_groups_0, pad = input_393_pad_0, pad_type = input_393_pad_type_0, strides = input_393_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_palettized, x = input_391)[name = string("input_393")]; + string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; + tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; + tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; + int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; + tensor b_43 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_palettized, x = input_391)[name = string("b_43")]; + tensor c_43 = silu(x = input_393)[name = string("c_43")]; + tensor input_395 = mul(x = c_43, y = b_43)[name = string("input_395")]; + string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; + tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; + tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; + int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; + tensor e_43 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_palettized, x = input_395)[name = string("e_43")]; + tensor var_12442_axes_0 = const()[name = string("op_12442_axes_0"), val = tensor([2])]; + tensor var_12442 = squeeze(axes = var_12442_axes_0, x = e_43)[name = string("op_12442")]; + tensor var_12443 = const()[name = string("op_12443"), val = tensor([0, 2, 1])]; + tensor var_12444 = transpose(perm = var_12443, x = var_12442)[name = string("transpose_36")]; + tensor hidden_states_221_cast_fp16 = add(x = hidden_states_219_cast_fp16, y = var_12444)[name = string("hidden_states_221_cast_fp16")]; + int32 var_12456 = const()[name = string("op_12456"), val = int32(-1)]; + fp16 const_660_promoted_to_fp16 = const()[name = string("const_660_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12458_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = const_660_promoted_to_fp16)[name = string("op_12458_cast_fp16")]; + bool input_397_interleave_0 = const()[name = string("input_397_interleave_0"), val = bool(false)]; + tensor input_397_cast_fp16 = concat(axis = var_12456, interleave = input_397_interleave_0, values = (hidden_states_221_cast_fp16, var_12458_cast_fp16))[name = string("input_397_cast_fp16")]; + tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; + fp16 var_12453_to_fp16 = const()[name = string("op_12453_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_12453_to_fp16, x = input_397_cast_fp16)[name = string("normed_353_cast_fp16")]; + tensor normed_355_begin_0 = const()[name = string("normed_355_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_355_end_0 = const()[name = string("normed_355_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_355_end_mask_0 = const()[name = string("normed_355_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_355_cast_fp16 = slice_by_index(begin = normed_355_begin_0, end = normed_355_end_0, end_mask = normed_355_end_mask_0, x = normed_353_cast_fp16)[name = string("normed_355_cast_fp16")]; + tensor const_663_promoted_to_fp16 = const()[name = string("const_663_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453771712)))]; + tensor hidden_states_223_cast_fp16 = mul(x = normed_355_cast_fp16, y = const_663_promoted_to_fp16)[name = string("hidden_states_223_cast_fp16")]; + tensor var_12475 = const()[name = string("op_12475"), val = tensor([0, 2, 1])]; + tensor var_12478_axes_0 = const()[name = string("op_12478_axes_0"), val = tensor([2])]; + tensor var_12476_cast_fp16 = transpose(perm = var_12475, x = hidden_states_223_cast_fp16)[name = string("transpose_35")]; + tensor var_12478_cast_fp16 = expand_dims(axes = var_12478_axes_0, x = var_12476_cast_fp16)[name = string("op_12478_cast_fp16")]; + string var_12494_pad_type_0 = const()[name = string("op_12494_pad_type_0"), val = string("valid")]; + tensor var_12494_strides_0 = const()[name = string("op_12494_strides_0"), val = tensor([1, 1])]; + tensor var_12494_pad_0 = const()[name = string("op_12494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12494_dilations_0 = const()[name = string("op_12494_dilations_0"), val = tensor([1, 1])]; + int32 var_12494_groups_0 = const()[name = string("op_12494_groups_0"), val = int32(1)]; + tensor var_12494 = conv(dilations = var_12494_dilations_0, groups = var_12494_groups_0, pad = var_12494_pad_0, pad_type = var_12494_pad_type_0, strides = var_12494_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_12478_cast_fp16)[name = string("op_12494")]; + tensor var_12499 = const()[name = string("op_12499"), val = tensor([1, 16, 1, 128])]; + tensor var_12500 = reshape(shape = var_12499, x = var_12494)[name = string("op_12500")]; + string var_12516_pad_type_0 = const()[name = string("op_12516_pad_type_0"), val = string("valid")]; + tensor var_12516_strides_0 = const()[name = string("op_12516_strides_0"), val = tensor([1, 1])]; + tensor var_12516_pad_0 = const()[name = string("op_12516_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12516_dilations_0 = const()[name = string("op_12516_dilations_0"), val = tensor([1, 1])]; + int32 var_12516_groups_0 = const()[name = string("op_12516_groups_0"), val = int32(1)]; + tensor var_12516 = conv(dilations = var_12516_dilations_0, groups = var_12516_groups_0, pad = var_12516_pad_0, pad_type = var_12516_pad_type_0, strides = var_12516_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_12478_cast_fp16)[name = string("op_12516")]; + tensor var_12521 = const()[name = string("op_12521"), val = tensor([1, 8, 1, 128])]; + tensor var_12522 = reshape(shape = var_12521, x = var_12516)[name = string("op_12522")]; + string var_12538_pad_type_0 = const()[name = string("op_12538_pad_type_0"), val = string("valid")]; + tensor var_12538_strides_0 = const()[name = string("op_12538_strides_0"), val = tensor([1, 1])]; + tensor var_12538_pad_0 = const()[name = string("op_12538_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12538_dilations_0 = const()[name = string("op_12538_dilations_0"), val = tensor([1, 1])]; + int32 var_12538_groups_0 = const()[name = string("op_12538_groups_0"), val = int32(1)]; + tensor var_12538 = conv(dilations = var_12538_dilations_0, groups = var_12538_groups_0, pad = var_12538_pad_0, pad_type = var_12538_pad_type_0, strides = var_12538_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_12478_cast_fp16)[name = string("op_12538")]; + tensor var_12543 = const()[name = string("op_12543"), val = tensor([1, 8, 1, 128])]; + tensor var_12544 = reshape(shape = var_12543, x = var_12538)[name = string("op_12544")]; + int32 var_12559 = const()[name = string("op_12559"), val = int32(-1)]; + fp16 const_664_promoted = const()[name = string("const_664_promoted"), val = fp16(-0x1p+0)]; + tensor var_12561 = mul(x = var_12500, y = const_664_promoted)[name = string("op_12561")]; + bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; + tensor input_401 = concat(axis = var_12559, interleave = input_401_interleave_0, values = (var_12500, var_12561))[name = string("input_401")]; + tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; + fp16 var_12556_to_fp16 = const()[name = string("op_12556_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_12556_to_fp16, x = input_401)[name = string("normed_357_cast_fp16")]; + tensor normed_359_begin_0 = const()[name = string("normed_359_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_359_end_0 = const()[name = string("normed_359_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_359_end_mask_0 = const()[name = string("normed_359_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_359 = slice_by_index(begin = normed_359_begin_0, end = normed_359_end_0, end_mask = normed_359_end_mask_0, x = normed_357_cast_fp16)[name = string("normed_359")]; + tensor const_667 = const()[name = string("const_667"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453773824)))]; + tensor q_45 = mul(x = normed_359, y = const_667)[name = string("q_45")]; + int32 var_12584 = const()[name = string("op_12584"), val = int32(-1)]; + fp16 const_668_promoted = const()[name = string("const_668_promoted"), val = fp16(-0x1p+0)]; + tensor var_12586 = mul(x = var_12522, y = const_668_promoted)[name = string("op_12586")]; + bool input_403_interleave_0 = const()[name = string("input_403_interleave_0"), val = bool(false)]; + tensor input_403 = concat(axis = var_12584, interleave = input_403_interleave_0, values = (var_12522, var_12586))[name = string("input_403")]; + tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; + fp16 var_12581_to_fp16 = const()[name = string("op_12581_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_12581_to_fp16, x = input_403)[name = string("normed_361_cast_fp16")]; + tensor normed_363_begin_0 = const()[name = string("normed_363_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_363_end_0 = const()[name = string("normed_363_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_363_end_mask_0 = const()[name = string("normed_363_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_363 = slice_by_index(begin = normed_363_begin_0, end = normed_363_end_0, end_mask = normed_363_end_mask_0, x = normed_361_cast_fp16)[name = string("normed_363")]; + tensor const_671 = const()[name = string("const_671"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453774144)))]; + tensor k_45 = mul(x = normed_363, y = const_671)[name = string("k_45")]; + tensor var_12600 = mul(x = q_45, y = cos_1_cast_fp16)[name = string("op_12600")]; + tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_89 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45)[name = string("x1_89")]; + tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_89 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45)[name = string("x2_89")]; + fp16 const_674_promoted = const()[name = string("const_674_promoted"), val = fp16(-0x1p+0)]; + tensor var_12621 = mul(x = x2_89, y = const_674_promoted)[name = string("op_12621")]; + int32 var_12623 = const()[name = string("op_12623"), val = int32(-1)]; + bool var_12624_interleave_0 = const()[name = string("op_12624_interleave_0"), val = bool(false)]; + tensor var_12624 = concat(axis = var_12623, interleave = var_12624_interleave_0, values = (var_12621, x1_89))[name = string("op_12624")]; + tensor var_12625 = mul(x = var_12624, y = sin_1_cast_fp16)[name = string("op_12625")]; + tensor query_states_89 = add(x = var_12600, y = var_12625)[name = string("query_states_89")]; + tensor var_12628 = mul(x = k_45, y = cos_1_cast_fp16)[name = string("op_12628")]; + tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_91 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45)[name = string("x1_91")]; + tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_91 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45)[name = string("x2_91")]; + fp16 const_677_promoted = const()[name = string("const_677_promoted"), val = fp16(-0x1p+0)]; + tensor var_12649 = mul(x = x2_91, y = const_677_promoted)[name = string("op_12649")]; + int32 var_12651 = const()[name = string("op_12651"), val = int32(-1)]; + bool var_12652_interleave_0 = const()[name = string("op_12652_interleave_0"), val = bool(false)]; + tensor var_12652 = concat(axis = var_12651, interleave = var_12652_interleave_0, values = (var_12649, x1_91))[name = string("op_12652")]; + tensor var_12653 = mul(x = var_12652, y = sin_1_cast_fp16)[name = string("op_12653")]; + tensor key_states_89 = add(x = var_12628, y = var_12653)[name = string("key_states_89")]; + tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([22])]; + tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; + tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; + tensor expand_dims_268 = const()[name = string("expand_dims_268"), val = tensor([23])]; + int32 concat_178_axis_0 = const()[name = string("concat_178_axis_0"), val = int32(0)]; + bool concat_178_interleave_0 = const()[name = string("concat_178_interleave_0"), val = bool(false)]; + tensor concat_178 = concat(axis = concat_178_axis_0, interleave = concat_178_interleave_0, values = (expand_dims_264, expand_dims_265, current_pos, expand_dims_267))[name = string("concat_178")]; + tensor concat_179_values1_0 = const()[name = string("concat_179_values1_0"), val = tensor([0])]; + tensor concat_179_values3_0 = const()[name = string("concat_179_values3_0"), val = tensor([0])]; + int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; + bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; + tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (expand_dims_268, concat_179_values1_0, var_1746, concat_179_values3_0))[name = string("concat_179")]; + tensor model_model_kv_cache_0_internal_tensor_assign_45_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16 = slice_update(begin = concat_178, begin_mask = model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0, end = concat_179, end_mask = model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_45_stride_0, update = key_states_89, x = coreml_update_state_99)[name = string("model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_44_write_state")]; + tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_44")]; + tensor expand_dims_270 = const()[name = string("expand_dims_270"), val = tensor([50])]; + tensor expand_dims_271 = const()[name = string("expand_dims_271"), val = tensor([0])]; + tensor expand_dims_273 = const()[name = string("expand_dims_273"), val = tensor([0])]; + tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([51])]; + int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; + bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; + tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_270, expand_dims_271, current_pos, expand_dims_273))[name = string("concat_182")]; + tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; + tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; + int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; + bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; + tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_274, concat_183_values1_0, var_1746, concat_183_values3_0))[name = string("concat_183")]; + tensor model_model_kv_cache_0_internal_tensor_assign_46_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_46_stride_0, update = var_12544, x = coreml_update_state_100)[name = string("model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_45_write_state")]; + tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_45")]; + tensor var_12708_begin_0 = const()[name = string("op_12708_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor var_12708_end_0 = const()[name = string("op_12708_end_0"), val = tensor([23, 8, 4096, 128])]; + tensor var_12708_end_mask_0 = const()[name = string("op_12708_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12708_cast_fp16 = slice_by_index(begin = var_12708_begin_0, end = var_12708_end_0, end_mask = var_12708_end_mask_0, x = coreml_update_state_101)[name = string("op_12708_cast_fp16")]; + tensor K_layer_cache_45_axes_0 = const()[name = string("K_layer_cache_45_axes_0"), val = tensor([0])]; + tensor K_layer_cache_45_cast_fp16 = squeeze(axes = K_layer_cache_45_axes_0, x = var_12708_cast_fp16)[name = string("K_layer_cache_45_cast_fp16")]; + tensor var_12715_begin_0 = const()[name = string("op_12715_begin_0"), val = tensor([50, 0, 0, 0])]; + tensor var_12715_end_0 = const()[name = string("op_12715_end_0"), val = tensor([51, 8, 4096, 128])]; + tensor var_12715_end_mask_0 = const()[name = string("op_12715_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12715_cast_fp16 = slice_by_index(begin = var_12715_begin_0, end = var_12715_end_0, end_mask = var_12715_end_mask_0, x = coreml_update_state_101)[name = string("op_12715_cast_fp16")]; + tensor V_layer_cache_45_axes_0 = const()[name = string("V_layer_cache_45_axes_0"), val = tensor([0])]; + tensor V_layer_cache_45_cast_fp16 = squeeze(axes = V_layer_cache_45_axes_0, x = var_12715_cast_fp16)[name = string("V_layer_cache_45_cast_fp16")]; + tensor x_355_axes_0 = const()[name = string("x_355_axes_0"), val = tensor([1])]; + tensor x_355_cast_fp16 = expand_dims(axes = x_355_axes_0, x = K_layer_cache_45_cast_fp16)[name = string("x_355_cast_fp16")]; + tensor var_12752 = const()[name = string("op_12752"), val = tensor([1, 2, 1, 1])]; + tensor x_357_cast_fp16 = tile(reps = var_12752, x = x_355_cast_fp16)[name = string("x_357_cast_fp16")]; + tensor var_12764 = const()[name = string("op_12764"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_91_cast_fp16 = reshape(shape = var_12764, x = x_357_cast_fp16)[name = string("key_states_91_cast_fp16")]; + tensor x_361_axes_0 = const()[name = string("x_361_axes_0"), val = tensor([1])]; + tensor x_361_cast_fp16 = expand_dims(axes = x_361_axes_0, x = V_layer_cache_45_cast_fp16)[name = string("x_361_cast_fp16")]; + tensor var_12772 = const()[name = string("op_12772"), val = tensor([1, 2, 1, 1])]; + tensor x_363_cast_fp16 = tile(reps = var_12772, x = x_361_cast_fp16)[name = string("x_363_cast_fp16")]; + tensor var_12784 = const()[name = string("op_12784"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_135_cast_fp16 = reshape(shape = var_12784, x = x_363_cast_fp16)[name = string("value_states_135_cast_fp16")]; + bool var_12799_transpose_x_1 = const()[name = string("op_12799_transpose_x_1"), val = bool(false)]; + bool var_12799_transpose_y_1 = const()[name = string("op_12799_transpose_y_1"), val = bool(true)]; + tensor var_12799 = matmul(transpose_x = var_12799_transpose_x_1, transpose_y = var_12799_transpose_y_1, x = query_states_89, y = key_states_91_cast_fp16)[name = string("op_12799")]; + fp16 var_12800_to_fp16 = const()[name = string("op_12800_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_133_cast_fp16 = mul(x = var_12799, y = var_12800_to_fp16)[name = string("attn_weights_133_cast_fp16")]; + tensor attn_weights_135_cast_fp16 = add(x = attn_weights_133_cast_fp16, y = causal_mask)[name = string("attn_weights_135_cast_fp16")]; + int32 var_12835 = const()[name = string("op_12835"), val = int32(-1)]; + tensor attn_weights_137_cast_fp16 = softmax(axis = var_12835, x = attn_weights_135_cast_fp16)[name = string("attn_weights_137_cast_fp16")]; + bool attn_output_221_transpose_x_0 = const()[name = string("attn_output_221_transpose_x_0"), val = bool(false)]; + bool attn_output_221_transpose_y_0 = const()[name = string("attn_output_221_transpose_y_0"), val = bool(false)]; + tensor attn_output_221_cast_fp16 = matmul(transpose_x = attn_output_221_transpose_x_0, transpose_y = attn_output_221_transpose_y_0, x = attn_weights_137_cast_fp16, y = value_states_135_cast_fp16)[name = string("attn_output_221_cast_fp16")]; + tensor var_12846_perm_0 = const()[name = string("op_12846_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_12850 = const()[name = string("op_12850"), val = tensor([1, 1, 2048])]; + tensor var_12846_cast_fp16 = transpose(perm = var_12846_perm_0, x = attn_output_221_cast_fp16)[name = string("transpose_34")]; + tensor attn_output_225_cast_fp16 = reshape(shape = var_12850, x = var_12846_cast_fp16)[name = string("attn_output_225_cast_fp16")]; + tensor var_12855 = const()[name = string("op_12855"), val = tensor([0, 2, 1])]; + string var_12871_pad_type_0 = const()[name = string("op_12871_pad_type_0"), val = string("valid")]; + int32 var_12871_groups_0 = const()[name = string("op_12871_groups_0"), val = int32(1)]; + tensor var_12871_strides_0 = const()[name = string("op_12871_strides_0"), val = tensor([1])]; + tensor var_12871_pad_0 = const()[name = string("op_12871_pad_0"), val = tensor([0, 0])]; + tensor var_12871_dilations_0 = const()[name = string("op_12871_dilations_0"), val = tensor([1])]; + tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453774464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455871680))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_12856_cast_fp16 = transpose(perm = var_12855, x = attn_output_225_cast_fp16)[name = string("transpose_33")]; + tensor var_12871_cast_fp16 = conv(dilations = var_12871_dilations_0, groups = var_12871_groups_0, pad = var_12871_pad_0, pad_type = var_12871_pad_type_0, strides = var_12871_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_12856_cast_fp16)[name = string("op_12871_cast_fp16")]; + tensor var_12875 = const()[name = string("op_12875"), val = tensor([0, 2, 1])]; + tensor attn_output_229_cast_fp16 = transpose(perm = var_12875, x = var_12871_cast_fp16)[name = string("transpose_32")]; + tensor hidden_states_229_cast_fp16 = add(x = hidden_states_221_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; + int32 var_12888 = const()[name = string("op_12888"), val = int32(-1)]; + fp16 const_686_promoted_to_fp16 = const()[name = string("const_686_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12890_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = const_686_promoted_to_fp16)[name = string("op_12890_cast_fp16")]; + bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; + tensor input_407_cast_fp16 = concat(axis = var_12888, interleave = input_407_interleave_0, values = (hidden_states_229_cast_fp16, var_12890_cast_fp16))[name = string("input_407_cast_fp16")]; + tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; + fp16 var_12885_to_fp16 = const()[name = string("op_12885_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_12885_to_fp16, x = input_407_cast_fp16)[name = string("normed_365_cast_fp16")]; + tensor normed_367_begin_0 = const()[name = string("normed_367_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_367_end_0 = const()[name = string("normed_367_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_367_end_mask_0 = const()[name = string("normed_367_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_367_cast_fp16 = slice_by_index(begin = normed_367_begin_0, end = normed_367_end_0, end_mask = normed_367_end_mask_0, x = normed_365_cast_fp16)[name = string("normed_367_cast_fp16")]; + tensor const_689_promoted_to_fp16 = const()[name = string("const_689_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455937280)))]; + tensor x_365_cast_fp16 = mul(x = normed_367_cast_fp16, y = const_689_promoted_to_fp16)[name = string("x_365_cast_fp16")]; + tensor var_12915 = const()[name = string("op_12915"), val = tensor([0, 2, 1])]; + tensor input_409_axes_0 = const()[name = string("input_409_axes_0"), val = tensor([2])]; + tensor var_12916 = transpose(perm = var_12915, x = x_365_cast_fp16)[name = string("transpose_31")]; + tensor input_409 = expand_dims(axes = input_409_axes_0, x = var_12916)[name = string("input_409")]; + string input_411_pad_type_0 = const()[name = string("input_411_pad_type_0"), val = string("valid")]; + tensor input_411_strides_0 = const()[name = string("input_411_strides_0"), val = tensor([1, 1])]; + tensor input_411_pad_0 = const()[name = string("input_411_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_411_dilations_0 = const()[name = string("input_411_dilations_0"), val = tensor([1, 1])]; + int32 input_411_groups_0 = const()[name = string("input_411_groups_0"), val = int32(1)]; + tensor input_411 = conv(dilations = input_411_dilations_0, groups = input_411_groups_0, pad = input_411_pad_0, pad_type = input_411_pad_type_0, strides = input_411_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_palettized, x = input_409)[name = string("input_411")]; + string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; + tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; + tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; + int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; + tensor b_45 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_palettized, x = input_409)[name = string("b_45")]; + tensor c_45 = silu(x = input_411)[name = string("c_45")]; + tensor input_413 = mul(x = c_45, y = b_45)[name = string("input_413")]; + string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; + tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; + tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; + int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; + tensor e_45 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_palettized, x = input_413)[name = string("e_45")]; + tensor var_12938_axes_0 = const()[name = string("op_12938_axes_0"), val = tensor([2])]; + tensor var_12938 = squeeze(axes = var_12938_axes_0, x = e_45)[name = string("op_12938")]; + tensor var_12939 = const()[name = string("op_12939"), val = tensor([0, 2, 1])]; + tensor var_12940 = transpose(perm = var_12939, x = var_12938)[name = string("transpose_30")]; + tensor hidden_states_231_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = var_12940)[name = string("hidden_states_231_cast_fp16")]; + int32 var_12952 = const()[name = string("op_12952"), val = int32(-1)]; + fp16 const_690_promoted_to_fp16 = const()[name = string("const_690_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12954_cast_fp16 = mul(x = hidden_states_231_cast_fp16, y = const_690_promoted_to_fp16)[name = string("op_12954_cast_fp16")]; + bool input_415_interleave_0 = const()[name = string("input_415_interleave_0"), val = bool(false)]; + tensor input_415_cast_fp16 = concat(axis = var_12952, interleave = input_415_interleave_0, values = (hidden_states_231_cast_fp16, var_12954_cast_fp16))[name = string("input_415_cast_fp16")]; + tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; + fp16 var_12949_to_fp16 = const()[name = string("op_12949_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_12949_to_fp16, x = input_415_cast_fp16)[name = string("normed_369_cast_fp16")]; + tensor normed_371_begin_0 = const()[name = string("normed_371_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_371_end_0 = const()[name = string("normed_371_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_371_end_mask_0 = const()[name = string("normed_371_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_371_cast_fp16 = slice_by_index(begin = normed_371_begin_0, end = normed_371_end_0, end_mask = normed_371_end_mask_0, x = normed_369_cast_fp16)[name = string("normed_371_cast_fp16")]; + tensor const_693_promoted_to_fp16 = const()[name = string("const_693_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455939392)))]; + tensor hidden_states_233_cast_fp16 = mul(x = normed_371_cast_fp16, y = const_693_promoted_to_fp16)[name = string("hidden_states_233_cast_fp16")]; + tensor var_12971 = const()[name = string("op_12971"), val = tensor([0, 2, 1])]; + tensor var_12974_axes_0 = const()[name = string("op_12974_axes_0"), val = tensor([2])]; + tensor var_12972_cast_fp16 = transpose(perm = var_12971, x = hidden_states_233_cast_fp16)[name = string("transpose_29")]; + tensor var_12974_cast_fp16 = expand_dims(axes = var_12974_axes_0, x = var_12972_cast_fp16)[name = string("op_12974_cast_fp16")]; + string var_12990_pad_type_0 = const()[name = string("op_12990_pad_type_0"), val = string("valid")]; + tensor var_12990_strides_0 = const()[name = string("op_12990_strides_0"), val = tensor([1, 1])]; + tensor var_12990_pad_0 = const()[name = string("op_12990_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12990_dilations_0 = const()[name = string("op_12990_dilations_0"), val = tensor([1, 1])]; + int32 var_12990_groups_0 = const()[name = string("op_12990_groups_0"), val = int32(1)]; + tensor var_12990 = conv(dilations = var_12990_dilations_0, groups = var_12990_groups_0, pad = var_12990_pad_0, pad_type = var_12990_pad_type_0, strides = var_12990_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_12974_cast_fp16)[name = string("op_12990")]; + tensor var_12995 = const()[name = string("op_12995"), val = tensor([1, 16, 1, 128])]; + tensor var_12996 = reshape(shape = var_12995, x = var_12990)[name = string("op_12996")]; + string var_13012_pad_type_0 = const()[name = string("op_13012_pad_type_0"), val = string("valid")]; + tensor var_13012_strides_0 = const()[name = string("op_13012_strides_0"), val = tensor([1, 1])]; + tensor var_13012_pad_0 = const()[name = string("op_13012_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13012_dilations_0 = const()[name = string("op_13012_dilations_0"), val = tensor([1, 1])]; + int32 var_13012_groups_0 = const()[name = string("op_13012_groups_0"), val = int32(1)]; + tensor var_13012 = conv(dilations = var_13012_dilations_0, groups = var_13012_groups_0, pad = var_13012_pad_0, pad_type = var_13012_pad_type_0, strides = var_13012_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_12974_cast_fp16)[name = string("op_13012")]; + tensor var_13017 = const()[name = string("op_13017"), val = tensor([1, 8, 1, 128])]; + tensor var_13018 = reshape(shape = var_13017, x = var_13012)[name = string("op_13018")]; + string var_13034_pad_type_0 = const()[name = string("op_13034_pad_type_0"), val = string("valid")]; + tensor var_13034_strides_0 = const()[name = string("op_13034_strides_0"), val = tensor([1, 1])]; + tensor var_13034_pad_0 = const()[name = string("op_13034_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13034_dilations_0 = const()[name = string("op_13034_dilations_0"), val = tensor([1, 1])]; + int32 var_13034_groups_0 = const()[name = string("op_13034_groups_0"), val = int32(1)]; + tensor var_13034 = conv(dilations = var_13034_dilations_0, groups = var_13034_groups_0, pad = var_13034_pad_0, pad_type = var_13034_pad_type_0, strides = var_13034_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_12974_cast_fp16)[name = string("op_13034")]; + tensor var_13039 = const()[name = string("op_13039"), val = tensor([1, 8, 1, 128])]; + tensor var_13040 = reshape(shape = var_13039, x = var_13034)[name = string("op_13040")]; + int32 var_13055 = const()[name = string("op_13055"), val = int32(-1)]; + fp16 const_694_promoted = const()[name = string("const_694_promoted"), val = fp16(-0x1p+0)]; + tensor var_13057 = mul(x = var_12996, y = const_694_promoted)[name = string("op_13057")]; + bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; + tensor input_419 = concat(axis = var_13055, interleave = input_419_interleave_0, values = (var_12996, var_13057))[name = string("input_419")]; + tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; + fp16 var_13052_to_fp16 = const()[name = string("op_13052_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_13052_to_fp16, x = input_419)[name = string("normed_373_cast_fp16")]; + tensor normed_375_begin_0 = const()[name = string("normed_375_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_375_end_0 = const()[name = string("normed_375_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_375_end_mask_0 = const()[name = string("normed_375_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_375 = slice_by_index(begin = normed_375_begin_0, end = normed_375_end_0, end_mask = normed_375_end_mask_0, x = normed_373_cast_fp16)[name = string("normed_375")]; + tensor const_697 = const()[name = string("const_697"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455941504)))]; + tensor q_47 = mul(x = normed_375, y = const_697)[name = string("q_47")]; + int32 var_13080 = const()[name = string("op_13080"), val = int32(-1)]; + fp16 const_698_promoted = const()[name = string("const_698_promoted"), val = fp16(-0x1p+0)]; + tensor var_13082 = mul(x = var_13018, y = const_698_promoted)[name = string("op_13082")]; + bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; + tensor input_421 = concat(axis = var_13080, interleave = input_421_interleave_0, values = (var_13018, var_13082))[name = string("input_421")]; + tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; + fp16 var_13077_to_fp16 = const()[name = string("op_13077_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_13077_to_fp16, x = input_421)[name = string("normed_377_cast_fp16")]; + tensor normed_379_begin_0 = const()[name = string("normed_379_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_379_end_0 = const()[name = string("normed_379_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_379_end_mask_0 = const()[name = string("normed_379_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_379 = slice_by_index(begin = normed_379_begin_0, end = normed_379_end_0, end_mask = normed_379_end_mask_0, x = normed_377_cast_fp16)[name = string("normed_379")]; + tensor const_701 = const()[name = string("const_701"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455941824)))]; + tensor k_47 = mul(x = normed_379, y = const_701)[name = string("k_47")]; + tensor var_13096 = mul(x = q_47, y = cos_1_cast_fp16)[name = string("op_13096")]; + tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_93 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47)[name = string("x1_93")]; + tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_93 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47)[name = string("x2_93")]; + fp16 const_704_promoted = const()[name = string("const_704_promoted"), val = fp16(-0x1p+0)]; + tensor var_13117 = mul(x = x2_93, y = const_704_promoted)[name = string("op_13117")]; + int32 var_13119 = const()[name = string("op_13119"), val = int32(-1)]; + bool var_13120_interleave_0 = const()[name = string("op_13120_interleave_0"), val = bool(false)]; + tensor var_13120 = concat(axis = var_13119, interleave = var_13120_interleave_0, values = (var_13117, x1_93))[name = string("op_13120")]; + tensor var_13121 = mul(x = var_13120, y = sin_1_cast_fp16)[name = string("op_13121")]; + tensor query_states_93 = add(x = var_13096, y = var_13121)[name = string("query_states_93")]; + tensor var_13124 = mul(x = k_47, y = cos_1_cast_fp16)[name = string("op_13124")]; + tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_95 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47)[name = string("x1_95")]; + tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_95 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47)[name = string("x2_95")]; + fp16 const_707_promoted = const()[name = string("const_707_promoted"), val = fp16(-0x1p+0)]; + tensor var_13145 = mul(x = x2_95, y = const_707_promoted)[name = string("op_13145")]; + int32 var_13147 = const()[name = string("op_13147"), val = int32(-1)]; + bool var_13148_interleave_0 = const()[name = string("op_13148_interleave_0"), val = bool(false)]; + tensor var_13148 = concat(axis = var_13147, interleave = var_13148_interleave_0, values = (var_13145, x1_95))[name = string("op_13148")]; + tensor var_13149 = mul(x = var_13148, y = sin_1_cast_fp16)[name = string("op_13149")]; + tensor key_states_93 = add(x = var_13124, y = var_13149)[name = string("key_states_93")]; + tensor expand_dims_276 = const()[name = string("expand_dims_276"), val = tensor([23])]; + tensor expand_dims_277 = const()[name = string("expand_dims_277"), val = tensor([0])]; + tensor expand_dims_279 = const()[name = string("expand_dims_279"), val = tensor([0])]; + tensor expand_dims_280 = const()[name = string("expand_dims_280"), val = tensor([24])]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (expand_dims_276, expand_dims_277, current_pos, expand_dims_279))[name = string("concat_186")]; + tensor concat_187_values1_0 = const()[name = string("concat_187_values1_0"), val = tensor([0])]; + tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; + int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; + bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; + tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_280, concat_187_values1_0, var_1746, concat_187_values3_0))[name = string("concat_187")]; + tensor model_model_kv_cache_0_internal_tensor_assign_47_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16 = slice_update(begin = concat_186, begin_mask = model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0, end = concat_187, end_mask = model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_47_stride_0, update = key_states_93, x = coreml_update_state_101)[name = string("model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_46_write_state")]; + tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_46")]; + tensor expand_dims_282 = const()[name = string("expand_dims_282"), val = tensor([51])]; + tensor expand_dims_283 = const()[name = string("expand_dims_283"), val = tensor([0])]; + tensor expand_dims_285 = const()[name = string("expand_dims_285"), val = tensor([0])]; + tensor expand_dims_286 = const()[name = string("expand_dims_286"), val = tensor([52])]; + int32 concat_190_axis_0 = const()[name = string("concat_190_axis_0"), val = int32(0)]; + bool concat_190_interleave_0 = const()[name = string("concat_190_interleave_0"), val = bool(false)]; + tensor concat_190 = concat(axis = concat_190_axis_0, interleave = concat_190_interleave_0, values = (expand_dims_282, expand_dims_283, current_pos, expand_dims_285))[name = string("concat_190")]; + tensor concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = tensor([0])]; + tensor concat_191_values3_0 = const()[name = string("concat_191_values3_0"), val = tensor([0])]; + int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)]; + bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)]; + tensor concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (expand_dims_286, concat_191_values1_0, var_1746, concat_191_values3_0))[name = string("concat_191")]; + tensor model_model_kv_cache_0_internal_tensor_assign_48_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16 = slice_update(begin = concat_190, begin_mask = model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0, end = concat_191, end_mask = model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_48_stride_0, update = var_13040, x = coreml_update_state_102)[name = string("model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_47_write_state")]; + tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_47")]; + tensor var_13204_begin_0 = const()[name = string("op_13204_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor var_13204_end_0 = const()[name = string("op_13204_end_0"), val = tensor([24, 8, 4096, 128])]; + tensor var_13204_end_mask_0 = const()[name = string("op_13204_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13204_cast_fp16 = slice_by_index(begin = var_13204_begin_0, end = var_13204_end_0, end_mask = var_13204_end_mask_0, x = coreml_update_state_103)[name = string("op_13204_cast_fp16")]; + tensor K_layer_cache_47_axes_0 = const()[name = string("K_layer_cache_47_axes_0"), val = tensor([0])]; + tensor K_layer_cache_47_cast_fp16 = squeeze(axes = K_layer_cache_47_axes_0, x = var_13204_cast_fp16)[name = string("K_layer_cache_47_cast_fp16")]; + tensor var_13211_begin_0 = const()[name = string("op_13211_begin_0"), val = tensor([51, 0, 0, 0])]; + tensor var_13211_end_0 = const()[name = string("op_13211_end_0"), val = tensor([52, 8, 4096, 128])]; + tensor var_13211_end_mask_0 = const()[name = string("op_13211_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13211_cast_fp16 = slice_by_index(begin = var_13211_begin_0, end = var_13211_end_0, end_mask = var_13211_end_mask_0, x = coreml_update_state_103)[name = string("op_13211_cast_fp16")]; + tensor V_layer_cache_47_axes_0 = const()[name = string("V_layer_cache_47_axes_0"), val = tensor([0])]; + tensor V_layer_cache_47_cast_fp16 = squeeze(axes = V_layer_cache_47_axes_0, x = var_13211_cast_fp16)[name = string("V_layer_cache_47_cast_fp16")]; + tensor x_371_axes_0 = const()[name = string("x_371_axes_0"), val = tensor([1])]; + tensor x_371_cast_fp16 = expand_dims(axes = x_371_axes_0, x = K_layer_cache_47_cast_fp16)[name = string("x_371_cast_fp16")]; + tensor var_13248 = const()[name = string("op_13248"), val = tensor([1, 2, 1, 1])]; + tensor x_373_cast_fp16 = tile(reps = var_13248, x = x_371_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_13260 = const()[name = string("op_13260"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_95_cast_fp16 = reshape(shape = var_13260, x = x_373_cast_fp16)[name = string("key_states_95_cast_fp16")]; + tensor x_377_axes_0 = const()[name = string("x_377_axes_0"), val = tensor([1])]; + tensor x_377_cast_fp16 = expand_dims(axes = x_377_axes_0, x = V_layer_cache_47_cast_fp16)[name = string("x_377_cast_fp16")]; + tensor var_13268 = const()[name = string("op_13268"), val = tensor([1, 2, 1, 1])]; + tensor x_379_cast_fp16 = tile(reps = var_13268, x = x_377_cast_fp16)[name = string("x_379_cast_fp16")]; + tensor var_13280 = const()[name = string("op_13280"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_141_cast_fp16 = reshape(shape = var_13280, x = x_379_cast_fp16)[name = string("value_states_141_cast_fp16")]; + bool var_13295_transpose_x_1 = const()[name = string("op_13295_transpose_x_1"), val = bool(false)]; + bool var_13295_transpose_y_1 = const()[name = string("op_13295_transpose_y_1"), val = bool(true)]; + tensor var_13295 = matmul(transpose_x = var_13295_transpose_x_1, transpose_y = var_13295_transpose_y_1, x = query_states_93, y = key_states_95_cast_fp16)[name = string("op_13295")]; + fp16 var_13296_to_fp16 = const()[name = string("op_13296_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_139_cast_fp16 = mul(x = var_13295, y = var_13296_to_fp16)[name = string("attn_weights_139_cast_fp16")]; + tensor attn_weights_141_cast_fp16 = add(x = attn_weights_139_cast_fp16, y = causal_mask)[name = string("attn_weights_141_cast_fp16")]; + int32 var_13331 = const()[name = string("op_13331"), val = int32(-1)]; + tensor attn_weights_143_cast_fp16 = softmax(axis = var_13331, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; + bool attn_output_231_transpose_x_0 = const()[name = string("attn_output_231_transpose_x_0"), val = bool(false)]; + bool attn_output_231_transpose_y_0 = const()[name = string("attn_output_231_transpose_y_0"), val = bool(false)]; + tensor attn_output_231_cast_fp16 = matmul(transpose_x = attn_output_231_transpose_x_0, transpose_y = attn_output_231_transpose_y_0, x = attn_weights_143_cast_fp16, y = value_states_141_cast_fp16)[name = string("attn_output_231_cast_fp16")]; + tensor var_13342_perm_0 = const()[name = string("op_13342_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13346 = const()[name = string("op_13346"), val = tensor([1, 1, 2048])]; + tensor var_13342_cast_fp16 = transpose(perm = var_13342_perm_0, x = attn_output_231_cast_fp16)[name = string("transpose_28")]; + tensor attn_output_235_cast_fp16 = reshape(shape = var_13346, x = var_13342_cast_fp16)[name = string("attn_output_235_cast_fp16")]; + tensor var_13351 = const()[name = string("op_13351"), val = tensor([0, 2, 1])]; + string var_13367_pad_type_0 = const()[name = string("op_13367_pad_type_0"), val = string("valid")]; + int32 var_13367_groups_0 = const()[name = string("op_13367_groups_0"), val = int32(1)]; + tensor var_13367_strides_0 = const()[name = string("op_13367_strides_0"), val = tensor([1])]; + tensor var_13367_pad_0 = const()[name = string("op_13367_pad_0"), val = tensor([0, 0])]; + tensor var_13367_dilations_0 = const()[name = string("op_13367_dilations_0"), val = tensor([1])]; + tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455942144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458039360))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13352_cast_fp16 = transpose(perm = var_13351, x = attn_output_235_cast_fp16)[name = string("transpose_27")]; + tensor var_13367_cast_fp16 = conv(dilations = var_13367_dilations_0, groups = var_13367_groups_0, pad = var_13367_pad_0, pad_type = var_13367_pad_type_0, strides = var_13367_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_13352_cast_fp16)[name = string("op_13367_cast_fp16")]; + tensor var_13371 = const()[name = string("op_13371"), val = tensor([0, 2, 1])]; + tensor attn_output_239_cast_fp16 = transpose(perm = var_13371, x = var_13367_cast_fp16)[name = string("transpose_26")]; + tensor hidden_states_239_cast_fp16 = add(x = hidden_states_231_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + int32 var_13384 = const()[name = string("op_13384"), val = int32(-1)]; + fp16 const_716_promoted_to_fp16 = const()[name = string("const_716_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13386_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = const_716_promoted_to_fp16)[name = string("op_13386_cast_fp16")]; + bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; + tensor input_425_cast_fp16 = concat(axis = var_13384, interleave = input_425_interleave_0, values = (hidden_states_239_cast_fp16, var_13386_cast_fp16))[name = string("input_425_cast_fp16")]; + tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; + fp16 var_13381_to_fp16 = const()[name = string("op_13381_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_13381_to_fp16, x = input_425_cast_fp16)[name = string("normed_381_cast_fp16")]; + tensor normed_383_begin_0 = const()[name = string("normed_383_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_383_end_0 = const()[name = string("normed_383_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_383_end_mask_0 = const()[name = string("normed_383_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_383_cast_fp16 = slice_by_index(begin = normed_383_begin_0, end = normed_383_end_0, end_mask = normed_383_end_mask_0, x = normed_381_cast_fp16)[name = string("normed_383_cast_fp16")]; + tensor const_719_promoted_to_fp16 = const()[name = string("const_719_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458104960)))]; + tensor x_381_cast_fp16 = mul(x = normed_383_cast_fp16, y = const_719_promoted_to_fp16)[name = string("x_381_cast_fp16")]; + tensor var_13411 = const()[name = string("op_13411"), val = tensor([0, 2, 1])]; + tensor input_427_axes_0 = const()[name = string("input_427_axes_0"), val = tensor([2])]; + tensor var_13412 = transpose(perm = var_13411, x = x_381_cast_fp16)[name = string("transpose_25")]; + tensor input_427 = expand_dims(axes = input_427_axes_0, x = var_13412)[name = string("input_427")]; + string input_429_pad_type_0 = const()[name = string("input_429_pad_type_0"), val = string("valid")]; + tensor input_429_strides_0 = const()[name = string("input_429_strides_0"), val = tensor([1, 1])]; + tensor input_429_pad_0 = const()[name = string("input_429_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_429_dilations_0 = const()[name = string("input_429_dilations_0"), val = tensor([1, 1])]; + int32 input_429_groups_0 = const()[name = string("input_429_groups_0"), val = int32(1)]; + tensor input_429 = conv(dilations = input_429_dilations_0, groups = input_429_groups_0, pad = input_429_pad_0, pad_type = input_429_pad_type_0, strides = input_429_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_palettized, x = input_427)[name = string("input_429")]; + string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; + tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; + tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; + int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; + tensor b_47 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_palettized, x = input_427)[name = string("b_47")]; + tensor c_47 = silu(x = input_429)[name = string("c_47")]; + tensor input_431 = mul(x = c_47, y = b_47)[name = string("input_431")]; + string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; + tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; + tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; + int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; + tensor e_47 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_palettized, x = input_431)[name = string("e_47")]; + tensor var_13434_axes_0 = const()[name = string("op_13434_axes_0"), val = tensor([2])]; + tensor var_13434 = squeeze(axes = var_13434_axes_0, x = e_47)[name = string("op_13434")]; + tensor var_13435 = const()[name = string("op_13435"), val = tensor([0, 2, 1])]; + tensor var_13436 = transpose(perm = var_13435, x = var_13434)[name = string("transpose_24")]; + tensor hidden_states_241_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = var_13436)[name = string("hidden_states_241_cast_fp16")]; + int32 var_13448 = const()[name = string("op_13448"), val = int32(-1)]; + fp16 const_720_promoted_to_fp16 = const()[name = string("const_720_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13450_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = const_720_promoted_to_fp16)[name = string("op_13450_cast_fp16")]; + bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; + tensor input_433_cast_fp16 = concat(axis = var_13448, interleave = input_433_interleave_0, values = (hidden_states_241_cast_fp16, var_13450_cast_fp16))[name = string("input_433_cast_fp16")]; + tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; + fp16 var_13445_to_fp16 = const()[name = string("op_13445_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_13445_to_fp16, x = input_433_cast_fp16)[name = string("normed_385_cast_fp16")]; + tensor normed_387_begin_0 = const()[name = string("normed_387_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_387_end_0 = const()[name = string("normed_387_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_387_end_mask_0 = const()[name = string("normed_387_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_387_cast_fp16 = slice_by_index(begin = normed_387_begin_0, end = normed_387_end_0, end_mask = normed_387_end_mask_0, x = normed_385_cast_fp16)[name = string("normed_387_cast_fp16")]; + tensor const_723_promoted_to_fp16 = const()[name = string("const_723_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458107072)))]; + tensor hidden_states_243_cast_fp16 = mul(x = normed_387_cast_fp16, y = const_723_promoted_to_fp16)[name = string("hidden_states_243_cast_fp16")]; + tensor var_13467 = const()[name = string("op_13467"), val = tensor([0, 2, 1])]; + tensor var_13470_axes_0 = const()[name = string("op_13470_axes_0"), val = tensor([2])]; + tensor var_13468_cast_fp16 = transpose(perm = var_13467, x = hidden_states_243_cast_fp16)[name = string("transpose_23")]; + tensor var_13470_cast_fp16 = expand_dims(axes = var_13470_axes_0, x = var_13468_cast_fp16)[name = string("op_13470_cast_fp16")]; + string var_13486_pad_type_0 = const()[name = string("op_13486_pad_type_0"), val = string("valid")]; + tensor var_13486_strides_0 = const()[name = string("op_13486_strides_0"), val = tensor([1, 1])]; + tensor var_13486_pad_0 = const()[name = string("op_13486_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13486_dilations_0 = const()[name = string("op_13486_dilations_0"), val = tensor([1, 1])]; + int32 var_13486_groups_0 = const()[name = string("op_13486_groups_0"), val = int32(1)]; + tensor var_13486 = conv(dilations = var_13486_dilations_0, groups = var_13486_groups_0, pad = var_13486_pad_0, pad_type = var_13486_pad_type_0, strides = var_13486_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_13470_cast_fp16)[name = string("op_13486")]; + tensor var_13491 = const()[name = string("op_13491"), val = tensor([1, 16, 1, 128])]; + tensor var_13492 = reshape(shape = var_13491, x = var_13486)[name = string("op_13492")]; + string var_13508_pad_type_0 = const()[name = string("op_13508_pad_type_0"), val = string("valid")]; + tensor var_13508_strides_0 = const()[name = string("op_13508_strides_0"), val = tensor([1, 1])]; + tensor var_13508_pad_0 = const()[name = string("op_13508_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13508_dilations_0 = const()[name = string("op_13508_dilations_0"), val = tensor([1, 1])]; + int32 var_13508_groups_0 = const()[name = string("op_13508_groups_0"), val = int32(1)]; + tensor var_13508 = conv(dilations = var_13508_dilations_0, groups = var_13508_groups_0, pad = var_13508_pad_0, pad_type = var_13508_pad_type_0, strides = var_13508_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_13470_cast_fp16)[name = string("op_13508")]; + tensor var_13513 = const()[name = string("op_13513"), val = tensor([1, 8, 1, 128])]; + tensor var_13514 = reshape(shape = var_13513, x = var_13508)[name = string("op_13514")]; + string var_13530_pad_type_0 = const()[name = string("op_13530_pad_type_0"), val = string("valid")]; + tensor var_13530_strides_0 = const()[name = string("op_13530_strides_0"), val = tensor([1, 1])]; + tensor var_13530_pad_0 = const()[name = string("op_13530_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13530_dilations_0 = const()[name = string("op_13530_dilations_0"), val = tensor([1, 1])]; + int32 var_13530_groups_0 = const()[name = string("op_13530_groups_0"), val = int32(1)]; + tensor var_13530 = conv(dilations = var_13530_dilations_0, groups = var_13530_groups_0, pad = var_13530_pad_0, pad_type = var_13530_pad_type_0, strides = var_13530_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_13470_cast_fp16)[name = string("op_13530")]; + tensor var_13535 = const()[name = string("op_13535"), val = tensor([1, 8, 1, 128])]; + tensor var_13536 = reshape(shape = var_13535, x = var_13530)[name = string("op_13536")]; + int32 var_13551 = const()[name = string("op_13551"), val = int32(-1)]; + fp16 const_724_promoted = const()[name = string("const_724_promoted"), val = fp16(-0x1p+0)]; + tensor var_13553 = mul(x = var_13492, y = const_724_promoted)[name = string("op_13553")]; + bool input_437_interleave_0 = const()[name = string("input_437_interleave_0"), val = bool(false)]; + tensor input_437 = concat(axis = var_13551, interleave = input_437_interleave_0, values = (var_13492, var_13553))[name = string("input_437")]; + tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; + fp16 var_13548_to_fp16 = const()[name = string("op_13548_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_13548_to_fp16, x = input_437)[name = string("normed_389_cast_fp16")]; + tensor normed_391_begin_0 = const()[name = string("normed_391_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_391_end_0 = const()[name = string("normed_391_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_391_end_mask_0 = const()[name = string("normed_391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_391 = slice_by_index(begin = normed_391_begin_0, end = normed_391_end_0, end_mask = normed_391_end_mask_0, x = normed_389_cast_fp16)[name = string("normed_391")]; + tensor const_727 = const()[name = string("const_727"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458109184)))]; + tensor q_49 = mul(x = normed_391, y = const_727)[name = string("q_49")]; + int32 var_13576 = const()[name = string("op_13576"), val = int32(-1)]; + fp16 const_728_promoted = const()[name = string("const_728_promoted"), val = fp16(-0x1p+0)]; + tensor var_13578 = mul(x = var_13514, y = const_728_promoted)[name = string("op_13578")]; + bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; + tensor input_439 = concat(axis = var_13576, interleave = input_439_interleave_0, values = (var_13514, var_13578))[name = string("input_439")]; + tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; + fp16 var_13573_to_fp16 = const()[name = string("op_13573_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_13573_to_fp16, x = input_439)[name = string("normed_393_cast_fp16")]; + tensor normed_395_begin_0 = const()[name = string("normed_395_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_395_end_0 = const()[name = string("normed_395_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_395_end_mask_0 = const()[name = string("normed_395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_395 = slice_by_index(begin = normed_395_begin_0, end = normed_395_end_0, end_mask = normed_395_end_mask_0, x = normed_393_cast_fp16)[name = string("normed_395")]; + tensor const_731 = const()[name = string("const_731"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458109504)))]; + tensor k_49 = mul(x = normed_395, y = const_731)[name = string("k_49")]; + tensor var_13592 = mul(x = q_49, y = cos_1_cast_fp16)[name = string("op_13592")]; + tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_97 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49)[name = string("x1_97")]; + tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_97 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49)[name = string("x2_97")]; + fp16 const_734_promoted = const()[name = string("const_734_promoted"), val = fp16(-0x1p+0)]; + tensor var_13613 = mul(x = x2_97, y = const_734_promoted)[name = string("op_13613")]; + int32 var_13615 = const()[name = string("op_13615"), val = int32(-1)]; + bool var_13616_interleave_0 = const()[name = string("op_13616_interleave_0"), val = bool(false)]; + tensor var_13616 = concat(axis = var_13615, interleave = var_13616_interleave_0, values = (var_13613, x1_97))[name = string("op_13616")]; + tensor var_13617 = mul(x = var_13616, y = sin_1_cast_fp16)[name = string("op_13617")]; + tensor query_states_97 = add(x = var_13592, y = var_13617)[name = string("query_states_97")]; + tensor var_13620 = mul(x = k_49, y = cos_1_cast_fp16)[name = string("op_13620")]; + tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_99 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49)[name = string("x1_99")]; + tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_99 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49)[name = string("x2_99")]; + fp16 const_737_promoted = const()[name = string("const_737_promoted"), val = fp16(-0x1p+0)]; + tensor var_13641 = mul(x = x2_99, y = const_737_promoted)[name = string("op_13641")]; + int32 var_13643 = const()[name = string("op_13643"), val = int32(-1)]; + bool var_13644_interleave_0 = const()[name = string("op_13644_interleave_0"), val = bool(false)]; + tensor var_13644 = concat(axis = var_13643, interleave = var_13644_interleave_0, values = (var_13641, x1_99))[name = string("op_13644")]; + tensor var_13645 = mul(x = var_13644, y = sin_1_cast_fp16)[name = string("op_13645")]; + tensor key_states_97 = add(x = var_13620, y = var_13645)[name = string("key_states_97")]; + tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([24])]; + tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; + tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; + tensor expand_dims_292 = const()[name = string("expand_dims_292"), val = tensor([25])]; + int32 concat_194_axis_0 = const()[name = string("concat_194_axis_0"), val = int32(0)]; + bool concat_194_interleave_0 = const()[name = string("concat_194_interleave_0"), val = bool(false)]; + tensor concat_194 = concat(axis = concat_194_axis_0, interleave = concat_194_interleave_0, values = (expand_dims_288, expand_dims_289, current_pos, expand_dims_291))[name = string("concat_194")]; + tensor concat_195_values1_0 = const()[name = string("concat_195_values1_0"), val = tensor([0])]; + tensor concat_195_values3_0 = const()[name = string("concat_195_values3_0"), val = tensor([0])]; + int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)]; + bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)]; + tensor concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (expand_dims_292, concat_195_values1_0, var_1746, concat_195_values3_0))[name = string("concat_195")]; + tensor model_model_kv_cache_0_internal_tensor_assign_49_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16 = slice_update(begin = concat_194, begin_mask = model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0, end = concat_195, end_mask = model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_49_stride_0, update = key_states_97, x = coreml_update_state_103)[name = string("model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_48_write_state")]; + tensor coreml_update_state_104 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_48")]; + tensor expand_dims_294 = const()[name = string("expand_dims_294"), val = tensor([52])]; + tensor expand_dims_295 = const()[name = string("expand_dims_295"), val = tensor([0])]; + tensor expand_dims_297 = const()[name = string("expand_dims_297"), val = tensor([0])]; + tensor expand_dims_298 = const()[name = string("expand_dims_298"), val = tensor([53])]; + int32 concat_198_axis_0 = const()[name = string("concat_198_axis_0"), val = int32(0)]; + bool concat_198_interleave_0 = const()[name = string("concat_198_interleave_0"), val = bool(false)]; + tensor concat_198 = concat(axis = concat_198_axis_0, interleave = concat_198_interleave_0, values = (expand_dims_294, expand_dims_295, current_pos, expand_dims_297))[name = string("concat_198")]; + tensor concat_199_values1_0 = const()[name = string("concat_199_values1_0"), val = tensor([0])]; + tensor concat_199_values3_0 = const()[name = string("concat_199_values3_0"), val = tensor([0])]; + int32 concat_199_axis_0 = const()[name = string("concat_199_axis_0"), val = int32(0)]; + bool concat_199_interleave_0 = const()[name = string("concat_199_interleave_0"), val = bool(false)]; + tensor concat_199 = concat(axis = concat_199_axis_0, interleave = concat_199_interleave_0, values = (expand_dims_298, concat_199_values1_0, var_1746, concat_199_values3_0))[name = string("concat_199")]; + tensor model_model_kv_cache_0_internal_tensor_assign_50_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16 = slice_update(begin = concat_198, begin_mask = model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0, end = concat_199, end_mask = model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_50_stride_0, update = var_13536, x = coreml_update_state_104)[name = string("model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_49_write_state")]; + tensor coreml_update_state_105 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_49")]; + tensor var_13700_begin_0 = const()[name = string("op_13700_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor var_13700_end_0 = const()[name = string("op_13700_end_0"), val = tensor([25, 8, 4096, 128])]; + tensor var_13700_end_mask_0 = const()[name = string("op_13700_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13700_cast_fp16 = slice_by_index(begin = var_13700_begin_0, end = var_13700_end_0, end_mask = var_13700_end_mask_0, x = coreml_update_state_105)[name = string("op_13700_cast_fp16")]; + tensor K_layer_cache_49_axes_0 = const()[name = string("K_layer_cache_49_axes_0"), val = tensor([0])]; + tensor K_layer_cache_49_cast_fp16 = squeeze(axes = K_layer_cache_49_axes_0, x = var_13700_cast_fp16)[name = string("K_layer_cache_49_cast_fp16")]; + tensor var_13707_begin_0 = const()[name = string("op_13707_begin_0"), val = tensor([52, 0, 0, 0])]; + tensor var_13707_end_0 = const()[name = string("op_13707_end_0"), val = tensor([53, 8, 4096, 128])]; + tensor var_13707_end_mask_0 = const()[name = string("op_13707_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13707_cast_fp16 = slice_by_index(begin = var_13707_begin_0, end = var_13707_end_0, end_mask = var_13707_end_mask_0, x = coreml_update_state_105)[name = string("op_13707_cast_fp16")]; + tensor V_layer_cache_49_axes_0 = const()[name = string("V_layer_cache_49_axes_0"), val = tensor([0])]; + tensor V_layer_cache_49_cast_fp16 = squeeze(axes = V_layer_cache_49_axes_0, x = var_13707_cast_fp16)[name = string("V_layer_cache_49_cast_fp16")]; + tensor x_387_axes_0 = const()[name = string("x_387_axes_0"), val = tensor([1])]; + tensor x_387_cast_fp16 = expand_dims(axes = x_387_axes_0, x = K_layer_cache_49_cast_fp16)[name = string("x_387_cast_fp16")]; + tensor var_13744 = const()[name = string("op_13744"), val = tensor([1, 2, 1, 1])]; + tensor x_389_cast_fp16 = tile(reps = var_13744, x = x_387_cast_fp16)[name = string("x_389_cast_fp16")]; + tensor var_13756 = const()[name = string("op_13756"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_99_cast_fp16 = reshape(shape = var_13756, x = x_389_cast_fp16)[name = string("key_states_99_cast_fp16")]; + tensor x_393_axes_0 = const()[name = string("x_393_axes_0"), val = tensor([1])]; + tensor x_393_cast_fp16 = expand_dims(axes = x_393_axes_0, x = V_layer_cache_49_cast_fp16)[name = string("x_393_cast_fp16")]; + tensor var_13764 = const()[name = string("op_13764"), val = tensor([1, 2, 1, 1])]; + tensor x_395_cast_fp16 = tile(reps = var_13764, x = x_393_cast_fp16)[name = string("x_395_cast_fp16")]; + tensor var_13776 = const()[name = string("op_13776"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_147_cast_fp16 = reshape(shape = var_13776, x = x_395_cast_fp16)[name = string("value_states_147_cast_fp16")]; + bool var_13791_transpose_x_1 = const()[name = string("op_13791_transpose_x_1"), val = bool(false)]; + bool var_13791_transpose_y_1 = const()[name = string("op_13791_transpose_y_1"), val = bool(true)]; + tensor var_13791 = matmul(transpose_x = var_13791_transpose_x_1, transpose_y = var_13791_transpose_y_1, x = query_states_97, y = key_states_99_cast_fp16)[name = string("op_13791")]; + fp16 var_13792_to_fp16 = const()[name = string("op_13792_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_145_cast_fp16 = mul(x = var_13791, y = var_13792_to_fp16)[name = string("attn_weights_145_cast_fp16")]; + tensor attn_weights_147_cast_fp16 = add(x = attn_weights_145_cast_fp16, y = causal_mask)[name = string("attn_weights_147_cast_fp16")]; + int32 var_13827 = const()[name = string("op_13827"), val = int32(-1)]; + tensor attn_weights_149_cast_fp16 = softmax(axis = var_13827, x = attn_weights_147_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; + bool attn_output_241_transpose_x_0 = const()[name = string("attn_output_241_transpose_x_0"), val = bool(false)]; + bool attn_output_241_transpose_y_0 = const()[name = string("attn_output_241_transpose_y_0"), val = bool(false)]; + tensor attn_output_241_cast_fp16 = matmul(transpose_x = attn_output_241_transpose_x_0, transpose_y = attn_output_241_transpose_y_0, x = attn_weights_149_cast_fp16, y = value_states_147_cast_fp16)[name = string("attn_output_241_cast_fp16")]; + tensor var_13838_perm_0 = const()[name = string("op_13838_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13842 = const()[name = string("op_13842"), val = tensor([1, 1, 2048])]; + tensor var_13838_cast_fp16 = transpose(perm = var_13838_perm_0, x = attn_output_241_cast_fp16)[name = string("transpose_22")]; + tensor attn_output_245_cast_fp16 = reshape(shape = var_13842, x = var_13838_cast_fp16)[name = string("attn_output_245_cast_fp16")]; + tensor var_13847 = const()[name = string("op_13847"), val = tensor([0, 2, 1])]; + string var_13863_pad_type_0 = const()[name = string("op_13863_pad_type_0"), val = string("valid")]; + int32 var_13863_groups_0 = const()[name = string("op_13863_groups_0"), val = int32(1)]; + tensor var_13863_strides_0 = const()[name = string("op_13863_strides_0"), val = tensor([1])]; + tensor var_13863_pad_0 = const()[name = string("op_13863_pad_0"), val = tensor([0, 0])]; + tensor var_13863_dilations_0 = const()[name = string("op_13863_dilations_0"), val = tensor([1])]; + tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458109824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460207040))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13848_cast_fp16 = transpose(perm = var_13847, x = attn_output_245_cast_fp16)[name = string("transpose_21")]; + tensor var_13863_cast_fp16 = conv(dilations = var_13863_dilations_0, groups = var_13863_groups_0, pad = var_13863_pad_0, pad_type = var_13863_pad_type_0, strides = var_13863_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_13848_cast_fp16)[name = string("op_13863_cast_fp16")]; + tensor var_13867 = const()[name = string("op_13867"), val = tensor([0, 2, 1])]; + tensor attn_output_249_cast_fp16 = transpose(perm = var_13867, x = var_13863_cast_fp16)[name = string("transpose_20")]; + tensor hidden_states_249_cast_fp16 = add(x = hidden_states_241_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_249_cast_fp16")]; + int32 var_13880 = const()[name = string("op_13880"), val = int32(-1)]; + fp16 const_746_promoted_to_fp16 = const()[name = string("const_746_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13882_cast_fp16 = mul(x = hidden_states_249_cast_fp16, y = const_746_promoted_to_fp16)[name = string("op_13882_cast_fp16")]; + bool input_443_interleave_0 = const()[name = string("input_443_interleave_0"), val = bool(false)]; + tensor input_443_cast_fp16 = concat(axis = var_13880, interleave = input_443_interleave_0, values = (hidden_states_249_cast_fp16, var_13882_cast_fp16))[name = string("input_443_cast_fp16")]; + tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; + fp16 var_13877_to_fp16 = const()[name = string("op_13877_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_13877_to_fp16, x = input_443_cast_fp16)[name = string("normed_397_cast_fp16")]; + tensor normed_399_begin_0 = const()[name = string("normed_399_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_399_end_0 = const()[name = string("normed_399_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_399_end_mask_0 = const()[name = string("normed_399_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_399_cast_fp16 = slice_by_index(begin = normed_399_begin_0, end = normed_399_end_0, end_mask = normed_399_end_mask_0, x = normed_397_cast_fp16)[name = string("normed_399_cast_fp16")]; + tensor const_749_promoted_to_fp16 = const()[name = string("const_749_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460272640)))]; + tensor x_397_cast_fp16 = mul(x = normed_399_cast_fp16, y = const_749_promoted_to_fp16)[name = string("x_397_cast_fp16")]; + tensor var_13907 = const()[name = string("op_13907"), val = tensor([0, 2, 1])]; + tensor input_445_axes_0 = const()[name = string("input_445_axes_0"), val = tensor([2])]; + tensor var_13908 = transpose(perm = var_13907, x = x_397_cast_fp16)[name = string("transpose_19")]; + tensor input_445 = expand_dims(axes = input_445_axes_0, x = var_13908)[name = string("input_445")]; + string input_447_pad_type_0 = const()[name = string("input_447_pad_type_0"), val = string("valid")]; + tensor input_447_strides_0 = const()[name = string("input_447_strides_0"), val = tensor([1, 1])]; + tensor input_447_pad_0 = const()[name = string("input_447_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_447_dilations_0 = const()[name = string("input_447_dilations_0"), val = tensor([1, 1])]; + int32 input_447_groups_0 = const()[name = string("input_447_groups_0"), val = int32(1)]; + tensor input_447 = conv(dilations = input_447_dilations_0, groups = input_447_groups_0, pad = input_447_pad_0, pad_type = input_447_pad_type_0, strides = input_447_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_palettized, x = input_445)[name = string("input_447")]; + string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; + tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; + tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; + int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; + tensor b_49 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_palettized, x = input_445)[name = string("b_49")]; + tensor c_49 = silu(x = input_447)[name = string("c_49")]; + tensor input_449 = mul(x = c_49, y = b_49)[name = string("input_449")]; + string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; + tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; + tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; + int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; + tensor e_49 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_palettized, x = input_449)[name = string("e_49")]; + tensor var_13930_axes_0 = const()[name = string("op_13930_axes_0"), val = tensor([2])]; + tensor var_13930 = squeeze(axes = var_13930_axes_0, x = e_49)[name = string("op_13930")]; + tensor var_13931 = const()[name = string("op_13931"), val = tensor([0, 2, 1])]; + tensor var_13932 = transpose(perm = var_13931, x = var_13930)[name = string("transpose_18")]; + tensor hidden_states_251_cast_fp16 = add(x = hidden_states_249_cast_fp16, y = var_13932)[name = string("hidden_states_251_cast_fp16")]; + int32 var_13944 = const()[name = string("op_13944"), val = int32(-1)]; + fp16 const_750_promoted_to_fp16 = const()[name = string("const_750_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13946_cast_fp16 = mul(x = hidden_states_251_cast_fp16, y = const_750_promoted_to_fp16)[name = string("op_13946_cast_fp16")]; + bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; + tensor input_451_cast_fp16 = concat(axis = var_13944, interleave = input_451_interleave_0, values = (hidden_states_251_cast_fp16, var_13946_cast_fp16))[name = string("input_451_cast_fp16")]; + tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; + fp16 var_13941_to_fp16 = const()[name = string("op_13941_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_13941_to_fp16, x = input_451_cast_fp16)[name = string("normed_401_cast_fp16")]; + tensor normed_403_begin_0 = const()[name = string("normed_403_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_403_end_0 = const()[name = string("normed_403_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_403_end_mask_0 = const()[name = string("normed_403_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_403_cast_fp16 = slice_by_index(begin = normed_403_begin_0, end = normed_403_end_0, end_mask = normed_403_end_mask_0, x = normed_401_cast_fp16)[name = string("normed_403_cast_fp16")]; + tensor const_753_promoted_to_fp16 = const()[name = string("const_753_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460274752)))]; + tensor hidden_states_253_cast_fp16 = mul(x = normed_403_cast_fp16, y = const_753_promoted_to_fp16)[name = string("hidden_states_253_cast_fp16")]; + tensor var_13963 = const()[name = string("op_13963"), val = tensor([0, 2, 1])]; + tensor var_13966_axes_0 = const()[name = string("op_13966_axes_0"), val = tensor([2])]; + tensor var_13964_cast_fp16 = transpose(perm = var_13963, x = hidden_states_253_cast_fp16)[name = string("transpose_17")]; + tensor var_13966_cast_fp16 = expand_dims(axes = var_13966_axes_0, x = var_13964_cast_fp16)[name = string("op_13966_cast_fp16")]; + string var_13982_pad_type_0 = const()[name = string("op_13982_pad_type_0"), val = string("valid")]; + tensor var_13982_strides_0 = const()[name = string("op_13982_strides_0"), val = tensor([1, 1])]; + tensor var_13982_pad_0 = const()[name = string("op_13982_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13982_dilations_0 = const()[name = string("op_13982_dilations_0"), val = tensor([1, 1])]; + int32 var_13982_groups_0 = const()[name = string("op_13982_groups_0"), val = int32(1)]; + tensor var_13982 = conv(dilations = var_13982_dilations_0, groups = var_13982_groups_0, pad = var_13982_pad_0, pad_type = var_13982_pad_type_0, strides = var_13982_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_13966_cast_fp16)[name = string("op_13982")]; + tensor var_13987 = const()[name = string("op_13987"), val = tensor([1, 16, 1, 128])]; + tensor var_13988 = reshape(shape = var_13987, x = var_13982)[name = string("op_13988")]; + string var_14004_pad_type_0 = const()[name = string("op_14004_pad_type_0"), val = string("valid")]; + tensor var_14004_strides_0 = const()[name = string("op_14004_strides_0"), val = tensor([1, 1])]; + tensor var_14004_pad_0 = const()[name = string("op_14004_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14004_dilations_0 = const()[name = string("op_14004_dilations_0"), val = tensor([1, 1])]; + int32 var_14004_groups_0 = const()[name = string("op_14004_groups_0"), val = int32(1)]; + tensor var_14004 = conv(dilations = var_14004_dilations_0, groups = var_14004_groups_0, pad = var_14004_pad_0, pad_type = var_14004_pad_type_0, strides = var_14004_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_13966_cast_fp16)[name = string("op_14004")]; + tensor var_14009 = const()[name = string("op_14009"), val = tensor([1, 8, 1, 128])]; + tensor var_14010 = reshape(shape = var_14009, x = var_14004)[name = string("op_14010")]; + string var_14026_pad_type_0 = const()[name = string("op_14026_pad_type_0"), val = string("valid")]; + tensor var_14026_strides_0 = const()[name = string("op_14026_strides_0"), val = tensor([1, 1])]; + tensor var_14026_pad_0 = const()[name = string("op_14026_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14026_dilations_0 = const()[name = string("op_14026_dilations_0"), val = tensor([1, 1])]; + int32 var_14026_groups_0 = const()[name = string("op_14026_groups_0"), val = int32(1)]; + tensor var_14026 = conv(dilations = var_14026_dilations_0, groups = var_14026_groups_0, pad = var_14026_pad_0, pad_type = var_14026_pad_type_0, strides = var_14026_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_13966_cast_fp16)[name = string("op_14026")]; + tensor var_14031 = const()[name = string("op_14031"), val = tensor([1, 8, 1, 128])]; + tensor var_14032 = reshape(shape = var_14031, x = var_14026)[name = string("op_14032")]; + int32 var_14047 = const()[name = string("op_14047"), val = int32(-1)]; + fp16 const_754_promoted = const()[name = string("const_754_promoted"), val = fp16(-0x1p+0)]; + tensor var_14049 = mul(x = var_13988, y = const_754_promoted)[name = string("op_14049")]; + bool input_455_interleave_0 = const()[name = string("input_455_interleave_0"), val = bool(false)]; + tensor input_455 = concat(axis = var_14047, interleave = input_455_interleave_0, values = (var_13988, var_14049))[name = string("input_455")]; + tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; + fp16 var_14044_to_fp16 = const()[name = string("op_14044_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_14044_to_fp16, x = input_455)[name = string("normed_405_cast_fp16")]; + tensor normed_407_begin_0 = const()[name = string("normed_407_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_407_end_0 = const()[name = string("normed_407_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_407_end_mask_0 = const()[name = string("normed_407_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_407 = slice_by_index(begin = normed_407_begin_0, end = normed_407_end_0, end_mask = normed_407_end_mask_0, x = normed_405_cast_fp16)[name = string("normed_407")]; + tensor const_757 = const()[name = string("const_757"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460276864)))]; + tensor q_51 = mul(x = normed_407, y = const_757)[name = string("q_51")]; + int32 var_14072 = const()[name = string("op_14072"), val = int32(-1)]; + fp16 const_758_promoted = const()[name = string("const_758_promoted"), val = fp16(-0x1p+0)]; + tensor var_14074 = mul(x = var_14010, y = const_758_promoted)[name = string("op_14074")]; + bool input_457_interleave_0 = const()[name = string("input_457_interleave_0"), val = bool(false)]; + tensor input_457 = concat(axis = var_14072, interleave = input_457_interleave_0, values = (var_14010, var_14074))[name = string("input_457")]; + tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; + fp16 var_14069_to_fp16 = const()[name = string("op_14069_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_14069_to_fp16, x = input_457)[name = string("normed_409_cast_fp16")]; + tensor normed_411_begin_0 = const()[name = string("normed_411_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_411_end_0 = const()[name = string("normed_411_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_411_end_mask_0 = const()[name = string("normed_411_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_411 = slice_by_index(begin = normed_411_begin_0, end = normed_411_end_0, end_mask = normed_411_end_mask_0, x = normed_409_cast_fp16)[name = string("normed_411")]; + tensor const_761 = const()[name = string("const_761"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460277184)))]; + tensor k_51 = mul(x = normed_411, y = const_761)[name = string("k_51")]; + tensor var_14088 = mul(x = q_51, y = cos_1_cast_fp16)[name = string("op_14088")]; + tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_101 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_51)[name = string("x1_101")]; + tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_101 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_51)[name = string("x2_101")]; + fp16 const_764_promoted = const()[name = string("const_764_promoted"), val = fp16(-0x1p+0)]; + tensor var_14109 = mul(x = x2_101, y = const_764_promoted)[name = string("op_14109")]; + int32 var_14111 = const()[name = string("op_14111"), val = int32(-1)]; + bool var_14112_interleave_0 = const()[name = string("op_14112_interleave_0"), val = bool(false)]; + tensor var_14112 = concat(axis = var_14111, interleave = var_14112_interleave_0, values = (var_14109, x1_101))[name = string("op_14112")]; + tensor var_14113 = mul(x = var_14112, y = sin_1_cast_fp16)[name = string("op_14113")]; + tensor query_states_101 = add(x = var_14088, y = var_14113)[name = string("query_states_101")]; + tensor var_14116 = mul(x = k_51, y = cos_1_cast_fp16)[name = string("op_14116")]; + tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_103 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = k_51)[name = string("x1_103")]; + tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_103 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = k_51)[name = string("x2_103")]; + fp16 const_767_promoted = const()[name = string("const_767_promoted"), val = fp16(-0x1p+0)]; + tensor var_14137 = mul(x = x2_103, y = const_767_promoted)[name = string("op_14137")]; + int32 var_14139 = const()[name = string("op_14139"), val = int32(-1)]; + bool var_14140_interleave_0 = const()[name = string("op_14140_interleave_0"), val = bool(false)]; + tensor var_14140 = concat(axis = var_14139, interleave = var_14140_interleave_0, values = (var_14137, x1_103))[name = string("op_14140")]; + tensor var_14141 = mul(x = var_14140, y = sin_1_cast_fp16)[name = string("op_14141")]; + tensor key_states_101 = add(x = var_14116, y = var_14141)[name = string("key_states_101")]; + tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([25])]; + tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; + tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; + tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([26])]; + int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; + bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; + tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (expand_dims_300, expand_dims_301, current_pos, expand_dims_303))[name = string("concat_202")]; + tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; + tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; + int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; + bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; + tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (expand_dims_304, concat_203_values1_0, var_1746, concat_203_values3_0))[name = string("concat_203")]; + tensor model_model_kv_cache_0_internal_tensor_assign_51_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16 = slice_update(begin = concat_202, begin_mask = model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0, end = concat_203, end_mask = model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_51_stride_0, update = key_states_101, x = coreml_update_state_105)[name = string("model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_50_write_state")]; + tensor coreml_update_state_106 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_50")]; + tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([53])]; + tensor expand_dims_307 = const()[name = string("expand_dims_307"), val = tensor([0])]; + tensor expand_dims_309 = const()[name = string("expand_dims_309"), val = tensor([0])]; + tensor expand_dims_310 = const()[name = string("expand_dims_310"), val = tensor([54])]; + int32 concat_206_axis_0 = const()[name = string("concat_206_axis_0"), val = int32(0)]; + bool concat_206_interleave_0 = const()[name = string("concat_206_interleave_0"), val = bool(false)]; + tensor concat_206 = concat(axis = concat_206_axis_0, interleave = concat_206_interleave_0, values = (expand_dims_306, expand_dims_307, current_pos, expand_dims_309))[name = string("concat_206")]; + tensor concat_207_values1_0 = const()[name = string("concat_207_values1_0"), val = tensor([0])]; + tensor concat_207_values3_0 = const()[name = string("concat_207_values3_0"), val = tensor([0])]; + int32 concat_207_axis_0 = const()[name = string("concat_207_axis_0"), val = int32(0)]; + bool concat_207_interleave_0 = const()[name = string("concat_207_interleave_0"), val = bool(false)]; + tensor concat_207 = concat(axis = concat_207_axis_0, interleave = concat_207_interleave_0, values = (expand_dims_310, concat_207_values1_0, var_1746, concat_207_values3_0))[name = string("concat_207")]; + tensor model_model_kv_cache_0_internal_tensor_assign_52_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16 = slice_update(begin = concat_206, begin_mask = model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0, end = concat_207, end_mask = model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_52_stride_0, update = var_14032, x = coreml_update_state_106)[name = string("model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_51_write_state")]; + tensor coreml_update_state_107 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_51")]; + tensor var_14196_begin_0 = const()[name = string("op_14196_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor var_14196_end_0 = const()[name = string("op_14196_end_0"), val = tensor([26, 8, 4096, 128])]; + tensor var_14196_end_mask_0 = const()[name = string("op_14196_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14196_cast_fp16 = slice_by_index(begin = var_14196_begin_0, end = var_14196_end_0, end_mask = var_14196_end_mask_0, x = coreml_update_state_107)[name = string("op_14196_cast_fp16")]; + tensor K_layer_cache_51_axes_0 = const()[name = string("K_layer_cache_51_axes_0"), val = tensor([0])]; + tensor K_layer_cache_51_cast_fp16 = squeeze(axes = K_layer_cache_51_axes_0, x = var_14196_cast_fp16)[name = string("K_layer_cache_51_cast_fp16")]; + tensor var_14203_begin_0 = const()[name = string("op_14203_begin_0"), val = tensor([53, 0, 0, 0])]; + tensor var_14203_end_0 = const()[name = string("op_14203_end_0"), val = tensor([54, 8, 4096, 128])]; + tensor var_14203_end_mask_0 = const()[name = string("op_14203_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14203_cast_fp16 = slice_by_index(begin = var_14203_begin_0, end = var_14203_end_0, end_mask = var_14203_end_mask_0, x = coreml_update_state_107)[name = string("op_14203_cast_fp16")]; + tensor V_layer_cache_51_axes_0 = const()[name = string("V_layer_cache_51_axes_0"), val = tensor([0])]; + tensor V_layer_cache_51_cast_fp16 = squeeze(axes = V_layer_cache_51_axes_0, x = var_14203_cast_fp16)[name = string("V_layer_cache_51_cast_fp16")]; + tensor x_403_axes_0 = const()[name = string("x_403_axes_0"), val = tensor([1])]; + tensor x_403_cast_fp16 = expand_dims(axes = x_403_axes_0, x = K_layer_cache_51_cast_fp16)[name = string("x_403_cast_fp16")]; + tensor var_14240 = const()[name = string("op_14240"), val = tensor([1, 2, 1, 1])]; + tensor x_405_cast_fp16 = tile(reps = var_14240, x = x_403_cast_fp16)[name = string("x_405_cast_fp16")]; + tensor var_14252 = const()[name = string("op_14252"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_103_cast_fp16 = reshape(shape = var_14252, x = x_405_cast_fp16)[name = string("key_states_103_cast_fp16")]; + tensor x_409_axes_0 = const()[name = string("x_409_axes_0"), val = tensor([1])]; + tensor x_409_cast_fp16 = expand_dims(axes = x_409_axes_0, x = V_layer_cache_51_cast_fp16)[name = string("x_409_cast_fp16")]; + tensor var_14260 = const()[name = string("op_14260"), val = tensor([1, 2, 1, 1])]; + tensor x_411_cast_fp16 = tile(reps = var_14260, x = x_409_cast_fp16)[name = string("x_411_cast_fp16")]; + tensor var_14272 = const()[name = string("op_14272"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_153_cast_fp16 = reshape(shape = var_14272, x = x_411_cast_fp16)[name = string("value_states_153_cast_fp16")]; + bool var_14287_transpose_x_1 = const()[name = string("op_14287_transpose_x_1"), val = bool(false)]; + bool var_14287_transpose_y_1 = const()[name = string("op_14287_transpose_y_1"), val = bool(true)]; + tensor var_14287 = matmul(transpose_x = var_14287_transpose_x_1, transpose_y = var_14287_transpose_y_1, x = query_states_101, y = key_states_103_cast_fp16)[name = string("op_14287")]; + fp16 var_14288_to_fp16 = const()[name = string("op_14288_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_151_cast_fp16 = mul(x = var_14287, y = var_14288_to_fp16)[name = string("attn_weights_151_cast_fp16")]; + tensor attn_weights_153_cast_fp16 = add(x = attn_weights_151_cast_fp16, y = causal_mask)[name = string("attn_weights_153_cast_fp16")]; + int32 var_14323 = const()[name = string("op_14323"), val = int32(-1)]; + tensor attn_weights_155_cast_fp16 = softmax(axis = var_14323, x = attn_weights_153_cast_fp16)[name = string("attn_weights_155_cast_fp16")]; + bool attn_output_251_transpose_x_0 = const()[name = string("attn_output_251_transpose_x_0"), val = bool(false)]; + bool attn_output_251_transpose_y_0 = const()[name = string("attn_output_251_transpose_y_0"), val = bool(false)]; + tensor attn_output_251_cast_fp16 = matmul(transpose_x = attn_output_251_transpose_x_0, transpose_y = attn_output_251_transpose_y_0, x = attn_weights_155_cast_fp16, y = value_states_153_cast_fp16)[name = string("attn_output_251_cast_fp16")]; + tensor var_14334_perm_0 = const()[name = string("op_14334_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_14338 = const()[name = string("op_14338"), val = tensor([1, 1, 2048])]; + tensor var_14334_cast_fp16 = transpose(perm = var_14334_perm_0, x = attn_output_251_cast_fp16)[name = string("transpose_16")]; + tensor attn_output_255_cast_fp16 = reshape(shape = var_14338, x = var_14334_cast_fp16)[name = string("attn_output_255_cast_fp16")]; + tensor var_14343 = const()[name = string("op_14343"), val = tensor([0, 2, 1])]; + string var_14359_pad_type_0 = const()[name = string("op_14359_pad_type_0"), val = string("valid")]; + int32 var_14359_groups_0 = const()[name = string("op_14359_groups_0"), val = int32(1)]; + tensor var_14359_strides_0 = const()[name = string("op_14359_strides_0"), val = tensor([1])]; + tensor var_14359_pad_0 = const()[name = string("op_14359_pad_0"), val = tensor([0, 0])]; + tensor var_14359_dilations_0 = const()[name = string("op_14359_dilations_0"), val = tensor([1])]; + tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460277504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462374720))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_14344_cast_fp16 = transpose(perm = var_14343, x = attn_output_255_cast_fp16)[name = string("transpose_15")]; + tensor var_14359_cast_fp16 = conv(dilations = var_14359_dilations_0, groups = var_14359_groups_0, pad = var_14359_pad_0, pad_type = var_14359_pad_type_0, strides = var_14359_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_14344_cast_fp16)[name = string("op_14359_cast_fp16")]; + tensor var_14363 = const()[name = string("op_14363"), val = tensor([0, 2, 1])]; + tensor attn_output_259_cast_fp16 = transpose(perm = var_14363, x = var_14359_cast_fp16)[name = string("transpose_14")]; + tensor hidden_states_259_cast_fp16 = add(x = hidden_states_251_cast_fp16, y = attn_output_259_cast_fp16)[name = string("hidden_states_259_cast_fp16")]; + int32 var_14376 = const()[name = string("op_14376"), val = int32(-1)]; + fp16 const_776_promoted_to_fp16 = const()[name = string("const_776_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14378_cast_fp16 = mul(x = hidden_states_259_cast_fp16, y = const_776_promoted_to_fp16)[name = string("op_14378_cast_fp16")]; + bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; + tensor input_461_cast_fp16 = concat(axis = var_14376, interleave = input_461_interleave_0, values = (hidden_states_259_cast_fp16, var_14378_cast_fp16))[name = string("input_461_cast_fp16")]; + tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; + fp16 var_14373_to_fp16 = const()[name = string("op_14373_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_14373_to_fp16, x = input_461_cast_fp16)[name = string("normed_413_cast_fp16")]; + tensor normed_415_begin_0 = const()[name = string("normed_415_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_415_end_0 = const()[name = string("normed_415_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_415_end_mask_0 = const()[name = string("normed_415_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_415_cast_fp16 = slice_by_index(begin = normed_415_begin_0, end = normed_415_end_0, end_mask = normed_415_end_mask_0, x = normed_413_cast_fp16)[name = string("normed_415_cast_fp16")]; + tensor const_779_promoted_to_fp16 = const()[name = string("const_779_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462440320)))]; + tensor x_413_cast_fp16 = mul(x = normed_415_cast_fp16, y = const_779_promoted_to_fp16)[name = string("x_413_cast_fp16")]; + tensor var_14403 = const()[name = string("op_14403"), val = tensor([0, 2, 1])]; + tensor input_463_axes_0 = const()[name = string("input_463_axes_0"), val = tensor([2])]; + tensor var_14404 = transpose(perm = var_14403, x = x_413_cast_fp16)[name = string("transpose_13")]; + tensor input_463 = expand_dims(axes = input_463_axes_0, x = var_14404)[name = string("input_463")]; + string input_465_pad_type_0 = const()[name = string("input_465_pad_type_0"), val = string("valid")]; + tensor input_465_strides_0 = const()[name = string("input_465_strides_0"), val = tensor([1, 1])]; + tensor input_465_pad_0 = const()[name = string("input_465_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_465_dilations_0 = const()[name = string("input_465_dilations_0"), val = tensor([1, 1])]; + int32 input_465_groups_0 = const()[name = string("input_465_groups_0"), val = int32(1)]; + tensor input_465 = conv(dilations = input_465_dilations_0, groups = input_465_groups_0, pad = input_465_pad_0, pad_type = input_465_pad_type_0, strides = input_465_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_palettized, x = input_463)[name = string("input_465")]; + string b_51_pad_type_0 = const()[name = string("b_51_pad_type_0"), val = string("valid")]; + tensor b_51_strides_0 = const()[name = string("b_51_strides_0"), val = tensor([1, 1])]; + tensor b_51_pad_0 = const()[name = string("b_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_51_dilations_0 = const()[name = string("b_51_dilations_0"), val = tensor([1, 1])]; + int32 b_51_groups_0 = const()[name = string("b_51_groups_0"), val = int32(1)]; + tensor b_51 = conv(dilations = b_51_dilations_0, groups = b_51_groups_0, pad = b_51_pad_0, pad_type = b_51_pad_type_0, strides = b_51_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_palettized, x = input_463)[name = string("b_51")]; + tensor c_51 = silu(x = input_465)[name = string("c_51")]; + tensor input_467 = mul(x = c_51, y = b_51)[name = string("input_467")]; + string e_51_pad_type_0 = const()[name = string("e_51_pad_type_0"), val = string("valid")]; + tensor e_51_strides_0 = const()[name = string("e_51_strides_0"), val = tensor([1, 1])]; + tensor e_51_pad_0 = const()[name = string("e_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_51_dilations_0 = const()[name = string("e_51_dilations_0"), val = tensor([1, 1])]; + int32 e_51_groups_0 = const()[name = string("e_51_groups_0"), val = int32(1)]; + tensor e_51 = conv(dilations = e_51_dilations_0, groups = e_51_groups_0, pad = e_51_pad_0, pad_type = e_51_pad_type_0, strides = e_51_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_palettized, x = input_467)[name = string("e_51")]; + tensor var_14426_axes_0 = const()[name = string("op_14426_axes_0"), val = tensor([2])]; + tensor var_14426 = squeeze(axes = var_14426_axes_0, x = e_51)[name = string("op_14426")]; + tensor var_14427 = const()[name = string("op_14427"), val = tensor([0, 2, 1])]; + tensor var_14428 = transpose(perm = var_14427, x = var_14426)[name = string("transpose_12")]; + tensor hidden_states_261_cast_fp16 = add(x = hidden_states_259_cast_fp16, y = var_14428)[name = string("hidden_states_261_cast_fp16")]; + int32 var_14440 = const()[name = string("op_14440"), val = int32(-1)]; + fp16 const_780_promoted_to_fp16 = const()[name = string("const_780_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14442_cast_fp16 = mul(x = hidden_states_261_cast_fp16, y = const_780_promoted_to_fp16)[name = string("op_14442_cast_fp16")]; + bool input_469_interleave_0 = const()[name = string("input_469_interleave_0"), val = bool(false)]; + tensor input_469_cast_fp16 = concat(axis = var_14440, interleave = input_469_interleave_0, values = (hidden_states_261_cast_fp16, var_14442_cast_fp16))[name = string("input_469_cast_fp16")]; + tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; + fp16 var_14437_to_fp16 = const()[name = string("op_14437_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_14437_to_fp16, x = input_469_cast_fp16)[name = string("normed_417_cast_fp16")]; + tensor normed_419_begin_0 = const()[name = string("normed_419_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_419_end_0 = const()[name = string("normed_419_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_419_end_mask_0 = const()[name = string("normed_419_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_419_cast_fp16 = slice_by_index(begin = normed_419_begin_0, end = normed_419_end_0, end_mask = normed_419_end_mask_0, x = normed_417_cast_fp16)[name = string("normed_419_cast_fp16")]; + tensor const_783_promoted_to_fp16 = const()[name = string("const_783_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462442432)))]; + tensor hidden_states_263_cast_fp16 = mul(x = normed_419_cast_fp16, y = const_783_promoted_to_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor var_14459 = const()[name = string("op_14459"), val = tensor([0, 2, 1])]; + tensor var_14462_axes_0 = const()[name = string("op_14462_axes_0"), val = tensor([2])]; + tensor var_14460_cast_fp16 = transpose(perm = var_14459, x = hidden_states_263_cast_fp16)[name = string("transpose_11")]; + tensor var_14462_cast_fp16 = expand_dims(axes = var_14462_axes_0, x = var_14460_cast_fp16)[name = string("op_14462_cast_fp16")]; + string var_14478_pad_type_0 = const()[name = string("op_14478_pad_type_0"), val = string("valid")]; + tensor var_14478_strides_0 = const()[name = string("op_14478_strides_0"), val = tensor([1, 1])]; + tensor var_14478_pad_0 = const()[name = string("op_14478_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14478_dilations_0 = const()[name = string("op_14478_dilations_0"), val = tensor([1, 1])]; + int32 var_14478_groups_0 = const()[name = string("op_14478_groups_0"), val = int32(1)]; + tensor var_14478 = conv(dilations = var_14478_dilations_0, groups = var_14478_groups_0, pad = var_14478_pad_0, pad_type = var_14478_pad_type_0, strides = var_14478_strides_0, weight = model_model_layers_26_self_attn_q_proj_weight_palettized, x = var_14462_cast_fp16)[name = string("op_14478")]; + tensor var_14483 = const()[name = string("op_14483"), val = tensor([1, 16, 1, 128])]; + tensor var_14484 = reshape(shape = var_14483, x = var_14478)[name = string("op_14484")]; + string var_14500_pad_type_0 = const()[name = string("op_14500_pad_type_0"), val = string("valid")]; + tensor var_14500_strides_0 = const()[name = string("op_14500_strides_0"), val = tensor([1, 1])]; + tensor var_14500_pad_0 = const()[name = string("op_14500_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14500_dilations_0 = const()[name = string("op_14500_dilations_0"), val = tensor([1, 1])]; + int32 var_14500_groups_0 = const()[name = string("op_14500_groups_0"), val = int32(1)]; + tensor var_14500 = conv(dilations = var_14500_dilations_0, groups = var_14500_groups_0, pad = var_14500_pad_0, pad_type = var_14500_pad_type_0, strides = var_14500_strides_0, weight = model_model_layers_26_self_attn_k_proj_weight_palettized, x = var_14462_cast_fp16)[name = string("op_14500")]; + tensor var_14505 = const()[name = string("op_14505"), val = tensor([1, 8, 1, 128])]; + tensor var_14506 = reshape(shape = var_14505, x = var_14500)[name = string("op_14506")]; + string var_14522_pad_type_0 = const()[name = string("op_14522_pad_type_0"), val = string("valid")]; + tensor var_14522_strides_0 = const()[name = string("op_14522_strides_0"), val = tensor([1, 1])]; + tensor var_14522_pad_0 = const()[name = string("op_14522_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14522_dilations_0 = const()[name = string("op_14522_dilations_0"), val = tensor([1, 1])]; + int32 var_14522_groups_0 = const()[name = string("op_14522_groups_0"), val = int32(1)]; + tensor var_14522 = conv(dilations = var_14522_dilations_0, groups = var_14522_groups_0, pad = var_14522_pad_0, pad_type = var_14522_pad_type_0, strides = var_14522_strides_0, weight = model_model_layers_26_self_attn_v_proj_weight_palettized, x = var_14462_cast_fp16)[name = string("op_14522")]; + tensor var_14527 = const()[name = string("op_14527"), val = tensor([1, 8, 1, 128])]; + tensor var_14528 = reshape(shape = var_14527, x = var_14522)[name = string("op_14528")]; + int32 var_14543 = const()[name = string("op_14543"), val = int32(-1)]; + fp16 const_784_promoted = const()[name = string("const_784_promoted"), val = fp16(-0x1p+0)]; + tensor var_14545 = mul(x = var_14484, y = const_784_promoted)[name = string("op_14545")]; + bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; + tensor input_473 = concat(axis = var_14543, interleave = input_473_interleave_0, values = (var_14484, var_14545))[name = string("input_473")]; + tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; + fp16 var_14540_to_fp16 = const()[name = string("op_14540_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_14540_to_fp16, x = input_473)[name = string("normed_421_cast_fp16")]; + tensor normed_423_begin_0 = const()[name = string("normed_423_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_423_end_0 = const()[name = string("normed_423_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_423_end_mask_0 = const()[name = string("normed_423_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_423 = slice_by_index(begin = normed_423_begin_0, end = normed_423_end_0, end_mask = normed_423_end_mask_0, x = normed_421_cast_fp16)[name = string("normed_423")]; + tensor const_787 = const()[name = string("const_787"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462444544)))]; + tensor q_53 = mul(x = normed_423, y = const_787)[name = string("q_53")]; + int32 var_14568 = const()[name = string("op_14568"), val = int32(-1)]; + fp16 const_788_promoted = const()[name = string("const_788_promoted"), val = fp16(-0x1p+0)]; + tensor var_14570 = mul(x = var_14506, y = const_788_promoted)[name = string("op_14570")]; + bool input_475_interleave_0 = const()[name = string("input_475_interleave_0"), val = bool(false)]; + tensor input_475 = concat(axis = var_14568, interleave = input_475_interleave_0, values = (var_14506, var_14570))[name = string("input_475")]; + tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; + fp16 var_14565_to_fp16 = const()[name = string("op_14565_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_14565_to_fp16, x = input_475)[name = string("normed_425_cast_fp16")]; + tensor normed_427_begin_0 = const()[name = string("normed_427_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_427_end_0 = const()[name = string("normed_427_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_427_end_mask_0 = const()[name = string("normed_427_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_427 = slice_by_index(begin = normed_427_begin_0, end = normed_427_end_0, end_mask = normed_427_end_mask_0, x = normed_425_cast_fp16)[name = string("normed_427")]; + tensor const_791 = const()[name = string("const_791"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462444864)))]; + tensor k_53 = mul(x = normed_427, y = const_791)[name = string("k_53")]; + tensor var_14584 = mul(x = q_53, y = cos_1_cast_fp16)[name = string("op_14584")]; + tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_105 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = q_53)[name = string("x1_105")]; + tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_105 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = q_53)[name = string("x2_105")]; + fp16 const_794_promoted = const()[name = string("const_794_promoted"), val = fp16(-0x1p+0)]; + tensor var_14605 = mul(x = x2_105, y = const_794_promoted)[name = string("op_14605")]; + int32 var_14607 = const()[name = string("op_14607"), val = int32(-1)]; + bool var_14608_interleave_0 = const()[name = string("op_14608_interleave_0"), val = bool(false)]; + tensor var_14608 = concat(axis = var_14607, interleave = var_14608_interleave_0, values = (var_14605, x1_105))[name = string("op_14608")]; + tensor var_14609 = mul(x = var_14608, y = sin_1_cast_fp16)[name = string("op_14609")]; + tensor query_states_105 = add(x = var_14584, y = var_14609)[name = string("query_states_105")]; + tensor var_14612 = mul(x = k_53, y = cos_1_cast_fp16)[name = string("op_14612")]; + tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_107 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = k_53)[name = string("x1_107")]; + tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_107 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = k_53)[name = string("x2_107")]; + fp16 const_797_promoted = const()[name = string("const_797_promoted"), val = fp16(-0x1p+0)]; + tensor var_14633 = mul(x = x2_107, y = const_797_promoted)[name = string("op_14633")]; + int32 var_14635 = const()[name = string("op_14635"), val = int32(-1)]; + bool var_14636_interleave_0 = const()[name = string("op_14636_interleave_0"), val = bool(false)]; + tensor var_14636 = concat(axis = var_14635, interleave = var_14636_interleave_0, values = (var_14633, x1_107))[name = string("op_14636")]; + tensor var_14637 = mul(x = var_14636, y = sin_1_cast_fp16)[name = string("op_14637")]; + tensor key_states_105 = add(x = var_14612, y = var_14637)[name = string("key_states_105")]; + tensor expand_dims_312 = const()[name = string("expand_dims_312"), val = tensor([26])]; + tensor expand_dims_313 = const()[name = string("expand_dims_313"), val = tensor([0])]; + tensor expand_dims_315 = const()[name = string("expand_dims_315"), val = tensor([0])]; + tensor expand_dims_316 = const()[name = string("expand_dims_316"), val = tensor([27])]; + int32 concat_210_axis_0 = const()[name = string("concat_210_axis_0"), val = int32(0)]; + bool concat_210_interleave_0 = const()[name = string("concat_210_interleave_0"), val = bool(false)]; + tensor concat_210 = concat(axis = concat_210_axis_0, interleave = concat_210_interleave_0, values = (expand_dims_312, expand_dims_313, current_pos, expand_dims_315))[name = string("concat_210")]; + tensor concat_211_values1_0 = const()[name = string("concat_211_values1_0"), val = tensor([0])]; + tensor concat_211_values3_0 = const()[name = string("concat_211_values3_0"), val = tensor([0])]; + int32 concat_211_axis_0 = const()[name = string("concat_211_axis_0"), val = int32(0)]; + bool concat_211_interleave_0 = const()[name = string("concat_211_interleave_0"), val = bool(false)]; + tensor concat_211 = concat(axis = concat_211_axis_0, interleave = concat_211_interleave_0, values = (expand_dims_316, concat_211_values1_0, var_1746, concat_211_values3_0))[name = string("concat_211")]; + tensor model_model_kv_cache_0_internal_tensor_assign_53_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16 = slice_update(begin = concat_210, begin_mask = model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0, end = concat_211, end_mask = model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_53_stride_0, update = key_states_105, x = coreml_update_state_107)[name = string("model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_52_write_state")]; + tensor coreml_update_state_108 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_52")]; + tensor expand_dims_318 = const()[name = string("expand_dims_318"), val = tensor([54])]; + tensor expand_dims_319 = const()[name = string("expand_dims_319"), val = tensor([0])]; + tensor expand_dims_321 = const()[name = string("expand_dims_321"), val = tensor([0])]; + tensor expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor([55])]; + int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; + bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; + tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (expand_dims_318, expand_dims_319, current_pos, expand_dims_321))[name = string("concat_214")]; + tensor concat_215_values1_0 = const()[name = string("concat_215_values1_0"), val = tensor([0])]; + tensor concat_215_values3_0 = const()[name = string("concat_215_values3_0"), val = tensor([0])]; + int32 concat_215_axis_0 = const()[name = string("concat_215_axis_0"), val = int32(0)]; + bool concat_215_interleave_0 = const()[name = string("concat_215_interleave_0"), val = bool(false)]; + tensor concat_215 = concat(axis = concat_215_axis_0, interleave = concat_215_interleave_0, values = (expand_dims_322, concat_215_values1_0, var_1746, concat_215_values3_0))[name = string("concat_215")]; + tensor model_model_kv_cache_0_internal_tensor_assign_54_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16 = slice_update(begin = concat_214, begin_mask = model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0, end = concat_215, end_mask = model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_54_stride_0, update = var_14528, x = coreml_update_state_108)[name = string("model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_53_write_state")]; + tensor coreml_update_state_109 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_53")]; + tensor var_14692_begin_0 = const()[name = string("op_14692_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor var_14692_end_0 = const()[name = string("op_14692_end_0"), val = tensor([27, 8, 4096, 128])]; + tensor var_14692_end_mask_0 = const()[name = string("op_14692_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14692_cast_fp16 = slice_by_index(begin = var_14692_begin_0, end = var_14692_end_0, end_mask = var_14692_end_mask_0, x = coreml_update_state_109)[name = string("op_14692_cast_fp16")]; + tensor K_layer_cache_53_axes_0 = const()[name = string("K_layer_cache_53_axes_0"), val = tensor([0])]; + tensor K_layer_cache_53_cast_fp16 = squeeze(axes = K_layer_cache_53_axes_0, x = var_14692_cast_fp16)[name = string("K_layer_cache_53_cast_fp16")]; + tensor var_14699_begin_0 = const()[name = string("op_14699_begin_0"), val = tensor([54, 0, 0, 0])]; + tensor var_14699_end_0 = const()[name = string("op_14699_end_0"), val = tensor([55, 8, 4096, 128])]; + tensor var_14699_end_mask_0 = const()[name = string("op_14699_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14699_cast_fp16 = slice_by_index(begin = var_14699_begin_0, end = var_14699_end_0, end_mask = var_14699_end_mask_0, x = coreml_update_state_109)[name = string("op_14699_cast_fp16")]; + tensor V_layer_cache_53_axes_0 = const()[name = string("V_layer_cache_53_axes_0"), val = tensor([0])]; + tensor V_layer_cache_53_cast_fp16 = squeeze(axes = V_layer_cache_53_axes_0, x = var_14699_cast_fp16)[name = string("V_layer_cache_53_cast_fp16")]; + tensor x_419_axes_0 = const()[name = string("x_419_axes_0"), val = tensor([1])]; + tensor x_419_cast_fp16 = expand_dims(axes = x_419_axes_0, x = K_layer_cache_53_cast_fp16)[name = string("x_419_cast_fp16")]; + tensor var_14736 = const()[name = string("op_14736"), val = tensor([1, 2, 1, 1])]; + tensor x_421_cast_fp16 = tile(reps = var_14736, x = x_419_cast_fp16)[name = string("x_421_cast_fp16")]; + tensor var_14748 = const()[name = string("op_14748"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_107_cast_fp16 = reshape(shape = var_14748, x = x_421_cast_fp16)[name = string("key_states_107_cast_fp16")]; + tensor x_425_axes_0 = const()[name = string("x_425_axes_0"), val = tensor([1])]; + tensor x_425_cast_fp16 = expand_dims(axes = x_425_axes_0, x = V_layer_cache_53_cast_fp16)[name = string("x_425_cast_fp16")]; + tensor var_14756 = const()[name = string("op_14756"), val = tensor([1, 2, 1, 1])]; + tensor x_427_cast_fp16 = tile(reps = var_14756, x = x_425_cast_fp16)[name = string("x_427_cast_fp16")]; + tensor var_14768 = const()[name = string("op_14768"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_159_cast_fp16 = reshape(shape = var_14768, x = x_427_cast_fp16)[name = string("value_states_159_cast_fp16")]; + bool var_14783_transpose_x_1 = const()[name = string("op_14783_transpose_x_1"), val = bool(false)]; + bool var_14783_transpose_y_1 = const()[name = string("op_14783_transpose_y_1"), val = bool(true)]; + tensor var_14783 = matmul(transpose_x = var_14783_transpose_x_1, transpose_y = var_14783_transpose_y_1, x = query_states_105, y = key_states_107_cast_fp16)[name = string("op_14783")]; + fp16 var_14784_to_fp16 = const()[name = string("op_14784_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_157_cast_fp16 = mul(x = var_14783, y = var_14784_to_fp16)[name = string("attn_weights_157_cast_fp16")]; + tensor attn_weights_159_cast_fp16 = add(x = attn_weights_157_cast_fp16, y = causal_mask)[name = string("attn_weights_159_cast_fp16")]; + int32 var_14819 = const()[name = string("op_14819"), val = int32(-1)]; + tensor attn_weights_161_cast_fp16 = softmax(axis = var_14819, x = attn_weights_159_cast_fp16)[name = string("attn_weights_161_cast_fp16")]; + bool attn_output_261_transpose_x_0 = const()[name = string("attn_output_261_transpose_x_0"), val = bool(false)]; + bool attn_output_261_transpose_y_0 = const()[name = string("attn_output_261_transpose_y_0"), val = bool(false)]; + tensor attn_output_261_cast_fp16 = matmul(transpose_x = attn_output_261_transpose_x_0, transpose_y = attn_output_261_transpose_y_0, x = attn_weights_161_cast_fp16, y = value_states_159_cast_fp16)[name = string("attn_output_261_cast_fp16")]; + tensor var_14830_perm_0 = const()[name = string("op_14830_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_14834 = const()[name = string("op_14834"), val = tensor([1, 1, 2048])]; + tensor var_14830_cast_fp16 = transpose(perm = var_14830_perm_0, x = attn_output_261_cast_fp16)[name = string("transpose_10")]; + tensor attn_output_265_cast_fp16 = reshape(shape = var_14834, x = var_14830_cast_fp16)[name = string("attn_output_265_cast_fp16")]; + tensor var_14839 = const()[name = string("op_14839"), val = tensor([0, 2, 1])]; + string var_14855_pad_type_0 = const()[name = string("op_14855_pad_type_0"), val = string("valid")]; + int32 var_14855_groups_0 = const()[name = string("op_14855_groups_0"), val = int32(1)]; + tensor var_14855_strides_0 = const()[name = string("op_14855_strides_0"), val = tensor([1])]; + tensor var_14855_pad_0 = const()[name = string("op_14855_pad_0"), val = tensor([0, 0])]; + tensor var_14855_dilations_0 = const()[name = string("op_14855_dilations_0"), val = tensor([1])]; + tensor squeeze_26_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462445184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464542400))))[name = string("squeeze_26_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_14840_cast_fp16 = transpose(perm = var_14839, x = attn_output_265_cast_fp16)[name = string("transpose_9")]; + tensor var_14855_cast_fp16 = conv(dilations = var_14855_dilations_0, groups = var_14855_groups_0, pad = var_14855_pad_0, pad_type = var_14855_pad_type_0, strides = var_14855_strides_0, weight = squeeze_26_cast_fp16_to_fp32_to_fp16_palettized, x = var_14840_cast_fp16)[name = string("op_14855_cast_fp16")]; + tensor var_14859 = const()[name = string("op_14859"), val = tensor([0, 2, 1])]; + tensor attn_output_269_cast_fp16 = transpose(perm = var_14859, x = var_14855_cast_fp16)[name = string("transpose_8")]; + tensor hidden_states_269_cast_fp16 = add(x = hidden_states_261_cast_fp16, y = attn_output_269_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; + int32 var_14872 = const()[name = string("op_14872"), val = int32(-1)]; + fp16 const_806_promoted_to_fp16 = const()[name = string("const_806_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14874_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = const_806_promoted_to_fp16)[name = string("op_14874_cast_fp16")]; + bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; + tensor input_479_cast_fp16 = concat(axis = var_14872, interleave = input_479_interleave_0, values = (hidden_states_269_cast_fp16, var_14874_cast_fp16))[name = string("input_479_cast_fp16")]; + tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; + fp16 var_14869_to_fp16 = const()[name = string("op_14869_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_14869_to_fp16, x = input_479_cast_fp16)[name = string("normed_429_cast_fp16")]; + tensor normed_431_begin_0 = const()[name = string("normed_431_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_431_end_0 = const()[name = string("normed_431_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_431_end_mask_0 = const()[name = string("normed_431_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_431_cast_fp16 = slice_by_index(begin = normed_431_begin_0, end = normed_431_end_0, end_mask = normed_431_end_mask_0, x = normed_429_cast_fp16)[name = string("normed_431_cast_fp16")]; + tensor const_809_promoted_to_fp16 = const()[name = string("const_809_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464608000)))]; + tensor x_429_cast_fp16 = mul(x = normed_431_cast_fp16, y = const_809_promoted_to_fp16)[name = string("x_429_cast_fp16")]; + tensor var_14899 = const()[name = string("op_14899"), val = tensor([0, 2, 1])]; + tensor input_481_axes_0 = const()[name = string("input_481_axes_0"), val = tensor([2])]; + tensor var_14900 = transpose(perm = var_14899, x = x_429_cast_fp16)[name = string("transpose_7")]; + tensor input_481 = expand_dims(axes = input_481_axes_0, x = var_14900)[name = string("input_481")]; + string input_483_pad_type_0 = const()[name = string("input_483_pad_type_0"), val = string("valid")]; + tensor input_483_strides_0 = const()[name = string("input_483_strides_0"), val = tensor([1, 1])]; + tensor input_483_pad_0 = const()[name = string("input_483_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_483_dilations_0 = const()[name = string("input_483_dilations_0"), val = tensor([1, 1])]; + int32 input_483_groups_0 = const()[name = string("input_483_groups_0"), val = int32(1)]; + tensor input_483 = conv(dilations = input_483_dilations_0, groups = input_483_groups_0, pad = input_483_pad_0, pad_type = input_483_pad_type_0, strides = input_483_strides_0, weight = model_model_layers_26_mlp_gate_proj_weight_palettized, x = input_481)[name = string("input_483")]; + string b_53_pad_type_0 = const()[name = string("b_53_pad_type_0"), val = string("valid")]; + tensor b_53_strides_0 = const()[name = string("b_53_strides_0"), val = tensor([1, 1])]; + tensor b_53_pad_0 = const()[name = string("b_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_53_dilations_0 = const()[name = string("b_53_dilations_0"), val = tensor([1, 1])]; + int32 b_53_groups_0 = const()[name = string("b_53_groups_0"), val = int32(1)]; + tensor b_53 = conv(dilations = b_53_dilations_0, groups = b_53_groups_0, pad = b_53_pad_0, pad_type = b_53_pad_type_0, strides = b_53_strides_0, weight = model_model_layers_26_mlp_up_proj_weight_palettized, x = input_481)[name = string("b_53")]; + tensor c_53 = silu(x = input_483)[name = string("c_53")]; + tensor input_485 = mul(x = c_53, y = b_53)[name = string("input_485")]; + string e_53_pad_type_0 = const()[name = string("e_53_pad_type_0"), val = string("valid")]; + tensor e_53_strides_0 = const()[name = string("e_53_strides_0"), val = tensor([1, 1])]; + tensor e_53_pad_0 = const()[name = string("e_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_53_dilations_0 = const()[name = string("e_53_dilations_0"), val = tensor([1, 1])]; + int32 e_53_groups_0 = const()[name = string("e_53_groups_0"), val = int32(1)]; + tensor e_53 = conv(dilations = e_53_dilations_0, groups = e_53_groups_0, pad = e_53_pad_0, pad_type = e_53_pad_type_0, strides = e_53_strides_0, weight = model_model_layers_26_mlp_down_proj_weight_palettized, x = input_485)[name = string("e_53")]; + tensor var_14922_axes_0 = const()[name = string("op_14922_axes_0"), val = tensor([2])]; + tensor var_14922 = squeeze(axes = var_14922_axes_0, x = e_53)[name = string("op_14922")]; + tensor var_14923 = const()[name = string("op_14923"), val = tensor([0, 2, 1])]; + tensor var_14924 = transpose(perm = var_14923, x = var_14922)[name = string("transpose_6")]; + tensor hidden_states_271_cast_fp16 = add(x = hidden_states_269_cast_fp16, y = var_14924)[name = string("hidden_states_271_cast_fp16")]; + int32 var_14936 = const()[name = string("op_14936"), val = int32(-1)]; + fp16 const_810_promoted_to_fp16 = const()[name = string("const_810_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14938_cast_fp16 = mul(x = hidden_states_271_cast_fp16, y = const_810_promoted_to_fp16)[name = string("op_14938_cast_fp16")]; + bool input_487_interleave_0 = const()[name = string("input_487_interleave_0"), val = bool(false)]; + tensor input_487_cast_fp16 = concat(axis = var_14936, interleave = input_487_interleave_0, values = (hidden_states_271_cast_fp16, var_14938_cast_fp16))[name = string("input_487_cast_fp16")]; + tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; + fp16 var_14933_to_fp16 = const()[name = string("op_14933_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_14933_to_fp16, x = input_487_cast_fp16)[name = string("normed_433_cast_fp16")]; + tensor normed_435_begin_0 = const()[name = string("normed_435_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_435_end_0 = const()[name = string("normed_435_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_435_end_mask_0 = const()[name = string("normed_435_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_435_cast_fp16 = slice_by_index(begin = normed_435_begin_0, end = normed_435_end_0, end_mask = normed_435_end_mask_0, x = normed_433_cast_fp16)[name = string("normed_435_cast_fp16")]; + tensor const_813_promoted_to_fp16 = const()[name = string("const_813_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464610112)))]; + tensor hidden_states_273_cast_fp16 = mul(x = normed_435_cast_fp16, y = const_813_promoted_to_fp16)[name = string("hidden_states_273_cast_fp16")]; + tensor var_14955 = const()[name = string("op_14955"), val = tensor([0, 2, 1])]; + tensor var_14958_axes_0 = const()[name = string("op_14958_axes_0"), val = tensor([2])]; + tensor var_14956_cast_fp16 = transpose(perm = var_14955, x = hidden_states_273_cast_fp16)[name = string("transpose_5")]; + tensor var_14958_cast_fp16 = expand_dims(axes = var_14958_axes_0, x = var_14956_cast_fp16)[name = string("op_14958_cast_fp16")]; + string var_14974_pad_type_0 = const()[name = string("op_14974_pad_type_0"), val = string("valid")]; + tensor var_14974_strides_0 = const()[name = string("op_14974_strides_0"), val = tensor([1, 1])]; + tensor var_14974_pad_0 = const()[name = string("op_14974_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14974_dilations_0 = const()[name = string("op_14974_dilations_0"), val = tensor([1, 1])]; + int32 var_14974_groups_0 = const()[name = string("op_14974_groups_0"), val = int32(1)]; + tensor var_14974 = conv(dilations = var_14974_dilations_0, groups = var_14974_groups_0, pad = var_14974_pad_0, pad_type = var_14974_pad_type_0, strides = var_14974_strides_0, weight = model_model_layers_27_self_attn_q_proj_weight_palettized, x = var_14958_cast_fp16)[name = string("op_14974")]; + tensor var_14979 = const()[name = string("op_14979"), val = tensor([1, 16, 1, 128])]; + tensor var_14980 = reshape(shape = var_14979, x = var_14974)[name = string("op_14980")]; + string var_14996_pad_type_0 = const()[name = string("op_14996_pad_type_0"), val = string("valid")]; + tensor var_14996_strides_0 = const()[name = string("op_14996_strides_0"), val = tensor([1, 1])]; + tensor var_14996_pad_0 = const()[name = string("op_14996_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14996_dilations_0 = const()[name = string("op_14996_dilations_0"), val = tensor([1, 1])]; + int32 var_14996_groups_0 = const()[name = string("op_14996_groups_0"), val = int32(1)]; + tensor var_14996 = conv(dilations = var_14996_dilations_0, groups = var_14996_groups_0, pad = var_14996_pad_0, pad_type = var_14996_pad_type_0, strides = var_14996_strides_0, weight = model_model_layers_27_self_attn_k_proj_weight_palettized, x = var_14958_cast_fp16)[name = string("op_14996")]; + tensor var_15001 = const()[name = string("op_15001"), val = tensor([1, 8, 1, 128])]; + tensor var_15002 = reshape(shape = var_15001, x = var_14996)[name = string("op_15002")]; + string var_15018_pad_type_0 = const()[name = string("op_15018_pad_type_0"), val = string("valid")]; + tensor var_15018_strides_0 = const()[name = string("op_15018_strides_0"), val = tensor([1, 1])]; + tensor var_15018_pad_0 = const()[name = string("op_15018_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15018_dilations_0 = const()[name = string("op_15018_dilations_0"), val = tensor([1, 1])]; + int32 var_15018_groups_0 = const()[name = string("op_15018_groups_0"), val = int32(1)]; + tensor var_15018 = conv(dilations = var_15018_dilations_0, groups = var_15018_groups_0, pad = var_15018_pad_0, pad_type = var_15018_pad_type_0, strides = var_15018_strides_0, weight = model_model_layers_27_self_attn_v_proj_weight_palettized, x = var_14958_cast_fp16)[name = string("op_15018")]; + tensor var_15023 = const()[name = string("op_15023"), val = tensor([1, 8, 1, 128])]; + tensor var_15024 = reshape(shape = var_15023, x = var_15018)[name = string("op_15024")]; + int32 var_15039 = const()[name = string("op_15039"), val = int32(-1)]; + fp16 const_814_promoted = const()[name = string("const_814_promoted"), val = fp16(-0x1p+0)]; + tensor var_15041 = mul(x = var_14980, y = const_814_promoted)[name = string("op_15041")]; + bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; + tensor input_491 = concat(axis = var_15039, interleave = input_491_interleave_0, values = (var_14980, var_15041))[name = string("input_491")]; + tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; + fp16 var_15036_to_fp16 = const()[name = string("op_15036_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_15036_to_fp16, x = input_491)[name = string("normed_437_cast_fp16")]; + tensor normed_439_begin_0 = const()[name = string("normed_439_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_439_end_0 = const()[name = string("normed_439_end_0"), val = tensor([1, 16, 1, 128])]; + tensor normed_439_end_mask_0 = const()[name = string("normed_439_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_439 = slice_by_index(begin = normed_439_begin_0, end = normed_439_end_0, end_mask = normed_439_end_mask_0, x = normed_437_cast_fp16)[name = string("normed_439")]; + tensor const_817 = const()[name = string("const_817"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464612224)))]; + tensor q = mul(x = normed_439, y = const_817)[name = string("q")]; + int32 var_15064 = const()[name = string("op_15064"), val = int32(-1)]; + fp16 const_818_promoted = const()[name = string("const_818_promoted"), val = fp16(-0x1p+0)]; + tensor var_15066 = mul(x = var_15002, y = const_818_promoted)[name = string("op_15066")]; + bool input_493_interleave_0 = const()[name = string("input_493_interleave_0"), val = bool(false)]; + tensor input_493 = concat(axis = var_15064, interleave = input_493_interleave_0, values = (var_15002, var_15066))[name = string("input_493")]; + tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; + fp16 var_15061_to_fp16 = const()[name = string("op_15061_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_15061_to_fp16, x = input_493)[name = string("normed_441_cast_fp16")]; + tensor normed_443_begin_0 = const()[name = string("normed_443_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_443_end_0 = const()[name = string("normed_443_end_0"), val = tensor([1, 8, 1, 128])]; + tensor normed_443_end_mask_0 = const()[name = string("normed_443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_443 = slice_by_index(begin = normed_443_begin_0, end = normed_443_end_0, end_mask = normed_443_end_mask_0, x = normed_441_cast_fp16)[name = string("normed_443")]; + tensor const_821 = const()[name = string("const_821"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464612544)))]; + tensor k = mul(x = normed_443, y = const_821)[name = string("k")]; + tensor var_15080 = mul(x = q, y = cos_1_cast_fp16)[name = string("op_15080")]; + tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 16, 1, 64])]; + tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_109 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = q)[name = string("x1_109")]; + tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 16, 1, 128])]; + tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_109 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = q)[name = string("x2_109")]; + fp16 const_824_promoted = const()[name = string("const_824_promoted"), val = fp16(-0x1p+0)]; + tensor var_15101 = mul(x = x2_109, y = const_824_promoted)[name = string("op_15101")]; + int32 var_15103 = const()[name = string("op_15103"), val = int32(-1)]; + bool var_15104_interleave_0 = const()[name = string("op_15104_interleave_0"), val = bool(false)]; + tensor var_15104 = concat(axis = var_15103, interleave = var_15104_interleave_0, values = (var_15101, x1_109))[name = string("op_15104")]; + tensor var_15105 = mul(x = var_15104, y = sin_1_cast_fp16)[name = string("op_15105")]; + tensor query_states_109 = add(x = var_15080, y = var_15105)[name = string("query_states_109")]; + tensor var_15108 = mul(x = k, y = cos_1_cast_fp16)[name = string("op_15108")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k)[name = string("x2")]; + fp16 const_827_promoted = const()[name = string("const_827_promoted"), val = fp16(-0x1p+0)]; + tensor var_15129 = mul(x = x2, y = const_827_promoted)[name = string("op_15129")]; + int32 var_15131 = const()[name = string("op_15131"), val = int32(-1)]; + bool var_15132_interleave_0 = const()[name = string("op_15132_interleave_0"), val = bool(false)]; + tensor var_15132 = concat(axis = var_15131, interleave = var_15132_interleave_0, values = (var_15129, x1))[name = string("op_15132")]; + tensor var_15133 = mul(x = var_15132, y = sin_1_cast_fp16)[name = string("op_15133")]; + tensor key_states_109 = add(x = var_15108, y = var_15133)[name = string("key_states_109")]; + tensor expand_dims_324 = const()[name = string("expand_dims_324"), val = tensor([27])]; + tensor expand_dims_325 = const()[name = string("expand_dims_325"), val = tensor([0])]; + tensor expand_dims_327 = const()[name = string("expand_dims_327"), val = tensor([0])]; + tensor expand_dims_328 = const()[name = string("expand_dims_328"), val = tensor([28])]; + int32 concat_218_axis_0 = const()[name = string("concat_218_axis_0"), val = int32(0)]; + bool concat_218_interleave_0 = const()[name = string("concat_218_interleave_0"), val = bool(false)]; + tensor concat_218 = concat(axis = concat_218_axis_0, interleave = concat_218_interleave_0, values = (expand_dims_324, expand_dims_325, current_pos, expand_dims_327))[name = string("concat_218")]; + tensor concat_219_values1_0 = const()[name = string("concat_219_values1_0"), val = tensor([0])]; + tensor concat_219_values3_0 = const()[name = string("concat_219_values3_0"), val = tensor([0])]; + int32 concat_219_axis_0 = const()[name = string("concat_219_axis_0"), val = int32(0)]; + bool concat_219_interleave_0 = const()[name = string("concat_219_interleave_0"), val = bool(false)]; + tensor concat_219 = concat(axis = concat_219_axis_0, interleave = concat_219_interleave_0, values = (expand_dims_328, concat_219_values1_0, var_1746, concat_219_values3_0))[name = string("concat_219")]; + tensor model_model_kv_cache_0_internal_tensor_assign_55_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16 = slice_update(begin = concat_218, begin_mask = model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0, end = concat_219, end_mask = model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_55_stride_0, update = key_states_109, x = coreml_update_state_109)[name = string("model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_54_write_state")]; + tensor coreml_update_state_110 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_54")]; + tensor expand_dims_330 = const()[name = string("expand_dims_330"), val = tensor([55])]; + tensor expand_dims_331 = const()[name = string("expand_dims_331"), val = tensor([0])]; + tensor expand_dims_333 = const()[name = string("expand_dims_333"), val = tensor([0])]; + tensor expand_dims_334 = const()[name = string("expand_dims_334"), val = tensor([56])]; + int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; + bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; + tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (expand_dims_330, expand_dims_331, current_pos, expand_dims_333))[name = string("concat_222")]; + tensor concat_223_values1_0 = const()[name = string("concat_223_values1_0"), val = tensor([0])]; + tensor concat_223_values3_0 = const()[name = string("concat_223_values3_0"), val = tensor([0])]; + int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; + bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; + tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (expand_dims_334, concat_223_values1_0, var_1746, concat_223_values3_0))[name = string("concat_223")]; + tensor model_model_kv_cache_0_internal_tensor_assign_56_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16 = slice_update(begin = concat_222, begin_mask = model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0, end = concat_223, end_mask = model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_56_stride_0, update = var_15024, x = coreml_update_state_110)[name = string("model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_55_write_state")]; + tensor coreml_update_state_111 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_55")]; + tensor var_15188_begin_0 = const()[name = string("op_15188_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor var_15188_end_0 = const()[name = string("op_15188_end_0"), val = tensor([28, 8, 4096, 128])]; + tensor var_15188_end_mask_0 = const()[name = string("op_15188_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15188_cast_fp16 = slice_by_index(begin = var_15188_begin_0, end = var_15188_end_0, end_mask = var_15188_end_mask_0, x = coreml_update_state_111)[name = string("op_15188_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_15188_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_15195_begin_0 = const()[name = string("op_15195_begin_0"), val = tensor([55, 0, 0, 0])]; + tensor var_15195_end_0 = const()[name = string("op_15195_end_0"), val = tensor([1, 8, 4096, 128])]; + tensor var_15195_end_mask_0 = const()[name = string("op_15195_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_15195_cast_fp16 = slice_by_index(begin = var_15195_begin_0, end = var_15195_end_0, end_mask = var_15195_end_mask_0, x = coreml_update_state_111)[name = string("op_15195_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_15195_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_435_axes_0 = const()[name = string("x_435_axes_0"), val = tensor([1])]; + tensor x_435_cast_fp16 = expand_dims(axes = x_435_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_435_cast_fp16")]; + tensor var_15232 = const()[name = string("op_15232"), val = tensor([1, 2, 1, 1])]; + tensor x_437_cast_fp16 = tile(reps = var_15232, x = x_435_cast_fp16)[name = string("x_437_cast_fp16")]; + tensor var_15244 = const()[name = string("op_15244"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_cast_fp16 = reshape(shape = var_15244, x = x_437_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor x_441_axes_0 = const()[name = string("x_441_axes_0"), val = tensor([1])]; + tensor x_441_cast_fp16 = expand_dims(axes = x_441_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_441_cast_fp16")]; + tensor var_15252 = const()[name = string("op_15252"), val = tensor([1, 2, 1, 1])]; + tensor x_443_cast_fp16 = tile(reps = var_15252, x = x_441_cast_fp16)[name = string("x_443_cast_fp16")]; + tensor var_15264 = const()[name = string("op_15264"), val = tensor([1, -1, 4096, 128])]; + tensor value_states_165_cast_fp16 = reshape(shape = var_15264, x = x_443_cast_fp16)[name = string("value_states_165_cast_fp16")]; + bool var_15279_transpose_x_1 = const()[name = string("op_15279_transpose_x_1"), val = bool(false)]; + bool var_15279_transpose_y_1 = const()[name = string("op_15279_transpose_y_1"), val = bool(true)]; + tensor var_15279 = matmul(transpose_x = var_15279_transpose_x_1, transpose_y = var_15279_transpose_y_1, x = query_states_109, y = key_states_cast_fp16)[name = string("op_15279")]; + fp16 var_15280_to_fp16 = const()[name = string("op_15280_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_163_cast_fp16 = mul(x = var_15279, y = var_15280_to_fp16)[name = string("attn_weights_163_cast_fp16")]; + tensor attn_weights_165_cast_fp16 = add(x = attn_weights_163_cast_fp16, y = causal_mask)[name = string("attn_weights_165_cast_fp16")]; + int32 var_15315 = const()[name = string("op_15315"), val = int32(-1)]; + tensor attn_weights_cast_fp16 = softmax(axis = var_15315, x = attn_weights_165_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_271_transpose_x_0 = const()[name = string("attn_output_271_transpose_x_0"), val = bool(false)]; + bool attn_output_271_transpose_y_0 = const()[name = string("attn_output_271_transpose_y_0"), val = bool(false)]; + tensor attn_output_271_cast_fp16 = matmul(transpose_x = attn_output_271_transpose_x_0, transpose_y = attn_output_271_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_165_cast_fp16)[name = string("attn_output_271_cast_fp16")]; + tensor var_15326_perm_0 = const()[name = string("op_15326_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_15330 = const()[name = string("op_15330"), val = tensor([1, 1, 2048])]; + tensor var_15326_cast_fp16 = transpose(perm = var_15326_perm_0, x = attn_output_271_cast_fp16)[name = string("transpose_4")]; + tensor attn_output_275_cast_fp16 = reshape(shape = var_15330, x = var_15326_cast_fp16)[name = string("attn_output_275_cast_fp16")]; + tensor var_15335 = const()[name = string("op_15335"), val = tensor([0, 2, 1])]; + string var_15351_pad_type_0 = const()[name = string("op_15351_pad_type_0"), val = string("valid")]; + int32 var_15351_groups_0 = const()[name = string("op_15351_groups_0"), val = int32(1)]; + tensor var_15351_strides_0 = const()[name = string("op_15351_strides_0"), val = tensor([1])]; + tensor var_15351_pad_0 = const()[name = string("op_15351_pad_0"), val = tensor([0, 0])]; + tensor var_15351_dilations_0 = const()[name = string("op_15351_dilations_0"), val = tensor([1])]; + tensor squeeze_27_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464612864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466710080))))[name = string("squeeze_27_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_15336_cast_fp16 = transpose(perm = var_15335, x = attn_output_275_cast_fp16)[name = string("transpose_3")]; + tensor var_15351_cast_fp16 = conv(dilations = var_15351_dilations_0, groups = var_15351_groups_0, pad = var_15351_pad_0, pad_type = var_15351_pad_type_0, strides = var_15351_strides_0, weight = squeeze_27_cast_fp16_to_fp32_to_fp16_palettized, x = var_15336_cast_fp16)[name = string("op_15351_cast_fp16")]; + tensor var_15355 = const()[name = string("op_15355"), val = tensor([0, 2, 1])]; + tensor attn_output_cast_fp16 = transpose(perm = var_15355, x = var_15351_cast_fp16)[name = string("transpose_2")]; + tensor hidden_states_279_cast_fp16 = add(x = hidden_states_271_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_279_cast_fp16")]; + int32 var_15368 = const()[name = string("op_15368"), val = int32(-1)]; + fp16 const_836_promoted_to_fp16 = const()[name = string("const_836_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15370_cast_fp16 = mul(x = hidden_states_279_cast_fp16, y = const_836_promoted_to_fp16)[name = string("op_15370_cast_fp16")]; + bool input_497_interleave_0 = const()[name = string("input_497_interleave_0"), val = bool(false)]; + tensor input_497_cast_fp16 = concat(axis = var_15368, interleave = input_497_interleave_0, values = (hidden_states_279_cast_fp16, var_15370_cast_fp16))[name = string("input_497_cast_fp16")]; + tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; + fp16 var_15365_to_fp16 = const()[name = string("op_15365_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_15365_to_fp16, x = input_497_cast_fp16)[name = string("normed_445_cast_fp16")]; + tensor normed_447_begin_0 = const()[name = string("normed_447_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_447_end_0 = const()[name = string("normed_447_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_447_end_mask_0 = const()[name = string("normed_447_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_447_cast_fp16 = slice_by_index(begin = normed_447_begin_0, end = normed_447_end_0, end_mask = normed_447_end_mask_0, x = normed_445_cast_fp16)[name = string("normed_447_cast_fp16")]; + tensor const_839_promoted_to_fp16 = const()[name = string("const_839_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466775680)))]; + tensor x_445_cast_fp16 = mul(x = normed_447_cast_fp16, y = const_839_promoted_to_fp16)[name = string("x_445_cast_fp16")]; + tensor var_15395 = const()[name = string("op_15395"), val = tensor([0, 2, 1])]; + tensor input_499_axes_0 = const()[name = string("input_499_axes_0"), val = tensor([2])]; + tensor var_15396 = transpose(perm = var_15395, x = x_445_cast_fp16)[name = string("transpose_1")]; + tensor input_499 = expand_dims(axes = input_499_axes_0, x = var_15396)[name = string("input_499")]; + string input_501_pad_type_0 = const()[name = string("input_501_pad_type_0"), val = string("valid")]; + tensor input_501_strides_0 = const()[name = string("input_501_strides_0"), val = tensor([1, 1])]; + tensor input_501_pad_0 = const()[name = string("input_501_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_501_dilations_0 = const()[name = string("input_501_dilations_0"), val = tensor([1, 1])]; + int32 input_501_groups_0 = const()[name = string("input_501_groups_0"), val = int32(1)]; + tensor input_501 = conv(dilations = input_501_dilations_0, groups = input_501_groups_0, pad = input_501_pad_0, pad_type = input_501_pad_type_0, strides = input_501_strides_0, weight = model_model_layers_27_mlp_gate_proj_weight_palettized, x = input_499)[name = string("input_501")]; + string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; + tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; + tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; + int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; + tensor b = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_27_mlp_up_proj_weight_palettized, x = input_499)[name = string("b")]; + tensor c = silu(x = input_501)[name = string("c")]; + tensor input_503 = mul(x = c, y = b)[name = string("input_503")]; + string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; + tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; + tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; + int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; + tensor e = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_27_mlp_down_proj_weight_palettized, x = input_503)[name = string("e")]; + tensor var_15418_axes_0 = const()[name = string("op_15418_axes_0"), val = tensor([2])]; + tensor var_15418 = squeeze(axes = var_15418_axes_0, x = e)[name = string("op_15418")]; + tensor var_15419 = const()[name = string("op_15419"), val = tensor([0, 2, 1])]; + tensor var_15420 = transpose(perm = var_15419, x = var_15418)[name = string("transpose_0")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_279_cast_fp16, y = var_15420)[name = string("hidden_states_cast_fp16")]; + int32 var_15432 = const()[name = string("op_15432"), val = int32(-1)]; + fp16 const_840_promoted_to_fp16 = const()[name = string("const_840_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15434_cast_fp16 = mul(x = hidden_states_cast_fp16, y = const_840_promoted_to_fp16)[name = string("op_15434_cast_fp16")]; + bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; + tensor input_cast_fp16 = concat(axis = var_15432, interleave = input_interleave_0, values = (hidden_states_cast_fp16, var_15434_cast_fp16))[name = string("input_cast_fp16")]; + tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; + fp16 var_15429_to_fp16 = const()[name = string("op_15429_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_15429_to_fp16, x = input_cast_fp16)[name = string("normed_449_cast_fp16")]; + tensor normed_begin_0 = const()[name = string("normed_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_end_0 = const()[name = string("normed_end_0"), val = tensor([1, 1, 1024])]; + tensor normed_end_mask_0 = const()[name = string("normed_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_cast_fp16 = slice_by_index(begin = normed_begin_0, end = normed_end_0, end_mask = normed_end_mask_0, x = normed_449_cast_fp16)[name = string("normed_cast_fp16")]; + tensor const_843_promoted_to_fp16 = const()[name = string("const_843_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466777792)))]; + tensor output_hidden_states = mul(x = normed_cast_fp16, y = const_843_promoted_to_fp16)[name = string("op_15447_cast_fp16")]; + tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; + } -> (output_hidden_states); + func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2228416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3277056))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3342656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4391296))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4456896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7602688))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7799360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10945152))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11141824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14287616))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14353216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16450432))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16581568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17630208))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17695808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18744448))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18810048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21955840))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22152512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25298304))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25494976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28640768))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28706368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30803584))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30934720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31983360))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32048960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33097600))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33163200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36308992))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36505664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39651456))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39848128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42993920))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43059520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45156736))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45287872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46336512))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46402112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47450752))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47516352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50662144))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50858816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54004608))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54201280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57347072))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57412672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59509888))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59641024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60689664))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60755264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61803904))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61869504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65015296))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65211968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68357760))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68554432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71700224))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71765824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73863040))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73994176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75042816))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75108416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76157056))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76222656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79368448))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79565120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82710912))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82907584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86053376))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86118976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88216192))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88347328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89395968))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89461568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90510208))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90575808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93721600))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93918272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97064064))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97260736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100406528))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100472128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102569344))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102700480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103749120))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103814720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104863360))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104928960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108074752))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108271424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111417216))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111613888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114759680))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114825280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116922496))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117053632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118102272))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118167872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119216512))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119282112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122427904))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122624576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125770368))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125967040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129112832))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129178432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131275648))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131406784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132455424))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132521024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133569664))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133635264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136781056))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136977728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140123520))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140320192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143465984))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143531584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145628800))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145759936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146808576))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146874176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147922816))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147988416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151134208))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151330880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154476672))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154673344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157819136))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157884736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159981952))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160113088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161161728))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161227328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162275968))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162341568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165487360))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165684032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168829824))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169026496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172172288))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172237888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174335104))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174466240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175514880))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175580480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176629120))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176694720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179840512))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180037184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183182976))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183379648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186525440))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186591040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188688256))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188819392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189868032))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189933632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190982272))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191047872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194193664))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194390336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197536128))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197732800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200878592))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200944192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203041408))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203172544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204221184))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204286784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205335424))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205401024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208546816))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208743488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211889280))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212085952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215231744))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215297344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217394560))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217525696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218574336))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218639936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219688576))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219754176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222899968))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223096640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226242432))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226439104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229584896))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229650496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231747712))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231878848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232927488))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232993088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234041728))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234107328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237253120))))[name = string("model_model_layers_16_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237449792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240595584))))[name = string("model_model_layers_16_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240792256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243938048))))[name = string("model_model_layers_16_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244003648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246100864))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246232000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247280640))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247346240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248394880))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248460480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251606272))))[name = string("model_model_layers_17_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251802944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254948736))))[name = string("model_model_layers_17_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255145408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258291200))))[name = string("model_model_layers_17_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258356800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260454016))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260585152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261633792))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261699392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262748032))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262813632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265959424))))[name = string("model_model_layers_18_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266156096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269301888))))[name = string("model_model_layers_18_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269498560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272644352))))[name = string("model_model_layers_18_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272709952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274807168))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274938304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275986944))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276052544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277101184))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277166784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280312576))))[name = string("model_model_layers_19_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280509248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283655040))))[name = string("model_model_layers_19_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283851712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286997504))))[name = string("model_model_layers_19_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287063104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289160320))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289291456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290340096))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290405696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291454336))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291519936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294665728))))[name = string("model_model_layers_20_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294862400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298008192))))[name = string("model_model_layers_20_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298204864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301350656))))[name = string("model_model_layers_20_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301416256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303513472))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303644608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304693248))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304758848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305807488))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305873088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309018880))))[name = string("model_model_layers_21_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309215552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312361344))))[name = string("model_model_layers_21_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_21_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312558016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315703808))))[name = string("model_model_layers_21_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315769408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317866624))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317997760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319046400))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319112000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320160640))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320226240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323372032))))[name = string("model_model_layers_22_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323568704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326714496))))[name = string("model_model_layers_22_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_22_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326911168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330056960))))[name = string("model_model_layers_22_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330122560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332219776))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332350912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333399552))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333465152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334513792))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334579392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337725184))))[name = string("model_model_layers_23_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337921856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341067648))))[name = string("model_model_layers_23_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_23_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341264320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344410112))))[name = string("model_model_layers_23_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344475712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346572928))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346704064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347752704))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347818304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348866944))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348932544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352078336))))[name = string("model_model_layers_24_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352275008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355420800))))[name = string("model_model_layers_24_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_24_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355617472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358763264))))[name = string("model_model_layers_24_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358828864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360926080))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361057216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362105856))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362171456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363220096))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363285696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366431488))))[name = string("model_model_layers_25_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366628160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369773952))))[name = string("model_model_layers_25_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_25_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369970624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373116416))))[name = string("model_model_layers_25_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373182016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375279232))))[name = string("model_model_layers_26_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375410368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376459008))))[name = string("model_model_layers_26_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_26_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376524608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377573248))))[name = string("model_model_layers_26_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377638848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380784640))))[name = string("model_model_layers_26_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380981312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384127104))))[name = string("model_model_layers_26_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_26_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384323776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387469568))))[name = string("model_model_layers_26_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_27_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387535168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389632384))))[name = string("model_model_layers_27_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_27_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389763520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390812160))))[name = string("model_model_layers_27_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_27_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390877760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391926400))))[name = string("model_model_layers_27_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_27_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391992000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395137792))))[name = string("model_model_layers_27_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_27_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395334464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398480256))))[name = string("model_model_layers_27_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_27_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398676928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401822720))))[name = string("model_model_layers_27_mlp_down_proj_weight_palettized")]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(8192)]; + tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_1505_axis_0 = const()[name = string("op_1505_axis_0"), val = int32(1)]; + int32 var_1505_batch_dims_0 = const()[name = string("op_1505_batch_dims_0"), val = int32(0)]; + bool var_1505_validate_indices_0 = const()[name = string("op_1505_validate_indices_0"), val = bool(false)]; + tensor var_1497_to_fp16 = const()[name = string("op_1497_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403985536)))]; + string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; + tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_1")]; + tensor var_1505_cast_fp16_cast_uint16 = gather(axis = var_1505_axis_0, batch_dims = var_1505_batch_dims_0, indices = select_0_to_int16, validate_indices = var_1505_validate_indices_0, x = var_1497_to_fp16)[name = string("op_1505_cast_fp16_cast_uint16")]; + tensor var_1509 = const()[name = string("op_1509"), val = tensor([1, 64, 1, 128])]; + tensor cos_1_cast_fp16 = reshape(shape = var_1509, x = var_1505_cast_fp16_cast_uint16)[name = string("cos_1_cast_fp16")]; + int32 var_1519_axis_0 = const()[name = string("op_1519_axis_0"), val = int32(1)]; + int32 var_1519_batch_dims_0 = const()[name = string("op_1519_batch_dims_0"), val = int32(0)]; + bool var_1519_validate_indices_0 = const()[name = string("op_1519_validate_indices_0"), val = bool(false)]; + tensor var_1511_to_fp16 = const()[name = string("op_1511_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401888320)))]; + string select_0_to_uint16_dtype_0 = const()[name = string("select_0_to_uint16_dtype_0"), val = string("uint16")]; + tensor select_0_to_uint16 = cast(dtype = select_0_to_uint16_dtype_0, x = select_0)[name = string("cast_0")]; + tensor var_1519_cast_fp16_cast_uint16 = gather(axis = var_1519_axis_0, batch_dims = var_1519_batch_dims_0, indices = select_0_to_uint16, validate_indices = var_1519_validate_indices_0, x = var_1511_to_fp16)[name = string("op_1519_cast_fp16_cast_uint16")]; + tensor var_1523 = const()[name = string("op_1523"), val = tensor([1, 64, 1, 128])]; + tensor sin_1_cast_fp16 = reshape(shape = var_1523, x = var_1519_cast_fp16_cast_uint16)[name = string("sin_1_cast_fp16")]; + int32 var_1544 = const()[name = string("op_1544"), val = int32(-1)]; + fp16 const_1_promoted_to_fp16 = const()[name = string("const_1_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1546_cast_fp16 = mul(x = hidden_states, y = const_1_promoted_to_fp16)[name = string("op_1546_cast_fp16")]; + bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; + tensor input_1_cast_fp16 = concat(axis = var_1544, interleave = input_1_interleave_0, values = (hidden_states, var_1546_cast_fp16))[name = string("input_1_cast_fp16")]; + tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; + fp16 var_1541_to_fp16 = const()[name = string("op_1541_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1541_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; + tensor normed_3_begin_0 = const()[name = string("normed_3_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_3_end_0 = const()[name = string("normed_3_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_3_end_mask_0 = const()[name = string("normed_3_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_3_cast_fp16 = slice_by_index(begin = normed_3_begin_0, end = normed_3_end_0, end_mask = normed_3_end_mask_0, x = normed_1_cast_fp16)[name = string("normed_3_cast_fp16")]; + tensor const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406082752)))]; + tensor hidden_states_3_cast_fp16 = mul(x = normed_3_cast_fp16, y = const_4_promoted_to_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor var_1569 = const()[name = string("op_1569"), val = tensor([0, 2, 1])]; + tensor var_1572_axes_0 = const()[name = string("op_1572_axes_0"), val = tensor([2])]; + tensor var_1570_cast_fp16 = transpose(perm = var_1569, x = hidden_states_3_cast_fp16)[name = string("transpose_253")]; + tensor var_1572_cast_fp16 = expand_dims(axes = var_1572_axes_0, x = var_1570_cast_fp16)[name = string("op_1572_cast_fp16")]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1572_cast_fp16)[name = string("query_states_1")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1572_cast_fp16)[name = string("key_states_1")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1572_cast_fp16)[name = string("value_states_1")]; + tensor var_1614 = const()[name = string("op_1614"), val = tensor([1, 16, 128, 64])]; + tensor var_1615 = reshape(shape = var_1614, x = query_states_1)[name = string("op_1615")]; + tensor var_1620 = const()[name = string("op_1620"), val = tensor([0, 1, 3, 2])]; + tensor var_1625 = const()[name = string("op_1625"), val = tensor([1, 8, 128, 64])]; + tensor var_1626 = reshape(shape = var_1625, x = key_states_1)[name = string("op_1626")]; + tensor var_1631 = const()[name = string("op_1631"), val = tensor([0, 1, 3, 2])]; + tensor var_1636 = const()[name = string("op_1636"), val = tensor([1, 8, 128, 64])]; + tensor var_1637 = reshape(shape = var_1636, x = value_states_1)[name = string("op_1637")]; + tensor var_1642 = const()[name = string("op_1642"), val = tensor([0, 1, 3, 2])]; + int32 var_1653 = const()[name = string("op_1653"), val = int32(-1)]; + fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_5 = transpose(perm = var_1620, x = var_1615)[name = string("transpose_252")]; + tensor var_1655 = mul(x = hidden_states_5, y = const_6_promoted)[name = string("op_1655")]; + bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; + tensor input_5 = concat(axis = var_1653, interleave = input_5_interleave_0, values = (hidden_states_5, var_1655))[name = string("input_5")]; + tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; + fp16 var_1650_to_fp16 = const()[name = string("op_1650_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1650_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; + tensor normed_7_begin_0 = const()[name = string("normed_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_7_end_0 = const()[name = string("normed_7_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_7_end_mask_0 = const()[name = string("normed_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_7 = slice_by_index(begin = normed_7_begin_0, end = normed_7_end_0, end_mask = normed_7_end_mask_0, x = normed_5_cast_fp16)[name = string("normed_7")]; + tensor const_9 = const()[name = string("const_9"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406084864)))]; + tensor q_1 = mul(x = normed_7, y = const_9)[name = string("q_1")]; + int32 var_1678 = const()[name = string("op_1678"), val = int32(-1)]; + fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_7 = transpose(perm = var_1631, x = var_1626)[name = string("transpose_251")]; + tensor var_1680 = mul(x = hidden_states_7, y = const_10_promoted)[name = string("op_1680")]; + bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; + tensor input_7 = concat(axis = var_1678, interleave = input_7_interleave_0, values = (hidden_states_7, var_1680))[name = string("input_7")]; + tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; + fp16 var_1675_to_fp16 = const()[name = string("op_1675_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1675_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; + tensor normed_11_begin_0 = const()[name = string("normed_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_11_end_0 = const()[name = string("normed_11_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_11_end_mask_0 = const()[name = string("normed_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_11 = slice_by_index(begin = normed_11_begin_0, end = normed_11_end_0, end_mask = normed_11_end_mask_0, x = normed_9_cast_fp16)[name = string("normed_11")]; + tensor const_13 = const()[name = string("const_13"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406085184)))]; + tensor k_1 = mul(x = normed_11, y = const_13)[name = string("k_1")]; + tensor var_1698 = const()[name = string("op_1698"), val = tensor([0, 2, 1, 3])]; + tensor var_1704 = const()[name = string("op_1704"), val = tensor([0, 2, 1, 3])]; + tensor cos_5 = transpose(perm = var_1698, x = cos_1_cast_fp16)[name = string("transpose_250")]; + tensor var_1706 = mul(x = q_1, y = cos_5)[name = string("op_1706")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1)[name = string("x2_1")]; + fp16 const_16_promoted = const()[name = string("const_16_promoted"), val = fp16(-0x1p+0)]; + tensor var_1727 = mul(x = x2_1, y = const_16_promoted)[name = string("op_1727")]; + int32 var_1729 = const()[name = string("op_1729"), val = int32(-1)]; + bool var_1730_interleave_0 = const()[name = string("op_1730_interleave_0"), val = bool(false)]; + tensor var_1730 = concat(axis = var_1729, interleave = var_1730_interleave_0, values = (var_1727, x1_1))[name = string("op_1730")]; + tensor sin_5 = transpose(perm = var_1704, x = sin_1_cast_fp16)[name = string("transpose_249")]; + tensor var_1731 = mul(x = var_1730, y = sin_5)[name = string("op_1731")]; + tensor query_states_3 = add(x = var_1706, y = var_1731)[name = string("query_states_3")]; + tensor var_1734 = mul(x = k_1, y = cos_5)[name = string("op_1734")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1)[name = string("x2_3")]; + fp16 const_19_promoted = const()[name = string("const_19_promoted"), val = fp16(-0x1p+0)]; + tensor var_1755 = mul(x = x2_3, y = const_19_promoted)[name = string("op_1755")]; + int32 var_1757 = const()[name = string("op_1757"), val = int32(-1)]; + bool var_1758_interleave_0 = const()[name = string("op_1758_interleave_0"), val = bool(false)]; + tensor var_1758 = concat(axis = var_1757, interleave = var_1758_interleave_0, values = (var_1755, x1_3))[name = string("op_1758")]; + tensor var_1759 = mul(x = var_1758, y = sin_5)[name = string("op_1759")]; + tensor key_states_3 = add(x = var_1734, y = var_1759)[name = string("key_states_3")]; + tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; + tensor var_1781 = add(x = current_pos, y = seq_length_1)[name = string("op_1781")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_1781, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = key_states_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_56_write_state")]; + tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_56")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([28])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([29])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_1781, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3 = transpose(perm = var_1642, x = var_1637)[name = string("transpose_248")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_56)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_57_write_state")]; + tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_57")]; + tensor var_1830_begin_0 = const()[name = string("op_1830_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1830_end_0 = const()[name = string("op_1830_end_0"), val = tensor([1, 8, 4096, 128])]; + tensor var_1830_end_mask_0 = const()[name = string("op_1830_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1830_cast_fp16 = slice_by_index(begin = var_1830_begin_0, end = var_1830_end_0, end_mask = var_1830_end_mask_0, x = coreml_update_state_57)[name = string("op_1830_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_1830_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_1837_begin_0 = const()[name = string("op_1837_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor var_1837_end_0 = const()[name = string("op_1837_end_0"), val = tensor([29, 8, 4096, 128])]; + tensor var_1837_end_mask_0 = const()[name = string("op_1837_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1837_cast_fp16 = slice_by_index(begin = var_1837_begin_0, end = var_1837_end_0, end_mask = var_1837_end_mask_0, x = coreml_update_state_57)[name = string("op_1837_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_1837_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_3_axes_0 = const()[name = string("x_3_axes_0"), val = tensor([1])]; + tensor x_3_cast_fp16 = expand_dims(axes = x_3_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor var_1866 = const()[name = string("op_1866"), val = tensor([1, 2, 1, 1])]; + tensor x_5_cast_fp16 = tile(reps = var_1866, x = x_3_cast_fp16)[name = string("x_5_cast_fp16")]; + tensor var_1878 = const()[name = string("op_1878"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_7_cast_fp16 = reshape(shape = var_1878, x = x_5_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor x_9_axes_0 = const()[name = string("x_9_axes_0"), val = tensor([1])]; + tensor x_9_cast_fp16 = expand_dims(axes = x_9_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_9_cast_fp16")]; + tensor var_1886 = const()[name = string("op_1886"), val = tensor([1, 2, 1, 1])]; + tensor x_11_cast_fp16 = tile(reps = var_1886, x = x_9_cast_fp16)[name = string("x_11_cast_fp16")]; + bool var_1913_transpose_x_0 = const()[name = string("op_1913_transpose_x_0"), val = bool(false)]; + bool var_1913_transpose_y_0 = const()[name = string("op_1913_transpose_y_0"), val = bool(true)]; + tensor var_1913 = matmul(transpose_x = var_1913_transpose_x_0, transpose_y = var_1913_transpose_y_0, x = query_states_3, y = key_states_7_cast_fp16)[name = string("op_1913")]; + fp16 var_1914_to_fp16 = const()[name = string("op_1914_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_1913, y = var_1914_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("attn_weights_3_cast_fp16")]; + int32 var_1949 = const()[name = string("op_1949"), val = int32(-1)]; + tensor var_1951_cast_fp16 = softmax(axis = var_1949, x = attn_weights_3_cast_fp16)[name = string("op_1951_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([16, 64, 4096])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_1951_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([16, 4096, 128])]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_11_cast_fp16)[name = string("reshape_1_cast_fp16")]; + bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; + bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 16, 64, 128])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor var_1963_perm_0 = const()[name = string("op_1963_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1982 = const()[name = string("op_1982"), val = tensor([1, 64, 2048])]; + tensor var_1963_cast_fp16 = transpose(perm = var_1963_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_247")]; + tensor attn_output_5_cast_fp16 = reshape(shape = var_1982, x = var_1963_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_1987 = const()[name = string("op_1987"), val = tensor([0, 2, 1])]; + string var_2003_pad_type_0 = const()[name = string("op_2003_pad_type_0"), val = string("valid")]; + int32 var_2003_groups_0 = const()[name = string("op_2003_groups_0"), val = int32(1)]; + tensor var_2003_strides_0 = const()[name = string("op_2003_strides_0"), val = tensor([1])]; + tensor var_2003_pad_0 = const()[name = string("op_2003_pad_0"), val = tensor([0, 0])]; + tensor var_2003_dilations_0 = const()[name = string("op_2003_dilations_0"), val = tensor([1])]; + tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406085504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408182720))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_1988_cast_fp16 = transpose(perm = var_1987, x = attn_output_5_cast_fp16)[name = string("transpose_246")]; + tensor var_2003_cast_fp16 = conv(dilations = var_2003_dilations_0, groups = var_2003_groups_0, pad = var_2003_pad_0, pad_type = var_2003_pad_type_0, strides = var_2003_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_1988_cast_fp16)[name = string("op_2003_cast_fp16")]; + tensor var_2007 = const()[name = string("op_2007"), val = tensor([0, 2, 1])]; + tensor attn_output_9_cast_fp16 = transpose(perm = var_2007, x = var_2003_cast_fp16)[name = string("transpose_245")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; + int32 var_2020 = const()[name = string("op_2020"), val = int32(-1)]; + fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2022_cast_fp16 = mul(x = hidden_states_9_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2022_cast_fp16")]; + bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; + tensor input_11_cast_fp16 = concat(axis = var_2020, interleave = input_11_interleave_0, values = (hidden_states_9_cast_fp16, var_2022_cast_fp16))[name = string("input_11_cast_fp16")]; + tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; + fp16 var_2017_to_fp16 = const()[name = string("op_2017_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_2017_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; + tensor normed_15_begin_0 = const()[name = string("normed_15_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_15_end_0 = const()[name = string("normed_15_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_15_end_mask_0 = const()[name = string("normed_15_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_15_cast_fp16 = slice_by_index(begin = normed_15_begin_0, end = normed_15_end_0, end_mask = normed_15_end_mask_0, x = normed_13_cast_fp16)[name = string("normed_15_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408248320)))]; + tensor x_13_cast_fp16 = mul(x = normed_15_cast_fp16, y = const_34_promoted_to_fp16)[name = string("x_13_cast_fp16")]; + tensor var_2047 = const()[name = string("op_2047"), val = tensor([0, 2, 1])]; + tensor input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor([2])]; + tensor var_2048 = transpose(perm = var_2047, x = x_13_cast_fp16)[name = string("transpose_244")]; + tensor input_13 = expand_dims(axes = input_13_axes_0, x = var_2048)[name = string("input_13")]; + string input_15_pad_type_0 = const()[name = string("input_15_pad_type_0"), val = string("valid")]; + tensor input_15_strides_0 = const()[name = string("input_15_strides_0"), val = tensor([1, 1])]; + tensor input_15_pad_0 = const()[name = string("input_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_15_dilations_0 = const()[name = string("input_15_dilations_0"), val = tensor([1, 1])]; + int32 input_15_groups_0 = const()[name = string("input_15_groups_0"), val = int32(1)]; + tensor input_15 = conv(dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_13)[name = string("input_15")]; + string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; + tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; + tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; + int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; + tensor b_1 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_13)[name = string("b_1")]; + tensor c_1 = silu(x = input_15)[name = string("c_1")]; + tensor input_17 = mul(x = c_1, y = b_1)[name = string("input_17")]; + string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; + tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; + tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; + int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; + tensor e_1 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_17)[name = string("e_1")]; + tensor var_2070_axes_0 = const()[name = string("op_2070_axes_0"), val = tensor([2])]; + tensor var_2070 = squeeze(axes = var_2070_axes_0, x = e_1)[name = string("op_2070")]; + tensor var_2071 = const()[name = string("op_2071"), val = tensor([0, 2, 1])]; + tensor var_2072 = transpose(perm = var_2071, x = var_2070)[name = string("transpose_243")]; + tensor hidden_states_11_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = var_2072)[name = string("hidden_states_11_cast_fp16")]; + int32 var_2084 = const()[name = string("op_2084"), val = int32(-1)]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2086_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2086_cast_fp16")]; + bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; + tensor input_19_cast_fp16 = concat(axis = var_2084, interleave = input_19_interleave_0, values = (hidden_states_11_cast_fp16, var_2086_cast_fp16))[name = string("input_19_cast_fp16")]; + tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; + fp16 var_2081_to_fp16 = const()[name = string("op_2081_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2081_to_fp16, x = input_19_cast_fp16)[name = string("normed_17_cast_fp16")]; + tensor normed_19_begin_0 = const()[name = string("normed_19_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_19_end_0 = const()[name = string("normed_19_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_19_end_mask_0 = const()[name = string("normed_19_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_19_cast_fp16 = slice_by_index(begin = normed_19_begin_0, end = normed_19_end_0, end_mask = normed_19_end_mask_0, x = normed_17_cast_fp16)[name = string("normed_19_cast_fp16")]; + tensor const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408250432)))]; + tensor hidden_states_13_cast_fp16 = mul(x = normed_19_cast_fp16, y = const_38_promoted_to_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor var_2109 = const()[name = string("op_2109"), val = tensor([0, 2, 1])]; + tensor var_2112_axes_0 = const()[name = string("op_2112_axes_0"), val = tensor([2])]; + tensor var_2110_cast_fp16 = transpose(perm = var_2109, x = hidden_states_13_cast_fp16)[name = string("transpose_242")]; + tensor var_2112_cast_fp16 = expand_dims(axes = var_2112_axes_0, x = var_2110_cast_fp16)[name = string("op_2112_cast_fp16")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2112_cast_fp16)[name = string("query_states_9")]; + string key_states_11_pad_type_0 = const()[name = string("key_states_11_pad_type_0"), val = string("valid")]; + tensor key_states_11_strides_0 = const()[name = string("key_states_11_strides_0"), val = tensor([1, 1])]; + tensor key_states_11_pad_0 = const()[name = string("key_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_11_dilations_0 = const()[name = string("key_states_11_dilations_0"), val = tensor([1, 1])]; + int32 key_states_11_groups_0 = const()[name = string("key_states_11_groups_0"), val = int32(1)]; + tensor key_states_11 = conv(dilations = key_states_11_dilations_0, groups = key_states_11_groups_0, pad = key_states_11_pad_0, pad_type = key_states_11_pad_type_0, strides = key_states_11_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2112_cast_fp16)[name = string("key_states_11")]; + string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; + tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; + tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; + int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; + tensor value_states_9 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2112_cast_fp16)[name = string("value_states_9")]; + tensor var_2154 = const()[name = string("op_2154"), val = tensor([1, 16, 128, 64])]; + tensor var_2155 = reshape(shape = var_2154, x = query_states_9)[name = string("op_2155")]; + tensor var_2160 = const()[name = string("op_2160"), val = tensor([0, 1, 3, 2])]; + tensor var_2165 = const()[name = string("op_2165"), val = tensor([1, 8, 128, 64])]; + tensor var_2166 = reshape(shape = var_2165, x = key_states_11)[name = string("op_2166")]; + tensor var_2171 = const()[name = string("op_2171"), val = tensor([0, 1, 3, 2])]; + tensor var_2176 = const()[name = string("op_2176"), val = tensor([1, 8, 128, 64])]; + tensor var_2177 = reshape(shape = var_2176, x = value_states_9)[name = string("op_2177")]; + tensor var_2182 = const()[name = string("op_2182"), val = tensor([0, 1, 3, 2])]; + int32 var_2193 = const()[name = string("op_2193"), val = int32(-1)]; + fp16 const_40_promoted = const()[name = string("const_40_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_15 = transpose(perm = var_2160, x = var_2155)[name = string("transpose_241")]; + tensor var_2195 = mul(x = hidden_states_15, y = const_40_promoted)[name = string("op_2195")]; + bool input_23_interleave_0 = const()[name = string("input_23_interleave_0"), val = bool(false)]; + tensor input_23 = concat(axis = var_2193, interleave = input_23_interleave_0, values = (hidden_states_15, var_2195))[name = string("input_23")]; + tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; + fp16 var_2190_to_fp16 = const()[name = string("op_2190_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2190_to_fp16, x = input_23)[name = string("normed_21_cast_fp16")]; + tensor normed_23_begin_0 = const()[name = string("normed_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_23_end_0 = const()[name = string("normed_23_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_23_end_mask_0 = const()[name = string("normed_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_23 = slice_by_index(begin = normed_23_begin_0, end = normed_23_end_0, end_mask = normed_23_end_mask_0, x = normed_21_cast_fp16)[name = string("normed_23")]; + tensor const_43 = const()[name = string("const_43"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408252544)))]; + tensor q_3 = mul(x = normed_23, y = const_43)[name = string("q_3")]; + int32 var_2218 = const()[name = string("op_2218"), val = int32(-1)]; + fp16 const_44_promoted = const()[name = string("const_44_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_17 = transpose(perm = var_2171, x = var_2166)[name = string("transpose_240")]; + tensor var_2220 = mul(x = hidden_states_17, y = const_44_promoted)[name = string("op_2220")]; + bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; + tensor input_25 = concat(axis = var_2218, interleave = input_25_interleave_0, values = (hidden_states_17, var_2220))[name = string("input_25")]; + tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; + fp16 var_2215_to_fp16 = const()[name = string("op_2215_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2215_to_fp16, x = input_25)[name = string("normed_25_cast_fp16")]; + tensor normed_27_begin_0 = const()[name = string("normed_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_27_end_0 = const()[name = string("normed_27_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_27_end_mask_0 = const()[name = string("normed_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_27 = slice_by_index(begin = normed_27_begin_0, end = normed_27_end_0, end_mask = normed_27_end_mask_0, x = normed_25_cast_fp16)[name = string("normed_27")]; + tensor const_47 = const()[name = string("const_47"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408252864)))]; + tensor k_3 = mul(x = normed_27, y = const_47)[name = string("k_3")]; + tensor var_2246 = mul(x = q_3, y = cos_5)[name = string("op_2246")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3)[name = string("x2_5")]; + fp16 const_50_promoted = const()[name = string("const_50_promoted"), val = fp16(-0x1p+0)]; + tensor var_2267 = mul(x = x2_5, y = const_50_promoted)[name = string("op_2267")]; + int32 var_2269 = const()[name = string("op_2269"), val = int32(-1)]; + bool var_2270_interleave_0 = const()[name = string("op_2270_interleave_0"), val = bool(false)]; + tensor var_2270 = concat(axis = var_2269, interleave = var_2270_interleave_0, values = (var_2267, x1_5))[name = string("op_2270")]; + tensor var_2271 = mul(x = var_2270, y = sin_5)[name = string("op_2271")]; + tensor query_states_11 = add(x = var_2246, y = var_2271)[name = string("query_states_11")]; + tensor var_2274 = mul(x = k_3, y = cos_5)[name = string("op_2274")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3)[name = string("x2_7")]; + fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; + tensor var_2295 = mul(x = x2_7, y = const_53_promoted)[name = string("op_2295")]; + int32 var_2297 = const()[name = string("op_2297"), val = int32(-1)]; + bool var_2298_interleave_0 = const()[name = string("op_2298_interleave_0"), val = bool(false)]; + tensor var_2298 = concat(axis = var_2297, interleave = var_2298_interleave_0, values = (var_2295, x1_7))[name = string("op_2298")]; + tensor var_2299 = mul(x = var_2298, y = sin_5)[name = string("op_2299")]; + tensor key_states_13 = add(x = var_2274, y = var_2299)[name = string("key_states_13")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_1781, concat_21_values3_0))[name = string("concat_21")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = key_states_13, x = coreml_update_state_57)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_58_write_state")]; + tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_58")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([29])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([30])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; + tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; + tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; + int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; + bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; + tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_1781, concat_25_values3_0))[name = string("concat_25")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_11 = transpose(perm = var_2182, x = var_2177)[name = string("transpose_239")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_11, x = coreml_update_state_58)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_59_write_state")]; + tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_59")]; + tensor var_2370_begin_0 = const()[name = string("op_2370_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_2370_end_0 = const()[name = string("op_2370_end_0"), val = tensor([2, 8, 4096, 128])]; + tensor var_2370_end_mask_0 = const()[name = string("op_2370_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = coreml_update_state_59)[name = string("op_2370_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_2370_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_2377_begin_0 = const()[name = string("op_2377_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor var_2377_end_0 = const()[name = string("op_2377_end_0"), val = tensor([30, 8, 4096, 128])]; + tensor var_2377_end_mask_0 = const()[name = string("op_2377_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2377_cast_fp16 = slice_by_index(begin = var_2377_begin_0, end = var_2377_end_0, end_mask = var_2377_end_mask_0, x = coreml_update_state_59)[name = string("op_2377_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_2377_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_19_axes_0 = const()[name = string("x_19_axes_0"), val = tensor([1])]; + tensor x_19_cast_fp16 = expand_dims(axes = x_19_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_2406 = const()[name = string("op_2406"), val = tensor([1, 2, 1, 1])]; + tensor x_21_cast_fp16 = tile(reps = var_2406, x = x_19_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor var_2418 = const()[name = string("op_2418"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_17_cast_fp16 = reshape(shape = var_2418, x = x_21_cast_fp16)[name = string("key_states_17_cast_fp16")]; + tensor x_25_axes_0 = const()[name = string("x_25_axes_0"), val = tensor([1])]; + tensor x_25_cast_fp16 = expand_dims(axes = x_25_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_2426 = const()[name = string("op_2426"), val = tensor([1, 2, 1, 1])]; + tensor x_27_cast_fp16 = tile(reps = var_2426, x = x_25_cast_fp16)[name = string("x_27_cast_fp16")]; + bool var_2453_transpose_x_0 = const()[name = string("op_2453_transpose_x_0"), val = bool(false)]; + bool var_2453_transpose_y_0 = const()[name = string("op_2453_transpose_y_0"), val = bool(true)]; + tensor var_2453 = matmul(transpose_x = var_2453_transpose_x_0, transpose_y = var_2453_transpose_y_0, x = query_states_11, y = key_states_17_cast_fp16)[name = string("op_2453")]; + fp16 var_2454_to_fp16 = const()[name = string("op_2454_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_2453, y = var_2454_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("attn_weights_7_cast_fp16")]; + int32 var_2489 = const()[name = string("op_2489"), val = int32(-1)]; + tensor var_2491_cast_fp16 = softmax(axis = var_2489, x = attn_weights_7_cast_fp16)[name = string("op_2491_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([16, 64, 4096])]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_2491_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([16, 4096, 128])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_27_cast_fp16)[name = string("reshape_4_cast_fp16")]; + bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; + bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 16, 64, 128])]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor var_2503_perm_0 = const()[name = string("op_2503_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_2522 = const()[name = string("op_2522"), val = tensor([1, 64, 2048])]; + tensor var_2503_cast_fp16 = transpose(perm = var_2503_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_238")]; + tensor attn_output_15_cast_fp16 = reshape(shape = var_2522, x = var_2503_cast_fp16)[name = string("attn_output_15_cast_fp16")]; + tensor var_2527 = const()[name = string("op_2527"), val = tensor([0, 2, 1])]; + string var_2543_pad_type_0 = const()[name = string("op_2543_pad_type_0"), val = string("valid")]; + int32 var_2543_groups_0 = const()[name = string("op_2543_groups_0"), val = int32(1)]; + tensor var_2543_strides_0 = const()[name = string("op_2543_strides_0"), val = tensor([1])]; + tensor var_2543_pad_0 = const()[name = string("op_2543_pad_0"), val = tensor([0, 0])]; + tensor var_2543_dilations_0 = const()[name = string("op_2543_dilations_0"), val = tensor([1])]; + tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408253184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410350400))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_2528_cast_fp16 = transpose(perm = var_2527, x = attn_output_15_cast_fp16)[name = string("transpose_237")]; + tensor var_2543_cast_fp16 = conv(dilations = var_2543_dilations_0, groups = var_2543_groups_0, pad = var_2543_pad_0, pad_type = var_2543_pad_type_0, strides = var_2543_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2528_cast_fp16)[name = string("op_2543_cast_fp16")]; + tensor var_2547 = const()[name = string("op_2547"), val = tensor([0, 2, 1])]; + tensor attn_output_19_cast_fp16 = transpose(perm = var_2547, x = var_2543_cast_fp16)[name = string("transpose_236")]; + tensor hidden_states_19_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + int32 var_2560 = const()[name = string("op_2560"), val = int32(-1)]; + fp16 const_65_promoted_to_fp16 = const()[name = string("const_65_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2562_cast_fp16 = mul(x = hidden_states_19_cast_fp16, y = const_65_promoted_to_fp16)[name = string("op_2562_cast_fp16")]; + bool input_29_interleave_0 = const()[name = string("input_29_interleave_0"), val = bool(false)]; + tensor input_29_cast_fp16 = concat(axis = var_2560, interleave = input_29_interleave_0, values = (hidden_states_19_cast_fp16, var_2562_cast_fp16))[name = string("input_29_cast_fp16")]; + tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; + fp16 var_2557_to_fp16 = const()[name = string("op_2557_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2557_to_fp16, x = input_29_cast_fp16)[name = string("normed_29_cast_fp16")]; + tensor normed_31_begin_0 = const()[name = string("normed_31_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_31_end_0 = const()[name = string("normed_31_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_31_end_mask_0 = const()[name = string("normed_31_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_31_cast_fp16 = slice_by_index(begin = normed_31_begin_0, end = normed_31_end_0, end_mask = normed_31_end_mask_0, x = normed_29_cast_fp16)[name = string("normed_31_cast_fp16")]; + tensor const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410416000)))]; + tensor x_29_cast_fp16 = mul(x = normed_31_cast_fp16, y = const_68_promoted_to_fp16)[name = string("x_29_cast_fp16")]; + tensor var_2587 = const()[name = string("op_2587"), val = tensor([0, 2, 1])]; + tensor input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor([2])]; + tensor var_2588 = transpose(perm = var_2587, x = x_29_cast_fp16)[name = string("transpose_235")]; + tensor input_31 = expand_dims(axes = input_31_axes_0, x = var_2588)[name = string("input_31")]; + string input_33_pad_type_0 = const()[name = string("input_33_pad_type_0"), val = string("valid")]; + tensor input_33_strides_0 = const()[name = string("input_33_strides_0"), val = tensor([1, 1])]; + tensor input_33_pad_0 = const()[name = string("input_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_33_dilations_0 = const()[name = string("input_33_dilations_0"), val = tensor([1, 1])]; + int32 input_33_groups_0 = const()[name = string("input_33_groups_0"), val = int32(1)]; + tensor input_33 = conv(dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_31)[name = string("input_33")]; + string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; + tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; + tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; + int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; + tensor b_3 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_31)[name = string("b_3")]; + tensor c_3 = silu(x = input_33)[name = string("c_3")]; + tensor input_35 = mul(x = c_3, y = b_3)[name = string("input_35")]; + string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; + tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; + tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; + int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; + tensor e_3 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_35)[name = string("e_3")]; + tensor var_2610_axes_0 = const()[name = string("op_2610_axes_0"), val = tensor([2])]; + tensor var_2610 = squeeze(axes = var_2610_axes_0, x = e_3)[name = string("op_2610")]; + tensor var_2611 = const()[name = string("op_2611"), val = tensor([0, 2, 1])]; + tensor var_2612 = transpose(perm = var_2611, x = var_2610)[name = string("transpose_234")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = var_2612)[name = string("hidden_states_21_cast_fp16")]; + int32 var_2624 = const()[name = string("op_2624"), val = int32(-1)]; + fp16 const_69_promoted_to_fp16 = const()[name = string("const_69_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2626_cast_fp16 = mul(x = hidden_states_21_cast_fp16, y = const_69_promoted_to_fp16)[name = string("op_2626_cast_fp16")]; + bool input_37_interleave_0 = const()[name = string("input_37_interleave_0"), val = bool(false)]; + tensor input_37_cast_fp16 = concat(axis = var_2624, interleave = input_37_interleave_0, values = (hidden_states_21_cast_fp16, var_2626_cast_fp16))[name = string("input_37_cast_fp16")]; + tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; + fp16 var_2621_to_fp16 = const()[name = string("op_2621_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2621_to_fp16, x = input_37_cast_fp16)[name = string("normed_33_cast_fp16")]; + tensor normed_35_begin_0 = const()[name = string("normed_35_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_35_end_0 = const()[name = string("normed_35_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_35_end_mask_0 = const()[name = string("normed_35_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_35_cast_fp16 = slice_by_index(begin = normed_35_begin_0, end = normed_35_end_0, end_mask = normed_35_end_mask_0, x = normed_33_cast_fp16)[name = string("normed_35_cast_fp16")]; + tensor const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410418112)))]; + tensor hidden_states_23_cast_fp16 = mul(x = normed_35_cast_fp16, y = const_72_promoted_to_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor var_2649 = const()[name = string("op_2649"), val = tensor([0, 2, 1])]; + tensor var_2652_axes_0 = const()[name = string("op_2652_axes_0"), val = tensor([2])]; + tensor var_2650_cast_fp16 = transpose(perm = var_2649, x = hidden_states_23_cast_fp16)[name = string("transpose_233")]; + tensor var_2652_cast_fp16 = expand_dims(axes = var_2652_axes_0, x = var_2650_cast_fp16)[name = string("op_2652_cast_fp16")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_2652_cast_fp16)[name = string("query_states_17")]; + string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; + tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; + tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; + int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; + tensor key_states_21 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_2652_cast_fp16)[name = string("key_states_21")]; + string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; + tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; + tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; + int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; + tensor value_states_17 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_2652_cast_fp16)[name = string("value_states_17")]; + tensor var_2694 = const()[name = string("op_2694"), val = tensor([1, 16, 128, 64])]; + tensor var_2695 = reshape(shape = var_2694, x = query_states_17)[name = string("op_2695")]; + tensor var_2700 = const()[name = string("op_2700"), val = tensor([0, 1, 3, 2])]; + tensor var_2705 = const()[name = string("op_2705"), val = tensor([1, 8, 128, 64])]; + tensor var_2706 = reshape(shape = var_2705, x = key_states_21)[name = string("op_2706")]; + tensor var_2711 = const()[name = string("op_2711"), val = tensor([0, 1, 3, 2])]; + tensor var_2716 = const()[name = string("op_2716"), val = tensor([1, 8, 128, 64])]; + tensor var_2717 = reshape(shape = var_2716, x = value_states_17)[name = string("op_2717")]; + tensor var_2722 = const()[name = string("op_2722"), val = tensor([0, 1, 3, 2])]; + int32 var_2733 = const()[name = string("op_2733"), val = int32(-1)]; + fp16 const_74_promoted = const()[name = string("const_74_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_25 = transpose(perm = var_2700, x = var_2695)[name = string("transpose_232")]; + tensor var_2735 = mul(x = hidden_states_25, y = const_74_promoted)[name = string("op_2735")]; + bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; + tensor input_41 = concat(axis = var_2733, interleave = input_41_interleave_0, values = (hidden_states_25, var_2735))[name = string("input_41")]; + tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; + fp16 var_2730_to_fp16 = const()[name = string("op_2730_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2730_to_fp16, x = input_41)[name = string("normed_37_cast_fp16")]; + tensor normed_39_begin_0 = const()[name = string("normed_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_39_end_0 = const()[name = string("normed_39_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_39_end_mask_0 = const()[name = string("normed_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_39 = slice_by_index(begin = normed_39_begin_0, end = normed_39_end_0, end_mask = normed_39_end_mask_0, x = normed_37_cast_fp16)[name = string("normed_39")]; + tensor const_77 = const()[name = string("const_77"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410420224)))]; + tensor q_5 = mul(x = normed_39, y = const_77)[name = string("q_5")]; + int32 var_2758 = const()[name = string("op_2758"), val = int32(-1)]; + fp16 const_78_promoted = const()[name = string("const_78_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_27 = transpose(perm = var_2711, x = var_2706)[name = string("transpose_231")]; + tensor var_2760 = mul(x = hidden_states_27, y = const_78_promoted)[name = string("op_2760")]; + bool input_43_interleave_0 = const()[name = string("input_43_interleave_0"), val = bool(false)]; + tensor input_43 = concat(axis = var_2758, interleave = input_43_interleave_0, values = (hidden_states_27, var_2760))[name = string("input_43")]; + tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; + fp16 var_2755_to_fp16 = const()[name = string("op_2755_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_2755_to_fp16, x = input_43)[name = string("normed_41_cast_fp16")]; + tensor normed_43_begin_0 = const()[name = string("normed_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_43_end_0 = const()[name = string("normed_43_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_43_end_mask_0 = const()[name = string("normed_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_43 = slice_by_index(begin = normed_43_begin_0, end = normed_43_end_0, end_mask = normed_43_end_mask_0, x = normed_41_cast_fp16)[name = string("normed_43")]; + tensor const_81 = const()[name = string("const_81"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410420544)))]; + tensor k_5 = mul(x = normed_43, y = const_81)[name = string("k_5")]; + tensor var_2786 = mul(x = q_5, y = cos_5)[name = string("op_2786")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5)[name = string("x2_9")]; + fp16 const_84_promoted = const()[name = string("const_84_promoted"), val = fp16(-0x1p+0)]; + tensor var_2807 = mul(x = x2_9, y = const_84_promoted)[name = string("op_2807")]; + int32 var_2809 = const()[name = string("op_2809"), val = int32(-1)]; + bool var_2810_interleave_0 = const()[name = string("op_2810_interleave_0"), val = bool(false)]; + tensor var_2810 = concat(axis = var_2809, interleave = var_2810_interleave_0, values = (var_2807, x1_9))[name = string("op_2810")]; + tensor var_2811 = mul(x = var_2810, y = sin_5)[name = string("op_2811")]; + tensor query_states_19 = add(x = var_2786, y = var_2811)[name = string("query_states_19")]; + tensor var_2814 = mul(x = k_5, y = cos_5)[name = string("op_2814")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5)[name = string("x2_11")]; + fp16 const_87_promoted = const()[name = string("const_87_promoted"), val = fp16(-0x1p+0)]; + tensor var_2835 = mul(x = x2_11, y = const_87_promoted)[name = string("op_2835")]; + int32 var_2837 = const()[name = string("op_2837"), val = int32(-1)]; + bool var_2838_interleave_0 = const()[name = string("op_2838_interleave_0"), val = bool(false)]; + tensor var_2838 = concat(axis = var_2837, interleave = var_2838_interleave_0, values = (var_2835, x1_11))[name = string("op_2838")]; + tensor var_2839 = mul(x = var_2838, y = sin_5)[name = string("op_2839")]; + tensor key_states_23 = add(x = var_2814, y = var_2839)[name = string("key_states_23")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_1781, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = key_states_23, x = coreml_update_state_59)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_60_write_state")]; + tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_60")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([30])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([31])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_1781, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_19 = transpose(perm = var_2722, x = var_2717)[name = string("transpose_230")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_19, x = coreml_update_state_60)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_61_write_state")]; + tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_61")]; + tensor var_2910_begin_0 = const()[name = string("op_2910_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_2910_end_0 = const()[name = string("op_2910_end_0"), val = tensor([3, 8, 4096, 128])]; + tensor var_2910_end_mask_0 = const()[name = string("op_2910_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2910_cast_fp16 = slice_by_index(begin = var_2910_begin_0, end = var_2910_end_0, end_mask = var_2910_end_mask_0, x = coreml_update_state_61)[name = string("op_2910_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_2910_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_2917_begin_0 = const()[name = string("op_2917_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor var_2917_end_0 = const()[name = string("op_2917_end_0"), val = tensor([31, 8, 4096, 128])]; + tensor var_2917_end_mask_0 = const()[name = string("op_2917_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_2917_cast_fp16 = slice_by_index(begin = var_2917_begin_0, end = var_2917_end_0, end_mask = var_2917_end_mask_0, x = coreml_update_state_61)[name = string("op_2917_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_2917_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_35_axes_0 = const()[name = string("x_35_axes_0"), val = tensor([1])]; + tensor x_35_cast_fp16 = expand_dims(axes = x_35_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_35_cast_fp16")]; + tensor var_2946 = const()[name = string("op_2946"), val = tensor([1, 2, 1, 1])]; + tensor x_37_cast_fp16 = tile(reps = var_2946, x = x_35_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_2958 = const()[name = string("op_2958"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_27_cast_fp16 = reshape(shape = var_2958, x = x_37_cast_fp16)[name = string("key_states_27_cast_fp16")]; + tensor x_41_axes_0 = const()[name = string("x_41_axes_0"), val = tensor([1])]; + tensor x_41_cast_fp16 = expand_dims(axes = x_41_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_2966 = const()[name = string("op_2966"), val = tensor([1, 2, 1, 1])]; + tensor x_43_cast_fp16 = tile(reps = var_2966, x = x_41_cast_fp16)[name = string("x_43_cast_fp16")]; + bool var_2993_transpose_x_0 = const()[name = string("op_2993_transpose_x_0"), val = bool(false)]; + bool var_2993_transpose_y_0 = const()[name = string("op_2993_transpose_y_0"), val = bool(true)]; + tensor var_2993 = matmul(transpose_x = var_2993_transpose_x_0, transpose_y = var_2993_transpose_y_0, x = query_states_19, y = key_states_27_cast_fp16)[name = string("op_2993")]; + fp16 var_2994_to_fp16 = const()[name = string("op_2994_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_2993, y = var_2994_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor attn_weights_11_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("attn_weights_11_cast_fp16")]; + int32 var_3029 = const()[name = string("op_3029"), val = int32(-1)]; + tensor var_3031_cast_fp16 = softmax(axis = var_3029, x = attn_weights_11_cast_fp16)[name = string("op_3031_cast_fp16")]; + tensor concat_48 = const()[name = string("concat_48"), val = tensor([16, 64, 4096])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_3031_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor concat_49 = const()[name = string("concat_49"), val = tensor([16, 4096, 128])]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_43_cast_fp16)[name = string("reshape_7_cast_fp16")]; + bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; + bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; + tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 16, 64, 128])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor var_3043_perm_0 = const()[name = string("op_3043_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3062 = const()[name = string("op_3062"), val = tensor([1, 64, 2048])]; + tensor var_3043_cast_fp16 = transpose(perm = var_3043_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_229")]; + tensor attn_output_25_cast_fp16 = reshape(shape = var_3062, x = var_3043_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_3067 = const()[name = string("op_3067"), val = tensor([0, 2, 1])]; + string var_3083_pad_type_0 = const()[name = string("op_3083_pad_type_0"), val = string("valid")]; + int32 var_3083_groups_0 = const()[name = string("op_3083_groups_0"), val = int32(1)]; + tensor var_3083_strides_0 = const()[name = string("op_3083_strides_0"), val = tensor([1])]; + tensor var_3083_pad_0 = const()[name = string("op_3083_pad_0"), val = tensor([0, 0])]; + tensor var_3083_dilations_0 = const()[name = string("op_3083_dilations_0"), val = tensor([1])]; + tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410420864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412518080))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3068_cast_fp16 = transpose(perm = var_3067, x = attn_output_25_cast_fp16)[name = string("transpose_228")]; + tensor var_3083_cast_fp16 = conv(dilations = var_3083_dilations_0, groups = var_3083_groups_0, pad = var_3083_pad_0, pad_type = var_3083_pad_type_0, strides = var_3083_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_3068_cast_fp16)[name = string("op_3083_cast_fp16")]; + tensor var_3087 = const()[name = string("op_3087"), val = tensor([0, 2, 1])]; + tensor attn_output_29_cast_fp16 = transpose(perm = var_3087, x = var_3083_cast_fp16)[name = string("transpose_227")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + int32 var_3100 = const()[name = string("op_3100"), val = int32(-1)]; + fp16 const_99_promoted_to_fp16 = const()[name = string("const_99_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3102_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = const_99_promoted_to_fp16)[name = string("op_3102_cast_fp16")]; + bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; + tensor input_47_cast_fp16 = concat(axis = var_3100, interleave = input_47_interleave_0, values = (hidden_states_29_cast_fp16, var_3102_cast_fp16))[name = string("input_47_cast_fp16")]; + tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; + fp16 var_3097_to_fp16 = const()[name = string("op_3097_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_3097_to_fp16, x = input_47_cast_fp16)[name = string("normed_45_cast_fp16")]; + tensor normed_47_begin_0 = const()[name = string("normed_47_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_47_end_0 = const()[name = string("normed_47_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_47_end_mask_0 = const()[name = string("normed_47_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_47_cast_fp16 = slice_by_index(begin = normed_47_begin_0, end = normed_47_end_0, end_mask = normed_47_end_mask_0, x = normed_45_cast_fp16)[name = string("normed_47_cast_fp16")]; + tensor const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412583680)))]; + tensor x_45_cast_fp16 = mul(x = normed_47_cast_fp16, y = const_102_promoted_to_fp16)[name = string("x_45_cast_fp16")]; + tensor var_3127 = const()[name = string("op_3127"), val = tensor([0, 2, 1])]; + tensor input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor([2])]; + tensor var_3128 = transpose(perm = var_3127, x = x_45_cast_fp16)[name = string("transpose_226")]; + tensor input_49 = expand_dims(axes = input_49_axes_0, x = var_3128)[name = string("input_49")]; + string input_51_pad_type_0 = const()[name = string("input_51_pad_type_0"), val = string("valid")]; + tensor input_51_strides_0 = const()[name = string("input_51_strides_0"), val = tensor([1, 1])]; + tensor input_51_pad_0 = const()[name = string("input_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_51_dilations_0 = const()[name = string("input_51_dilations_0"), val = tensor([1, 1])]; + int32 input_51_groups_0 = const()[name = string("input_51_groups_0"), val = int32(1)]; + tensor input_51 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_49)[name = string("input_51")]; + string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; + tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; + tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; + int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; + tensor b_5 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_49)[name = string("b_5")]; + tensor c_5 = silu(x = input_51)[name = string("c_5")]; + tensor input_53 = mul(x = c_5, y = b_5)[name = string("input_53")]; + string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; + tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; + tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; + int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; + tensor e_5 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_53)[name = string("e_5")]; + tensor var_3150_axes_0 = const()[name = string("op_3150_axes_0"), val = tensor([2])]; + tensor var_3150 = squeeze(axes = var_3150_axes_0, x = e_5)[name = string("op_3150")]; + tensor var_3151 = const()[name = string("op_3151"), val = tensor([0, 2, 1])]; + tensor var_3152 = transpose(perm = var_3151, x = var_3150)[name = string("transpose_225")]; + tensor hidden_states_31_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_3152)[name = string("hidden_states_31_cast_fp16")]; + int32 var_3164 = const()[name = string("op_3164"), val = int32(-1)]; + fp16 const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3166_cast_fp16 = mul(x = hidden_states_31_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_3166_cast_fp16")]; + bool input_55_interleave_0 = const()[name = string("input_55_interleave_0"), val = bool(false)]; + tensor input_55_cast_fp16 = concat(axis = var_3164, interleave = input_55_interleave_0, values = (hidden_states_31_cast_fp16, var_3166_cast_fp16))[name = string("input_55_cast_fp16")]; + tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; + fp16 var_3161_to_fp16 = const()[name = string("op_3161_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_3161_to_fp16, x = input_55_cast_fp16)[name = string("normed_49_cast_fp16")]; + tensor normed_51_begin_0 = const()[name = string("normed_51_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_51_end_0 = const()[name = string("normed_51_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_51_end_mask_0 = const()[name = string("normed_51_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_51_cast_fp16 = slice_by_index(begin = normed_51_begin_0, end = normed_51_end_0, end_mask = normed_51_end_mask_0, x = normed_49_cast_fp16)[name = string("normed_51_cast_fp16")]; + tensor const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412585792)))]; + tensor hidden_states_33_cast_fp16 = mul(x = normed_51_cast_fp16, y = const_106_promoted_to_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor var_3189 = const()[name = string("op_3189"), val = tensor([0, 2, 1])]; + tensor var_3192_axes_0 = const()[name = string("op_3192_axes_0"), val = tensor([2])]; + tensor var_3190_cast_fp16 = transpose(perm = var_3189, x = hidden_states_33_cast_fp16)[name = string("transpose_224")]; + tensor var_3192_cast_fp16 = expand_dims(axes = var_3192_axes_0, x = var_3190_cast_fp16)[name = string("op_3192_cast_fp16")]; + string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; + tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; + tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; + int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; + tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3192_cast_fp16)[name = string("query_states_25")]; + string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; + tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; + tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; + int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; + tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3192_cast_fp16)[name = string("key_states_31")]; + string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; + tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; + tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; + int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; + tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3192_cast_fp16)[name = string("value_states_25")]; + tensor var_3234 = const()[name = string("op_3234"), val = tensor([1, 16, 128, 64])]; + tensor var_3235 = reshape(shape = var_3234, x = query_states_25)[name = string("op_3235")]; + tensor var_3240 = const()[name = string("op_3240"), val = tensor([0, 1, 3, 2])]; + tensor var_3245 = const()[name = string("op_3245"), val = tensor([1, 8, 128, 64])]; + tensor var_3246 = reshape(shape = var_3245, x = key_states_31)[name = string("op_3246")]; + tensor var_3251 = const()[name = string("op_3251"), val = tensor([0, 1, 3, 2])]; + tensor var_3256 = const()[name = string("op_3256"), val = tensor([1, 8, 128, 64])]; + tensor var_3257 = reshape(shape = var_3256, x = value_states_25)[name = string("op_3257")]; + tensor var_3262 = const()[name = string("op_3262"), val = tensor([0, 1, 3, 2])]; + int32 var_3273 = const()[name = string("op_3273"), val = int32(-1)]; + fp16 const_108_promoted = const()[name = string("const_108_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_35 = transpose(perm = var_3240, x = var_3235)[name = string("transpose_223")]; + tensor var_3275 = mul(x = hidden_states_35, y = const_108_promoted)[name = string("op_3275")]; + bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; + tensor input_59 = concat(axis = var_3273, interleave = input_59_interleave_0, values = (hidden_states_35, var_3275))[name = string("input_59")]; + tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; + fp16 var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_3270_to_fp16, x = input_59)[name = string("normed_53_cast_fp16")]; + tensor normed_55_begin_0 = const()[name = string("normed_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_55_end_0 = const()[name = string("normed_55_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_55_end_mask_0 = const()[name = string("normed_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_55 = slice_by_index(begin = normed_55_begin_0, end = normed_55_end_0, end_mask = normed_55_end_mask_0, x = normed_53_cast_fp16)[name = string("normed_55")]; + tensor const_111 = const()[name = string("const_111"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412587904)))]; + tensor q_7 = mul(x = normed_55, y = const_111)[name = string("q_7")]; + int32 var_3298 = const()[name = string("op_3298"), val = int32(-1)]; + fp16 const_112_promoted = const()[name = string("const_112_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_37 = transpose(perm = var_3251, x = var_3246)[name = string("transpose_222")]; + tensor var_3300 = mul(x = hidden_states_37, y = const_112_promoted)[name = string("op_3300")]; + bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; + tensor input_61 = concat(axis = var_3298, interleave = input_61_interleave_0, values = (hidden_states_37, var_3300))[name = string("input_61")]; + tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; + fp16 var_3295_to_fp16 = const()[name = string("op_3295_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_3295_to_fp16, x = input_61)[name = string("normed_57_cast_fp16")]; + tensor normed_59_begin_0 = const()[name = string("normed_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_59_end_0 = const()[name = string("normed_59_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_59_end_mask_0 = const()[name = string("normed_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_59 = slice_by_index(begin = normed_59_begin_0, end = normed_59_end_0, end_mask = normed_59_end_mask_0, x = normed_57_cast_fp16)[name = string("normed_59")]; + tensor const_115 = const()[name = string("const_115"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412588224)))]; + tensor k_7 = mul(x = normed_59, y = const_115)[name = string("k_7")]; + tensor var_3326 = mul(x = q_7, y = cos_5)[name = string("op_3326")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7)[name = string("x2_13")]; + fp16 const_118_promoted = const()[name = string("const_118_promoted"), val = fp16(-0x1p+0)]; + tensor var_3347 = mul(x = x2_13, y = const_118_promoted)[name = string("op_3347")]; + int32 var_3349 = const()[name = string("op_3349"), val = int32(-1)]; + bool var_3350_interleave_0 = const()[name = string("op_3350_interleave_0"), val = bool(false)]; + tensor var_3350 = concat(axis = var_3349, interleave = var_3350_interleave_0, values = (var_3347, x1_13))[name = string("op_3350")]; + tensor var_3351 = mul(x = var_3350, y = sin_5)[name = string("op_3351")]; + tensor query_states_27 = add(x = var_3326, y = var_3351)[name = string("query_states_27")]; + tensor var_3354 = mul(x = k_7, y = cos_5)[name = string("op_3354")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7)[name = string("x2_15")]; + fp16 const_121_promoted = const()[name = string("const_121_promoted"), val = fp16(-0x1p+0)]; + tensor var_3375 = mul(x = x2_15, y = const_121_promoted)[name = string("op_3375")]; + int32 var_3377 = const()[name = string("op_3377"), val = int32(-1)]; + bool var_3378_interleave_0 = const()[name = string("op_3378_interleave_0"), val = bool(false)]; + tensor var_3378 = concat(axis = var_3377, interleave = var_3378_interleave_0, values = (var_3375, x1_15))[name = string("op_3378")]; + tensor var_3379 = mul(x = var_3378, y = sin_5)[name = string("op_3379")]; + tensor key_states_33 = add(x = var_3354, y = var_3379)[name = string("key_states_33")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; + bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; + tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_1781, concat_57_values3_0))[name = string("concat_57")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = key_states_33, x = coreml_update_state_61)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_62_write_state")]; + tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_62")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([31])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([32])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; + tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; + tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; + int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; + bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; + tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_1781, concat_61_values3_0))[name = string("concat_61")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_27 = transpose(perm = var_3262, x = var_3257)[name = string("transpose_221")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_27, x = coreml_update_state_62)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_63_write_state")]; + tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_63")]; + tensor var_3450_begin_0 = const()[name = string("op_3450_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_3450_end_0 = const()[name = string("op_3450_end_0"), val = tensor([4, 8, 4096, 128])]; + tensor var_3450_end_mask_0 = const()[name = string("op_3450_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3450_cast_fp16 = slice_by_index(begin = var_3450_begin_0, end = var_3450_end_0, end_mask = var_3450_end_mask_0, x = coreml_update_state_63)[name = string("op_3450_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_3450_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_3457_begin_0 = const()[name = string("op_3457_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor var_3457_end_0 = const()[name = string("op_3457_end_0"), val = tensor([32, 8, 4096, 128])]; + tensor var_3457_end_mask_0 = const()[name = string("op_3457_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3457_cast_fp16 = slice_by_index(begin = var_3457_begin_0, end = var_3457_end_0, end_mask = var_3457_end_mask_0, x = coreml_update_state_63)[name = string("op_3457_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_3457_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_51_axes_0 = const()[name = string("x_51_axes_0"), val = tensor([1])]; + tensor x_51_cast_fp16 = expand_dims(axes = x_51_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor var_3486 = const()[name = string("op_3486"), val = tensor([1, 2, 1, 1])]; + tensor x_53_cast_fp16 = tile(reps = var_3486, x = x_51_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_3498 = const()[name = string("op_3498"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_37_cast_fp16 = reshape(shape = var_3498, x = x_53_cast_fp16)[name = string("key_states_37_cast_fp16")]; + tensor x_57_axes_0 = const()[name = string("x_57_axes_0"), val = tensor([1])]; + tensor x_57_cast_fp16 = expand_dims(axes = x_57_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_57_cast_fp16")]; + tensor var_3506 = const()[name = string("op_3506"), val = tensor([1, 2, 1, 1])]; + tensor x_59_cast_fp16 = tile(reps = var_3506, x = x_57_cast_fp16)[name = string("x_59_cast_fp16")]; + bool var_3533_transpose_x_0 = const()[name = string("op_3533_transpose_x_0"), val = bool(false)]; + bool var_3533_transpose_y_0 = const()[name = string("op_3533_transpose_y_0"), val = bool(true)]; + tensor var_3533 = matmul(transpose_x = var_3533_transpose_x_0, transpose_y = var_3533_transpose_y_0, x = query_states_27, y = key_states_37_cast_fp16)[name = string("op_3533")]; + fp16 var_3534_to_fp16 = const()[name = string("op_3534_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_3533, y = var_3534_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("attn_weights_15_cast_fp16")]; + int32 var_3569 = const()[name = string("op_3569"), val = int32(-1)]; + tensor var_3571_cast_fp16 = softmax(axis = var_3569, x = attn_weights_15_cast_fp16)[name = string("op_3571_cast_fp16")]; + tensor concat_66 = const()[name = string("concat_66"), val = tensor([16, 64, 4096])]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_3571_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor concat_67 = const()[name = string("concat_67"), val = tensor([16, 4096, 128])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_59_cast_fp16)[name = string("reshape_10_cast_fp16")]; + bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; + bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 16, 64, 128])]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor var_3583_perm_0 = const()[name = string("op_3583_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_3602 = const()[name = string("op_3602"), val = tensor([1, 64, 2048])]; + tensor var_3583_cast_fp16 = transpose(perm = var_3583_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_220")]; + tensor attn_output_35_cast_fp16 = reshape(shape = var_3602, x = var_3583_cast_fp16)[name = string("attn_output_35_cast_fp16")]; + tensor var_3607 = const()[name = string("op_3607"), val = tensor([0, 2, 1])]; + string var_3623_pad_type_0 = const()[name = string("op_3623_pad_type_0"), val = string("valid")]; + int32 var_3623_groups_0 = const()[name = string("op_3623_groups_0"), val = int32(1)]; + tensor var_3623_strides_0 = const()[name = string("op_3623_strides_0"), val = tensor([1])]; + tensor var_3623_pad_0 = const()[name = string("op_3623_pad_0"), val = tensor([0, 0])]; + tensor var_3623_dilations_0 = const()[name = string("op_3623_dilations_0"), val = tensor([1])]; + tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412588544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414685760))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_3608_cast_fp16 = transpose(perm = var_3607, x = attn_output_35_cast_fp16)[name = string("transpose_219")]; + tensor var_3623_cast_fp16 = conv(dilations = var_3623_dilations_0, groups = var_3623_groups_0, pad = var_3623_pad_0, pad_type = var_3623_pad_type_0, strides = var_3623_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_3608_cast_fp16)[name = string("op_3623_cast_fp16")]; + tensor var_3627 = const()[name = string("op_3627"), val = tensor([0, 2, 1])]; + tensor attn_output_39_cast_fp16 = transpose(perm = var_3627, x = var_3623_cast_fp16)[name = string("transpose_218")]; + tensor hidden_states_39_cast_fp16 = add(x = hidden_states_31_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; + int32 var_3640 = const()[name = string("op_3640"), val = int32(-1)]; + fp16 const_133_promoted_to_fp16 = const()[name = string("const_133_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3642_cast_fp16 = mul(x = hidden_states_39_cast_fp16, y = const_133_promoted_to_fp16)[name = string("op_3642_cast_fp16")]; + bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; + tensor input_65_cast_fp16 = concat(axis = var_3640, interleave = input_65_interleave_0, values = (hidden_states_39_cast_fp16, var_3642_cast_fp16))[name = string("input_65_cast_fp16")]; + tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; + fp16 var_3637_to_fp16 = const()[name = string("op_3637_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_3637_to_fp16, x = input_65_cast_fp16)[name = string("normed_61_cast_fp16")]; + tensor normed_63_begin_0 = const()[name = string("normed_63_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_63_end_0 = const()[name = string("normed_63_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_63_end_mask_0 = const()[name = string("normed_63_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_63_cast_fp16 = slice_by_index(begin = normed_63_begin_0, end = normed_63_end_0, end_mask = normed_63_end_mask_0, x = normed_61_cast_fp16)[name = string("normed_63_cast_fp16")]; + tensor const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414751360)))]; + tensor x_61_cast_fp16 = mul(x = normed_63_cast_fp16, y = const_136_promoted_to_fp16)[name = string("x_61_cast_fp16")]; + tensor var_3667 = const()[name = string("op_3667"), val = tensor([0, 2, 1])]; + tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([2])]; + tensor var_3668 = transpose(perm = var_3667, x = x_61_cast_fp16)[name = string("transpose_217")]; + tensor input_67 = expand_dims(axes = input_67_axes_0, x = var_3668)[name = string("input_67")]; + string input_69_pad_type_0 = const()[name = string("input_69_pad_type_0"), val = string("valid")]; + tensor input_69_strides_0 = const()[name = string("input_69_strides_0"), val = tensor([1, 1])]; + tensor input_69_pad_0 = const()[name = string("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_69_dilations_0 = const()[name = string("input_69_dilations_0"), val = tensor([1, 1])]; + int32 input_69_groups_0 = const()[name = string("input_69_groups_0"), val = int32(1)]; + tensor input_69 = conv(dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_67)[name = string("input_69")]; + string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; + tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; + tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; + int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; + tensor b_7 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_67)[name = string("b_7")]; + tensor c_7 = silu(x = input_69)[name = string("c_7")]; + tensor input_71 = mul(x = c_7, y = b_7)[name = string("input_71")]; + string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; + tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; + tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; + int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; + tensor e_7 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_71)[name = string("e_7")]; + tensor var_3690_axes_0 = const()[name = string("op_3690_axes_0"), val = tensor([2])]; + tensor var_3690 = squeeze(axes = var_3690_axes_0, x = e_7)[name = string("op_3690")]; + tensor var_3691 = const()[name = string("op_3691"), val = tensor([0, 2, 1])]; + tensor var_3692 = transpose(perm = var_3691, x = var_3690)[name = string("transpose_216")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_39_cast_fp16, y = var_3692)[name = string("hidden_states_41_cast_fp16")]; + int32 var_3704 = const()[name = string("op_3704"), val = int32(-1)]; + fp16 const_137_promoted_to_fp16 = const()[name = string("const_137_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3706_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_137_promoted_to_fp16)[name = string("op_3706_cast_fp16")]; + bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; + tensor input_73_cast_fp16 = concat(axis = var_3704, interleave = input_73_interleave_0, values = (hidden_states_41_cast_fp16, var_3706_cast_fp16))[name = string("input_73_cast_fp16")]; + tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; + fp16 var_3701_to_fp16 = const()[name = string("op_3701_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_3701_to_fp16, x = input_73_cast_fp16)[name = string("normed_65_cast_fp16")]; + tensor normed_67_begin_0 = const()[name = string("normed_67_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_67_end_0 = const()[name = string("normed_67_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_67_end_mask_0 = const()[name = string("normed_67_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_67_cast_fp16 = slice_by_index(begin = normed_67_begin_0, end = normed_67_end_0, end_mask = normed_67_end_mask_0, x = normed_65_cast_fp16)[name = string("normed_67_cast_fp16")]; + tensor const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414753472)))]; + tensor hidden_states_43_cast_fp16 = mul(x = normed_67_cast_fp16, y = const_140_promoted_to_fp16)[name = string("hidden_states_43_cast_fp16")]; + tensor var_3729 = const()[name = string("op_3729"), val = tensor([0, 2, 1])]; + tensor var_3732_axes_0 = const()[name = string("op_3732_axes_0"), val = tensor([2])]; + tensor var_3730_cast_fp16 = transpose(perm = var_3729, x = hidden_states_43_cast_fp16)[name = string("transpose_215")]; + tensor var_3732_cast_fp16 = expand_dims(axes = var_3732_axes_0, x = var_3730_cast_fp16)[name = string("op_3732_cast_fp16")]; + string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; + tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; + tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; + int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; + tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_3732_cast_fp16)[name = string("query_states_33")]; + string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; + tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; + tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; + int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; + tensor key_states_41 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_3732_cast_fp16)[name = string("key_states_41")]; + string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; + tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; + tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; + int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; + tensor value_states_33 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_3732_cast_fp16)[name = string("value_states_33")]; + tensor var_3774 = const()[name = string("op_3774"), val = tensor([1, 16, 128, 64])]; + tensor var_3775 = reshape(shape = var_3774, x = query_states_33)[name = string("op_3775")]; + tensor var_3780 = const()[name = string("op_3780"), val = tensor([0, 1, 3, 2])]; + tensor var_3785 = const()[name = string("op_3785"), val = tensor([1, 8, 128, 64])]; + tensor var_3786 = reshape(shape = var_3785, x = key_states_41)[name = string("op_3786")]; + tensor var_3791 = const()[name = string("op_3791"), val = tensor([0, 1, 3, 2])]; + tensor var_3796 = const()[name = string("op_3796"), val = tensor([1, 8, 128, 64])]; + tensor var_3797 = reshape(shape = var_3796, x = value_states_33)[name = string("op_3797")]; + tensor var_3802 = const()[name = string("op_3802"), val = tensor([0, 1, 3, 2])]; + int32 var_3813 = const()[name = string("op_3813"), val = int32(-1)]; + fp16 const_142_promoted = const()[name = string("const_142_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_45 = transpose(perm = var_3780, x = var_3775)[name = string("transpose_214")]; + tensor var_3815 = mul(x = hidden_states_45, y = const_142_promoted)[name = string("op_3815")]; + bool input_77_interleave_0 = const()[name = string("input_77_interleave_0"), val = bool(false)]; + tensor input_77 = concat(axis = var_3813, interleave = input_77_interleave_0, values = (hidden_states_45, var_3815))[name = string("input_77")]; + tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; + fp16 var_3810_to_fp16 = const()[name = string("op_3810_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_3810_to_fp16, x = input_77)[name = string("normed_69_cast_fp16")]; + tensor normed_71_begin_0 = const()[name = string("normed_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_71_end_0 = const()[name = string("normed_71_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_71_end_mask_0 = const()[name = string("normed_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_71 = slice_by_index(begin = normed_71_begin_0, end = normed_71_end_0, end_mask = normed_71_end_mask_0, x = normed_69_cast_fp16)[name = string("normed_71")]; + tensor const_145 = const()[name = string("const_145"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414755584)))]; + tensor q_9 = mul(x = normed_71, y = const_145)[name = string("q_9")]; + int32 var_3838 = const()[name = string("op_3838"), val = int32(-1)]; + fp16 const_146_promoted = const()[name = string("const_146_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_47 = transpose(perm = var_3791, x = var_3786)[name = string("transpose_213")]; + tensor var_3840 = mul(x = hidden_states_47, y = const_146_promoted)[name = string("op_3840")]; + bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; + tensor input_79 = concat(axis = var_3838, interleave = input_79_interleave_0, values = (hidden_states_47, var_3840))[name = string("input_79")]; + tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; + fp16 var_3835_to_fp16 = const()[name = string("op_3835_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_3835_to_fp16, x = input_79)[name = string("normed_73_cast_fp16")]; + tensor normed_75_begin_0 = const()[name = string("normed_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_75_end_0 = const()[name = string("normed_75_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_75_end_mask_0 = const()[name = string("normed_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_75 = slice_by_index(begin = normed_75_begin_0, end = normed_75_end_0, end_mask = normed_75_end_mask_0, x = normed_73_cast_fp16)[name = string("normed_75")]; + tensor const_149 = const()[name = string("const_149"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414755904)))]; + tensor k_9 = mul(x = normed_75, y = const_149)[name = string("k_9")]; + tensor var_3866 = mul(x = q_9, y = cos_5)[name = string("op_3866")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9)[name = string("x2_17")]; + fp16 const_152_promoted = const()[name = string("const_152_promoted"), val = fp16(-0x1p+0)]; + tensor var_3887 = mul(x = x2_17, y = const_152_promoted)[name = string("op_3887")]; + int32 var_3889 = const()[name = string("op_3889"), val = int32(-1)]; + bool var_3890_interleave_0 = const()[name = string("op_3890_interleave_0"), val = bool(false)]; + tensor var_3890 = concat(axis = var_3889, interleave = var_3890_interleave_0, values = (var_3887, x1_17))[name = string("op_3890")]; + tensor var_3891 = mul(x = var_3890, y = sin_5)[name = string("op_3891")]; + tensor query_states_35 = add(x = var_3866, y = var_3891)[name = string("query_states_35")]; + tensor var_3894 = mul(x = k_9, y = cos_5)[name = string("op_3894")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9)[name = string("x2_19")]; + fp16 const_155_promoted = const()[name = string("const_155_promoted"), val = fp16(-0x1p+0)]; + tensor var_3915 = mul(x = x2_19, y = const_155_promoted)[name = string("op_3915")]; + int32 var_3917 = const()[name = string("op_3917"), val = int32(-1)]; + bool var_3918_interleave_0 = const()[name = string("op_3918_interleave_0"), val = bool(false)]; + tensor var_3918 = concat(axis = var_3917, interleave = var_3918_interleave_0, values = (var_3915, x1_19))[name = string("op_3918")]; + tensor var_3919 = mul(x = var_3918, y = sin_5)[name = string("op_3919")]; + tensor key_states_43 = add(x = var_3894, y = var_3919)[name = string("key_states_43")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_1781, concat_75_values3_0))[name = string("concat_75")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = key_states_43, x = coreml_update_state_63)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_64")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([32])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([33])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; + tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; + tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; + int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; + bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; + tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_1781, concat_79_values3_0))[name = string("concat_79")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_35 = transpose(perm = var_3802, x = var_3797)[name = string("transpose_212")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_35, x = coreml_update_state_64)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_65")]; + tensor var_3990_begin_0 = const()[name = string("op_3990_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_3990_end_0 = const()[name = string("op_3990_end_0"), val = tensor([5, 8, 4096, 128])]; + tensor var_3990_end_mask_0 = const()[name = string("op_3990_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3990_cast_fp16 = slice_by_index(begin = var_3990_begin_0, end = var_3990_end_0, end_mask = var_3990_end_mask_0, x = coreml_update_state_65)[name = string("op_3990_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_3990_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_3997_begin_0 = const()[name = string("op_3997_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_3997_end_0 = const()[name = string("op_3997_end_0"), val = tensor([33, 8, 4096, 128])]; + tensor var_3997_end_mask_0 = const()[name = string("op_3997_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_3997_cast_fp16 = slice_by_index(begin = var_3997_begin_0, end = var_3997_end_0, end_mask = var_3997_end_mask_0, x = coreml_update_state_65)[name = string("op_3997_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_3997_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_4026 = const()[name = string("op_4026"), val = tensor([1, 2, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_4026, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_4038 = const()[name = string("op_4038"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_47_cast_fp16 = reshape(shape = var_4038, x = x_69_cast_fp16)[name = string("key_states_47_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_4046 = const()[name = string("op_4046"), val = tensor([1, 2, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_4046, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_4073_transpose_x_0 = const()[name = string("op_4073_transpose_x_0"), val = bool(false)]; + bool var_4073_transpose_y_0 = const()[name = string("op_4073_transpose_y_0"), val = bool(true)]; + tensor var_4073 = matmul(transpose_x = var_4073_transpose_x_0, transpose_y = var_4073_transpose_y_0, x = query_states_35, y = key_states_47_cast_fp16)[name = string("op_4073")]; + fp16 var_4074_to_fp16 = const()[name = string("op_4074_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_17_cast_fp16 = mul(x = var_4073, y = var_4074_to_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor attn_weights_19_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("attn_weights_19_cast_fp16")]; + int32 var_4109 = const()[name = string("op_4109"), val = int32(-1)]; + tensor var_4111_cast_fp16 = softmax(axis = var_4109, x = attn_weights_19_cast_fp16)[name = string("op_4111_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([16, 64, 4096])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_4111_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([16, 4096, 128])]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_75_cast_fp16)[name = string("reshape_13_cast_fp16")]; + bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; + bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; + tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 16, 64, 128])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor var_4123_perm_0 = const()[name = string("op_4123_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4142 = const()[name = string("op_4142"), val = tensor([1, 64, 2048])]; + tensor var_4123_cast_fp16 = transpose(perm = var_4123_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_211")]; + tensor attn_output_45_cast_fp16 = reshape(shape = var_4142, x = var_4123_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_4147 = const()[name = string("op_4147"), val = tensor([0, 2, 1])]; + string var_4163_pad_type_0 = const()[name = string("op_4163_pad_type_0"), val = string("valid")]; + int32 var_4163_groups_0 = const()[name = string("op_4163_groups_0"), val = int32(1)]; + tensor var_4163_strides_0 = const()[name = string("op_4163_strides_0"), val = tensor([1])]; + tensor var_4163_pad_0 = const()[name = string("op_4163_pad_0"), val = tensor([0, 0])]; + tensor var_4163_dilations_0 = const()[name = string("op_4163_dilations_0"), val = tensor([1])]; + tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414756224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416853440))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4148_cast_fp16 = transpose(perm = var_4147, x = attn_output_45_cast_fp16)[name = string("transpose_210")]; + tensor var_4163_cast_fp16 = conv(dilations = var_4163_dilations_0, groups = var_4163_groups_0, pad = var_4163_pad_0, pad_type = var_4163_pad_type_0, strides = var_4163_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_4148_cast_fp16)[name = string("op_4163_cast_fp16")]; + tensor var_4167 = const()[name = string("op_4167"), val = tensor([0, 2, 1])]; + tensor attn_output_49_cast_fp16 = transpose(perm = var_4167, x = var_4163_cast_fp16)[name = string("transpose_209")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; + int32 var_4180 = const()[name = string("op_4180"), val = int32(-1)]; + fp16 const_167_promoted_to_fp16 = const()[name = string("const_167_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4182_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_167_promoted_to_fp16)[name = string("op_4182_cast_fp16")]; + bool input_83_interleave_0 = const()[name = string("input_83_interleave_0"), val = bool(false)]; + tensor input_83_cast_fp16 = concat(axis = var_4180, interleave = input_83_interleave_0, values = (hidden_states_49_cast_fp16, var_4182_cast_fp16))[name = string("input_83_cast_fp16")]; + tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; + fp16 var_4177_to_fp16 = const()[name = string("op_4177_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_4177_to_fp16, x = input_83_cast_fp16)[name = string("normed_77_cast_fp16")]; + tensor normed_79_begin_0 = const()[name = string("normed_79_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_79_end_0 = const()[name = string("normed_79_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_79_end_mask_0 = const()[name = string("normed_79_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_79_cast_fp16 = slice_by_index(begin = normed_79_begin_0, end = normed_79_end_0, end_mask = normed_79_end_mask_0, x = normed_77_cast_fp16)[name = string("normed_79_cast_fp16")]; + tensor const_170_promoted_to_fp16 = const()[name = string("const_170_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416919040)))]; + tensor x_77_cast_fp16 = mul(x = normed_79_cast_fp16, y = const_170_promoted_to_fp16)[name = string("x_77_cast_fp16")]; + tensor var_4207 = const()[name = string("op_4207"), val = tensor([0, 2, 1])]; + tensor input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor([2])]; + tensor var_4208 = transpose(perm = var_4207, x = x_77_cast_fp16)[name = string("transpose_208")]; + tensor input_85 = expand_dims(axes = input_85_axes_0, x = var_4208)[name = string("input_85")]; + string input_87_pad_type_0 = const()[name = string("input_87_pad_type_0"), val = string("valid")]; + tensor input_87_strides_0 = const()[name = string("input_87_strides_0"), val = tensor([1, 1])]; + tensor input_87_pad_0 = const()[name = string("input_87_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_87_dilations_0 = const()[name = string("input_87_dilations_0"), val = tensor([1, 1])]; + int32 input_87_groups_0 = const()[name = string("input_87_groups_0"), val = int32(1)]; + tensor input_87 = conv(dilations = input_87_dilations_0, groups = input_87_groups_0, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = input_87_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_85)[name = string("input_87")]; + string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; + tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; + tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; + int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; + tensor b_9 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_85)[name = string("b_9")]; + tensor c_9 = silu(x = input_87)[name = string("c_9")]; + tensor input_89 = mul(x = c_9, y = b_9)[name = string("input_89")]; + string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; + tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; + tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; + int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; + tensor e_9 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_89)[name = string("e_9")]; + tensor var_4230_axes_0 = const()[name = string("op_4230_axes_0"), val = tensor([2])]; + tensor var_4230 = squeeze(axes = var_4230_axes_0, x = e_9)[name = string("op_4230")]; + tensor var_4231 = const()[name = string("op_4231"), val = tensor([0, 2, 1])]; + tensor var_4232 = transpose(perm = var_4231, x = var_4230)[name = string("transpose_207")]; + tensor hidden_states_51_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = var_4232)[name = string("hidden_states_51_cast_fp16")]; + int32 var_4244 = const()[name = string("op_4244"), val = int32(-1)]; + fp16 const_171_promoted_to_fp16 = const()[name = string("const_171_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4246_cast_fp16 = mul(x = hidden_states_51_cast_fp16, y = const_171_promoted_to_fp16)[name = string("op_4246_cast_fp16")]; + bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; + tensor input_91_cast_fp16 = concat(axis = var_4244, interleave = input_91_interleave_0, values = (hidden_states_51_cast_fp16, var_4246_cast_fp16))[name = string("input_91_cast_fp16")]; + tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; + fp16 var_4241_to_fp16 = const()[name = string("op_4241_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_4241_to_fp16, x = input_91_cast_fp16)[name = string("normed_81_cast_fp16")]; + tensor normed_83_begin_0 = const()[name = string("normed_83_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_83_end_0 = const()[name = string("normed_83_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_83_end_mask_0 = const()[name = string("normed_83_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_83_cast_fp16 = slice_by_index(begin = normed_83_begin_0, end = normed_83_end_0, end_mask = normed_83_end_mask_0, x = normed_81_cast_fp16)[name = string("normed_83_cast_fp16")]; + tensor const_174_promoted_to_fp16 = const()[name = string("const_174_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416921152)))]; + tensor hidden_states_53_cast_fp16 = mul(x = normed_83_cast_fp16, y = const_174_promoted_to_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor var_4269 = const()[name = string("op_4269"), val = tensor([0, 2, 1])]; + tensor var_4272_axes_0 = const()[name = string("op_4272_axes_0"), val = tensor([2])]; + tensor var_4270_cast_fp16 = transpose(perm = var_4269, x = hidden_states_53_cast_fp16)[name = string("transpose_206")]; + tensor var_4272_cast_fp16 = expand_dims(axes = var_4272_axes_0, x = var_4270_cast_fp16)[name = string("op_4272_cast_fp16")]; + string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; + tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; + tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; + int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; + tensor query_states_41 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_4272_cast_fp16)[name = string("query_states_41")]; + string key_states_51_pad_type_0 = const()[name = string("key_states_51_pad_type_0"), val = string("valid")]; + tensor key_states_51_strides_0 = const()[name = string("key_states_51_strides_0"), val = tensor([1, 1])]; + tensor key_states_51_pad_0 = const()[name = string("key_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_51_dilations_0 = const()[name = string("key_states_51_dilations_0"), val = tensor([1, 1])]; + int32 key_states_51_groups_0 = const()[name = string("key_states_51_groups_0"), val = int32(1)]; + tensor key_states_51 = conv(dilations = key_states_51_dilations_0, groups = key_states_51_groups_0, pad = key_states_51_pad_0, pad_type = key_states_51_pad_type_0, strides = key_states_51_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_4272_cast_fp16)[name = string("key_states_51")]; + string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; + tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; + tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; + int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; + tensor value_states_41 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_4272_cast_fp16)[name = string("value_states_41")]; + tensor var_4314 = const()[name = string("op_4314"), val = tensor([1, 16, 128, 64])]; + tensor var_4315 = reshape(shape = var_4314, x = query_states_41)[name = string("op_4315")]; + tensor var_4320 = const()[name = string("op_4320"), val = tensor([0, 1, 3, 2])]; + tensor var_4325 = const()[name = string("op_4325"), val = tensor([1, 8, 128, 64])]; + tensor var_4326 = reshape(shape = var_4325, x = key_states_51)[name = string("op_4326")]; + tensor var_4331 = const()[name = string("op_4331"), val = tensor([0, 1, 3, 2])]; + tensor var_4336 = const()[name = string("op_4336"), val = tensor([1, 8, 128, 64])]; + tensor var_4337 = reshape(shape = var_4336, x = value_states_41)[name = string("op_4337")]; + tensor var_4342 = const()[name = string("op_4342"), val = tensor([0, 1, 3, 2])]; + int32 var_4353 = const()[name = string("op_4353"), val = int32(-1)]; + fp16 const_176_promoted = const()[name = string("const_176_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_55 = transpose(perm = var_4320, x = var_4315)[name = string("transpose_205")]; + tensor var_4355 = mul(x = hidden_states_55, y = const_176_promoted)[name = string("op_4355")]; + bool input_95_interleave_0 = const()[name = string("input_95_interleave_0"), val = bool(false)]; + tensor input_95 = concat(axis = var_4353, interleave = input_95_interleave_0, values = (hidden_states_55, var_4355))[name = string("input_95")]; + tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; + fp16 var_4350_to_fp16 = const()[name = string("op_4350_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_4350_to_fp16, x = input_95)[name = string("normed_85_cast_fp16")]; + tensor normed_87_begin_0 = const()[name = string("normed_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_87_end_0 = const()[name = string("normed_87_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_87_end_mask_0 = const()[name = string("normed_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_87 = slice_by_index(begin = normed_87_begin_0, end = normed_87_end_0, end_mask = normed_87_end_mask_0, x = normed_85_cast_fp16)[name = string("normed_87")]; + tensor const_179 = const()[name = string("const_179"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416923264)))]; + tensor q_11 = mul(x = normed_87, y = const_179)[name = string("q_11")]; + int32 var_4378 = const()[name = string("op_4378"), val = int32(-1)]; + fp16 const_180_promoted = const()[name = string("const_180_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_57 = transpose(perm = var_4331, x = var_4326)[name = string("transpose_204")]; + tensor var_4380 = mul(x = hidden_states_57, y = const_180_promoted)[name = string("op_4380")]; + bool input_97_interleave_0 = const()[name = string("input_97_interleave_0"), val = bool(false)]; + tensor input_97 = concat(axis = var_4378, interleave = input_97_interleave_0, values = (hidden_states_57, var_4380))[name = string("input_97")]; + tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; + fp16 var_4375_to_fp16 = const()[name = string("op_4375_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_4375_to_fp16, x = input_97)[name = string("normed_89_cast_fp16")]; + tensor normed_91_begin_0 = const()[name = string("normed_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_91_end_0 = const()[name = string("normed_91_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_91_end_mask_0 = const()[name = string("normed_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_91 = slice_by_index(begin = normed_91_begin_0, end = normed_91_end_0, end_mask = normed_91_end_mask_0, x = normed_89_cast_fp16)[name = string("normed_91")]; + tensor const_183 = const()[name = string("const_183"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416923584)))]; + tensor k_11 = mul(x = normed_91, y = const_183)[name = string("k_11")]; + tensor var_4406 = mul(x = q_11, y = cos_5)[name = string("op_4406")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11)[name = string("x2_21")]; + fp16 const_186_promoted = const()[name = string("const_186_promoted"), val = fp16(-0x1p+0)]; + tensor var_4427 = mul(x = x2_21, y = const_186_promoted)[name = string("op_4427")]; + int32 var_4429 = const()[name = string("op_4429"), val = int32(-1)]; + bool var_4430_interleave_0 = const()[name = string("op_4430_interleave_0"), val = bool(false)]; + tensor var_4430 = concat(axis = var_4429, interleave = var_4430_interleave_0, values = (var_4427, x1_21))[name = string("op_4430")]; + tensor var_4431 = mul(x = var_4430, y = sin_5)[name = string("op_4431")]; + tensor query_states_43 = add(x = var_4406, y = var_4431)[name = string("query_states_43")]; + tensor var_4434 = mul(x = k_11, y = cos_5)[name = string("op_4434")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11)[name = string("x2_23")]; + fp16 const_189_promoted = const()[name = string("const_189_promoted"), val = fp16(-0x1p+0)]; + tensor var_4455 = mul(x = x2_23, y = const_189_promoted)[name = string("op_4455")]; + int32 var_4457 = const()[name = string("op_4457"), val = int32(-1)]; + bool var_4458_interleave_0 = const()[name = string("op_4458_interleave_0"), val = bool(false)]; + tensor var_4458 = concat(axis = var_4457, interleave = var_4458_interleave_0, values = (var_4455, x1_23))[name = string("op_4458")]; + tensor var_4459 = mul(x = var_4458, y = sin_5)[name = string("op_4459")]; + tensor key_states_53 = add(x = var_4434, y = var_4459)[name = string("key_states_53")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_1781, concat_93_values3_0))[name = string("concat_93")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = key_states_53, x = coreml_update_state_65)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_66")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([33])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([34])]; + int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; + bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; + tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; + tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; + tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; + int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; + bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; + tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_1781, concat_97_values3_0))[name = string("concat_97")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_43 = transpose(perm = var_4342, x = var_4337)[name = string("transpose_203")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_43, x = coreml_update_state_66)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_67")]; + tensor var_4530_begin_0 = const()[name = string("op_4530_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_4530_end_0 = const()[name = string("op_4530_end_0"), val = tensor([6, 8, 4096, 128])]; + tensor var_4530_end_mask_0 = const()[name = string("op_4530_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4530_cast_fp16 = slice_by_index(begin = var_4530_begin_0, end = var_4530_end_0, end_mask = var_4530_end_mask_0, x = coreml_update_state_67)[name = string("op_4530_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_4530_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_4537_begin_0 = const()[name = string("op_4537_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_4537_end_0 = const()[name = string("op_4537_end_0"), val = tensor([34, 8, 4096, 128])]; + tensor var_4537_end_mask_0 = const()[name = string("op_4537_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_4537_cast_fp16 = slice_by_index(begin = var_4537_begin_0, end = var_4537_end_0, end_mask = var_4537_end_mask_0, x = coreml_update_state_67)[name = string("op_4537_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_4537_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_83_axes_0 = const()[name = string("x_83_axes_0"), val = tensor([1])]; + tensor x_83_cast_fp16 = expand_dims(axes = x_83_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_83_cast_fp16")]; + tensor var_4566 = const()[name = string("op_4566"), val = tensor([1, 2, 1, 1])]; + tensor x_85_cast_fp16 = tile(reps = var_4566, x = x_83_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_4578 = const()[name = string("op_4578"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_57_cast_fp16 = reshape(shape = var_4578, x = x_85_cast_fp16)[name = string("key_states_57_cast_fp16")]; + tensor x_89_axes_0 = const()[name = string("x_89_axes_0"), val = tensor([1])]; + tensor x_89_cast_fp16 = expand_dims(axes = x_89_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_89_cast_fp16")]; + tensor var_4586 = const()[name = string("op_4586"), val = tensor([1, 2, 1, 1])]; + tensor x_91_cast_fp16 = tile(reps = var_4586, x = x_89_cast_fp16)[name = string("x_91_cast_fp16")]; + bool var_4613_transpose_x_0 = const()[name = string("op_4613_transpose_x_0"), val = bool(false)]; + bool var_4613_transpose_y_0 = const()[name = string("op_4613_transpose_y_0"), val = bool(true)]; + tensor var_4613 = matmul(transpose_x = var_4613_transpose_x_0, transpose_y = var_4613_transpose_y_0, x = query_states_43, y = key_states_57_cast_fp16)[name = string("op_4613")]; + fp16 var_4614_to_fp16 = const()[name = string("op_4614_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_21_cast_fp16 = mul(x = var_4613, y = var_4614_to_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor attn_weights_23_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("attn_weights_23_cast_fp16")]; + int32 var_4649 = const()[name = string("op_4649"), val = int32(-1)]; + tensor var_4651_cast_fp16 = softmax(axis = var_4649, x = attn_weights_23_cast_fp16)[name = string("op_4651_cast_fp16")]; + tensor concat_102 = const()[name = string("concat_102"), val = tensor([16, 64, 4096])]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_4651_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor concat_103 = const()[name = string("concat_103"), val = tensor([16, 4096, 128])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_91_cast_fp16)[name = string("reshape_16_cast_fp16")]; + bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; + bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; + tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 16, 64, 128])]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor var_4663_perm_0 = const()[name = string("op_4663_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_4682 = const()[name = string("op_4682"), val = tensor([1, 64, 2048])]; + tensor var_4663_cast_fp16 = transpose(perm = var_4663_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_202")]; + tensor attn_output_55_cast_fp16 = reshape(shape = var_4682, x = var_4663_cast_fp16)[name = string("attn_output_55_cast_fp16")]; + tensor var_4687 = const()[name = string("op_4687"), val = tensor([0, 2, 1])]; + string var_4703_pad_type_0 = const()[name = string("op_4703_pad_type_0"), val = string("valid")]; + int32 var_4703_groups_0 = const()[name = string("op_4703_groups_0"), val = int32(1)]; + tensor var_4703_strides_0 = const()[name = string("op_4703_strides_0"), val = tensor([1])]; + tensor var_4703_pad_0 = const()[name = string("op_4703_pad_0"), val = tensor([0, 0])]; + tensor var_4703_dilations_0 = const()[name = string("op_4703_dilations_0"), val = tensor([1])]; + tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416923904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419021120))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_4688_cast_fp16 = transpose(perm = var_4687, x = attn_output_55_cast_fp16)[name = string("transpose_201")]; + tensor var_4703_cast_fp16 = conv(dilations = var_4703_dilations_0, groups = var_4703_groups_0, pad = var_4703_pad_0, pad_type = var_4703_pad_type_0, strides = var_4703_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_4688_cast_fp16)[name = string("op_4703_cast_fp16")]; + tensor var_4707 = const()[name = string("op_4707"), val = tensor([0, 2, 1])]; + tensor attn_output_59_cast_fp16 = transpose(perm = var_4707, x = var_4703_cast_fp16)[name = string("transpose_200")]; + tensor hidden_states_59_cast_fp16 = add(x = hidden_states_51_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; + int32 var_4720 = const()[name = string("op_4720"), val = int32(-1)]; + fp16 const_201_promoted_to_fp16 = const()[name = string("const_201_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4722_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = const_201_promoted_to_fp16)[name = string("op_4722_cast_fp16")]; + bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; + tensor input_101_cast_fp16 = concat(axis = var_4720, interleave = input_101_interleave_0, values = (hidden_states_59_cast_fp16, var_4722_cast_fp16))[name = string("input_101_cast_fp16")]; + tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; + fp16 var_4717_to_fp16 = const()[name = string("op_4717_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_4717_to_fp16, x = input_101_cast_fp16)[name = string("normed_93_cast_fp16")]; + tensor normed_95_begin_0 = const()[name = string("normed_95_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_95_end_0 = const()[name = string("normed_95_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_95_end_mask_0 = const()[name = string("normed_95_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_95_cast_fp16 = slice_by_index(begin = normed_95_begin_0, end = normed_95_end_0, end_mask = normed_95_end_mask_0, x = normed_93_cast_fp16)[name = string("normed_95_cast_fp16")]; + tensor const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419086720)))]; + tensor x_93_cast_fp16 = mul(x = normed_95_cast_fp16, y = const_204_promoted_to_fp16)[name = string("x_93_cast_fp16")]; + tensor var_4747 = const()[name = string("op_4747"), val = tensor([0, 2, 1])]; + tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([2])]; + tensor var_4748 = transpose(perm = var_4747, x = x_93_cast_fp16)[name = string("transpose_199")]; + tensor input_103 = expand_dims(axes = input_103_axes_0, x = var_4748)[name = string("input_103")]; + string input_105_pad_type_0 = const()[name = string("input_105_pad_type_0"), val = string("valid")]; + tensor input_105_strides_0 = const()[name = string("input_105_strides_0"), val = tensor([1, 1])]; + tensor input_105_pad_0 = const()[name = string("input_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_105_dilations_0 = const()[name = string("input_105_dilations_0"), val = tensor([1, 1])]; + int32 input_105_groups_0 = const()[name = string("input_105_groups_0"), val = int32(1)]; + tensor input_105 = conv(dilations = input_105_dilations_0, groups = input_105_groups_0, pad = input_105_pad_0, pad_type = input_105_pad_type_0, strides = input_105_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_103)[name = string("input_105")]; + string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; + tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; + tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; + int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; + tensor b_11 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_103)[name = string("b_11")]; + tensor c_11 = silu(x = input_105)[name = string("c_11")]; + tensor input_107 = mul(x = c_11, y = b_11)[name = string("input_107")]; + string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; + tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; + tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; + int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; + tensor e_11 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_107)[name = string("e_11")]; + tensor var_4770_axes_0 = const()[name = string("op_4770_axes_0"), val = tensor([2])]; + tensor var_4770 = squeeze(axes = var_4770_axes_0, x = e_11)[name = string("op_4770")]; + tensor var_4771 = const()[name = string("op_4771"), val = tensor([0, 2, 1])]; + tensor var_4772 = transpose(perm = var_4771, x = var_4770)[name = string("transpose_198")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = var_4772)[name = string("hidden_states_61_cast_fp16")]; + int32 var_4784 = const()[name = string("op_4784"), val = int32(-1)]; + fp16 const_205_promoted_to_fp16 = const()[name = string("const_205_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4786_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_205_promoted_to_fp16)[name = string("op_4786_cast_fp16")]; + bool input_109_interleave_0 = const()[name = string("input_109_interleave_0"), val = bool(false)]; + tensor input_109_cast_fp16 = concat(axis = var_4784, interleave = input_109_interleave_0, values = (hidden_states_61_cast_fp16, var_4786_cast_fp16))[name = string("input_109_cast_fp16")]; + tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; + fp16 var_4781_to_fp16 = const()[name = string("op_4781_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_4781_to_fp16, x = input_109_cast_fp16)[name = string("normed_97_cast_fp16")]; + tensor normed_99_begin_0 = const()[name = string("normed_99_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_99_end_0 = const()[name = string("normed_99_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_99_end_mask_0 = const()[name = string("normed_99_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_99_cast_fp16 = slice_by_index(begin = normed_99_begin_0, end = normed_99_end_0, end_mask = normed_99_end_mask_0, x = normed_97_cast_fp16)[name = string("normed_99_cast_fp16")]; + tensor const_208_promoted_to_fp16 = const()[name = string("const_208_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419088832)))]; + tensor hidden_states_63_cast_fp16 = mul(x = normed_99_cast_fp16, y = const_208_promoted_to_fp16)[name = string("hidden_states_63_cast_fp16")]; + tensor var_4809 = const()[name = string("op_4809"), val = tensor([0, 2, 1])]; + tensor var_4812_axes_0 = const()[name = string("op_4812_axes_0"), val = tensor([2])]; + tensor var_4810_cast_fp16 = transpose(perm = var_4809, x = hidden_states_63_cast_fp16)[name = string("transpose_197")]; + tensor var_4812_cast_fp16 = expand_dims(axes = var_4812_axes_0, x = var_4810_cast_fp16)[name = string("op_4812_cast_fp16")]; + string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; + tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; + tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; + int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; + tensor query_states_49 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_4812_cast_fp16)[name = string("query_states_49")]; + string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; + tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; + tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; + int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; + tensor key_states_61 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_4812_cast_fp16)[name = string("key_states_61")]; + string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; + tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; + tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; + int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; + tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_4812_cast_fp16)[name = string("value_states_49")]; + tensor var_4854 = const()[name = string("op_4854"), val = tensor([1, 16, 128, 64])]; + tensor var_4855 = reshape(shape = var_4854, x = query_states_49)[name = string("op_4855")]; + tensor var_4860 = const()[name = string("op_4860"), val = tensor([0, 1, 3, 2])]; + tensor var_4865 = const()[name = string("op_4865"), val = tensor([1, 8, 128, 64])]; + tensor var_4866 = reshape(shape = var_4865, x = key_states_61)[name = string("op_4866")]; + tensor var_4871 = const()[name = string("op_4871"), val = tensor([0, 1, 3, 2])]; + tensor var_4876 = const()[name = string("op_4876"), val = tensor([1, 8, 128, 64])]; + tensor var_4877 = reshape(shape = var_4876, x = value_states_49)[name = string("op_4877")]; + tensor var_4882 = const()[name = string("op_4882"), val = tensor([0, 1, 3, 2])]; + int32 var_4893 = const()[name = string("op_4893"), val = int32(-1)]; + fp16 const_210_promoted = const()[name = string("const_210_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_65 = transpose(perm = var_4860, x = var_4855)[name = string("transpose_196")]; + tensor var_4895 = mul(x = hidden_states_65, y = const_210_promoted)[name = string("op_4895")]; + bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; + tensor input_113 = concat(axis = var_4893, interleave = input_113_interleave_0, values = (hidden_states_65, var_4895))[name = string("input_113")]; + tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; + fp16 var_4890_to_fp16 = const()[name = string("op_4890_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_4890_to_fp16, x = input_113)[name = string("normed_101_cast_fp16")]; + tensor normed_103_begin_0 = const()[name = string("normed_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_103_end_0 = const()[name = string("normed_103_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_103_end_mask_0 = const()[name = string("normed_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_103 = slice_by_index(begin = normed_103_begin_0, end = normed_103_end_0, end_mask = normed_103_end_mask_0, x = normed_101_cast_fp16)[name = string("normed_103")]; + tensor const_213 = const()[name = string("const_213"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419090944)))]; + tensor q_13 = mul(x = normed_103, y = const_213)[name = string("q_13")]; + int32 var_4918 = const()[name = string("op_4918"), val = int32(-1)]; + fp16 const_214_promoted = const()[name = string("const_214_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_67 = transpose(perm = var_4871, x = var_4866)[name = string("transpose_195")]; + tensor var_4920 = mul(x = hidden_states_67, y = const_214_promoted)[name = string("op_4920")]; + bool input_115_interleave_0 = const()[name = string("input_115_interleave_0"), val = bool(false)]; + tensor input_115 = concat(axis = var_4918, interleave = input_115_interleave_0, values = (hidden_states_67, var_4920))[name = string("input_115")]; + tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; + fp16 var_4915_to_fp16 = const()[name = string("op_4915_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_4915_to_fp16, x = input_115)[name = string("normed_105_cast_fp16")]; + tensor normed_107_begin_0 = const()[name = string("normed_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_107_end_0 = const()[name = string("normed_107_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_107_end_mask_0 = const()[name = string("normed_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_107 = slice_by_index(begin = normed_107_begin_0, end = normed_107_end_0, end_mask = normed_107_end_mask_0, x = normed_105_cast_fp16)[name = string("normed_107")]; + tensor const_217 = const()[name = string("const_217"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419091264)))]; + tensor k_13 = mul(x = normed_107, y = const_217)[name = string("k_13")]; + tensor var_4946 = mul(x = q_13, y = cos_5)[name = string("op_4946")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13)[name = string("x2_25")]; + fp16 const_220_promoted = const()[name = string("const_220_promoted"), val = fp16(-0x1p+0)]; + tensor var_4967 = mul(x = x2_25, y = const_220_promoted)[name = string("op_4967")]; + int32 var_4969 = const()[name = string("op_4969"), val = int32(-1)]; + bool var_4970_interleave_0 = const()[name = string("op_4970_interleave_0"), val = bool(false)]; + tensor var_4970 = concat(axis = var_4969, interleave = var_4970_interleave_0, values = (var_4967, x1_25))[name = string("op_4970")]; + tensor var_4971 = mul(x = var_4970, y = sin_5)[name = string("op_4971")]; + tensor query_states_51 = add(x = var_4946, y = var_4971)[name = string("query_states_51")]; + tensor var_4974 = mul(x = k_13, y = cos_5)[name = string("op_4974")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13)[name = string("x1_27")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13)[name = string("x2_27")]; + fp16 const_223_promoted = const()[name = string("const_223_promoted"), val = fp16(-0x1p+0)]; + tensor var_4995 = mul(x = x2_27, y = const_223_promoted)[name = string("op_4995")]; + int32 var_4997 = const()[name = string("op_4997"), val = int32(-1)]; + bool var_4998_interleave_0 = const()[name = string("op_4998_interleave_0"), val = bool(false)]; + tensor var_4998 = concat(axis = var_4997, interleave = var_4998_interleave_0, values = (var_4995, x1_27))[name = string("op_4998")]; + tensor var_4999 = mul(x = var_4998, y = sin_5)[name = string("op_4999")]; + tensor key_states_63 = add(x = var_4974, y = var_4999)[name = string("key_states_63")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_1781, concat_111_values3_0))[name = string("concat_111")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = key_states_63, x = coreml_update_state_67)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_68")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([34])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([35])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_1781, concat_115_values3_0))[name = string("concat_115")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_51 = transpose(perm = var_4882, x = var_4877)[name = string("transpose_194")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_51, x = coreml_update_state_68)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_69")]; + tensor var_5070_begin_0 = const()[name = string("op_5070_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_5070_end_0 = const()[name = string("op_5070_end_0"), val = tensor([7, 8, 4096, 128])]; + tensor var_5070_end_mask_0 = const()[name = string("op_5070_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5070_cast_fp16 = slice_by_index(begin = var_5070_begin_0, end = var_5070_end_0, end_mask = var_5070_end_mask_0, x = coreml_update_state_69)[name = string("op_5070_cast_fp16")]; + tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; + tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_5070_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; + tensor var_5077_begin_0 = const()[name = string("op_5077_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_5077_end_0 = const()[name = string("op_5077_end_0"), val = tensor([35, 8, 4096, 128])]; + tensor var_5077_end_mask_0 = const()[name = string("op_5077_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5077_cast_fp16 = slice_by_index(begin = var_5077_begin_0, end = var_5077_end_0, end_mask = var_5077_end_mask_0, x = coreml_update_state_69)[name = string("op_5077_cast_fp16")]; + tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; + tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_5077_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; + tensor x_99_axes_0 = const()[name = string("x_99_axes_0"), val = tensor([1])]; + tensor x_99_cast_fp16 = expand_dims(axes = x_99_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_99_cast_fp16")]; + tensor var_5106 = const()[name = string("op_5106"), val = tensor([1, 2, 1, 1])]; + tensor x_101_cast_fp16 = tile(reps = var_5106, x = x_99_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_5118 = const()[name = string("op_5118"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_67_cast_fp16 = reshape(shape = var_5118, x = x_101_cast_fp16)[name = string("key_states_67_cast_fp16")]; + tensor x_105_axes_0 = const()[name = string("x_105_axes_0"), val = tensor([1])]; + tensor x_105_cast_fp16 = expand_dims(axes = x_105_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_105_cast_fp16")]; + tensor var_5126 = const()[name = string("op_5126"), val = tensor([1, 2, 1, 1])]; + tensor x_107_cast_fp16 = tile(reps = var_5126, x = x_105_cast_fp16)[name = string("x_107_cast_fp16")]; + bool var_5153_transpose_x_0 = const()[name = string("op_5153_transpose_x_0"), val = bool(false)]; + bool var_5153_transpose_y_0 = const()[name = string("op_5153_transpose_y_0"), val = bool(true)]; + tensor var_5153 = matmul(transpose_x = var_5153_transpose_x_0, transpose_y = var_5153_transpose_y_0, x = query_states_51, y = key_states_67_cast_fp16)[name = string("op_5153")]; + fp16 var_5154_to_fp16 = const()[name = string("op_5154_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_5153, y = var_5154_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("attn_weights_27_cast_fp16")]; + int32 var_5189 = const()[name = string("op_5189"), val = int32(-1)]; + tensor var_5191_cast_fp16 = softmax(axis = var_5189, x = attn_weights_27_cast_fp16)[name = string("op_5191_cast_fp16")]; + tensor concat_120 = const()[name = string("concat_120"), val = tensor([16, 64, 4096])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_5191_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor concat_121 = const()[name = string("concat_121"), val = tensor([16, 4096, 128])]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_107_cast_fp16)[name = string("reshape_19_cast_fp16")]; + bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; + bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; + tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 16, 64, 128])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor var_5203_perm_0 = const()[name = string("op_5203_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5222 = const()[name = string("op_5222"), val = tensor([1, 64, 2048])]; + tensor var_5203_cast_fp16 = transpose(perm = var_5203_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_193")]; + tensor attn_output_65_cast_fp16 = reshape(shape = var_5222, x = var_5203_cast_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor var_5227 = const()[name = string("op_5227"), val = tensor([0, 2, 1])]; + string var_5243_pad_type_0 = const()[name = string("op_5243_pad_type_0"), val = string("valid")]; + int32 var_5243_groups_0 = const()[name = string("op_5243_groups_0"), val = int32(1)]; + tensor var_5243_strides_0 = const()[name = string("op_5243_strides_0"), val = tensor([1])]; + tensor var_5243_pad_0 = const()[name = string("op_5243_pad_0"), val = tensor([0, 0])]; + tensor var_5243_dilations_0 = const()[name = string("op_5243_dilations_0"), val = tensor([1])]; + tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419091584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421188800))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5228_cast_fp16 = transpose(perm = var_5227, x = attn_output_65_cast_fp16)[name = string("transpose_192")]; + tensor var_5243_cast_fp16 = conv(dilations = var_5243_dilations_0, groups = var_5243_groups_0, pad = var_5243_pad_0, pad_type = var_5243_pad_type_0, strides = var_5243_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_5228_cast_fp16)[name = string("op_5243_cast_fp16")]; + tensor var_5247 = const()[name = string("op_5247"), val = tensor([0, 2, 1])]; + tensor attn_output_69_cast_fp16 = transpose(perm = var_5247, x = var_5243_cast_fp16)[name = string("transpose_191")]; + tensor hidden_states_69_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_69_cast_fp16")]; + int32 var_5260 = const()[name = string("op_5260"), val = int32(-1)]; + fp16 const_235_promoted_to_fp16 = const()[name = string("const_235_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5262_cast_fp16 = mul(x = hidden_states_69_cast_fp16, y = const_235_promoted_to_fp16)[name = string("op_5262_cast_fp16")]; + bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; + tensor input_119_cast_fp16 = concat(axis = var_5260, interleave = input_119_interleave_0, values = (hidden_states_69_cast_fp16, var_5262_cast_fp16))[name = string("input_119_cast_fp16")]; + tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; + fp16 var_5257_to_fp16 = const()[name = string("op_5257_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_5257_to_fp16, x = input_119_cast_fp16)[name = string("normed_109_cast_fp16")]; + tensor normed_111_begin_0 = const()[name = string("normed_111_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_111_end_0 = const()[name = string("normed_111_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_111_end_mask_0 = const()[name = string("normed_111_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_111_cast_fp16 = slice_by_index(begin = normed_111_begin_0, end = normed_111_end_0, end_mask = normed_111_end_mask_0, x = normed_109_cast_fp16)[name = string("normed_111_cast_fp16")]; + tensor const_238_promoted_to_fp16 = const()[name = string("const_238_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421254400)))]; + tensor x_109_cast_fp16 = mul(x = normed_111_cast_fp16, y = const_238_promoted_to_fp16)[name = string("x_109_cast_fp16")]; + tensor var_5287 = const()[name = string("op_5287"), val = tensor([0, 2, 1])]; + tensor input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor([2])]; + tensor var_5288 = transpose(perm = var_5287, x = x_109_cast_fp16)[name = string("transpose_190")]; + tensor input_121 = expand_dims(axes = input_121_axes_0, x = var_5288)[name = string("input_121")]; + string input_123_pad_type_0 = const()[name = string("input_123_pad_type_0"), val = string("valid")]; + tensor input_123_strides_0 = const()[name = string("input_123_strides_0"), val = tensor([1, 1])]; + tensor input_123_pad_0 = const()[name = string("input_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_123_dilations_0 = const()[name = string("input_123_dilations_0"), val = tensor([1, 1])]; + int32 input_123_groups_0 = const()[name = string("input_123_groups_0"), val = int32(1)]; + tensor input_123 = conv(dilations = input_123_dilations_0, groups = input_123_groups_0, pad = input_123_pad_0, pad_type = input_123_pad_type_0, strides = input_123_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_121)[name = string("input_123")]; + string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; + tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; + tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; + int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; + tensor b_13 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_121)[name = string("b_13")]; + tensor c_13 = silu(x = input_123)[name = string("c_13")]; + tensor input_125 = mul(x = c_13, y = b_13)[name = string("input_125")]; + string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; + tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; + tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; + int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; + tensor e_13 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_125)[name = string("e_13")]; + tensor var_5310_axes_0 = const()[name = string("op_5310_axes_0"), val = tensor([2])]; + tensor var_5310 = squeeze(axes = var_5310_axes_0, x = e_13)[name = string("op_5310")]; + tensor var_5311 = const()[name = string("op_5311"), val = tensor([0, 2, 1])]; + tensor var_5312 = transpose(perm = var_5311, x = var_5310)[name = string("transpose_189")]; + tensor hidden_states_71_cast_fp16 = add(x = hidden_states_69_cast_fp16, y = var_5312)[name = string("hidden_states_71_cast_fp16")]; + int32 var_5324 = const()[name = string("op_5324"), val = int32(-1)]; + fp16 const_239_promoted_to_fp16 = const()[name = string("const_239_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5326_cast_fp16 = mul(x = hidden_states_71_cast_fp16, y = const_239_promoted_to_fp16)[name = string("op_5326_cast_fp16")]; + bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; + tensor input_127_cast_fp16 = concat(axis = var_5324, interleave = input_127_interleave_0, values = (hidden_states_71_cast_fp16, var_5326_cast_fp16))[name = string("input_127_cast_fp16")]; + tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; + fp16 var_5321_to_fp16 = const()[name = string("op_5321_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_5321_to_fp16, x = input_127_cast_fp16)[name = string("normed_113_cast_fp16")]; + tensor normed_115_begin_0 = const()[name = string("normed_115_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_115_end_0 = const()[name = string("normed_115_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_115_end_mask_0 = const()[name = string("normed_115_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_115_cast_fp16 = slice_by_index(begin = normed_115_begin_0, end = normed_115_end_0, end_mask = normed_115_end_mask_0, x = normed_113_cast_fp16)[name = string("normed_115_cast_fp16")]; + tensor const_242_promoted_to_fp16 = const()[name = string("const_242_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421256512)))]; + tensor hidden_states_73_cast_fp16 = mul(x = normed_115_cast_fp16, y = const_242_promoted_to_fp16)[name = string("hidden_states_73_cast_fp16")]; + tensor var_5349 = const()[name = string("op_5349"), val = tensor([0, 2, 1])]; + tensor var_5352_axes_0 = const()[name = string("op_5352_axes_0"), val = tensor([2])]; + tensor var_5350_cast_fp16 = transpose(perm = var_5349, x = hidden_states_73_cast_fp16)[name = string("transpose_188")]; + tensor var_5352_cast_fp16 = expand_dims(axes = var_5352_axes_0, x = var_5350_cast_fp16)[name = string("op_5352_cast_fp16")]; + string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; + tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; + tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; + int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; + tensor query_states_57 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_5352_cast_fp16)[name = string("query_states_57")]; + string key_states_71_pad_type_0 = const()[name = string("key_states_71_pad_type_0"), val = string("valid")]; + tensor key_states_71_strides_0 = const()[name = string("key_states_71_strides_0"), val = tensor([1, 1])]; + tensor key_states_71_pad_0 = const()[name = string("key_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_71_dilations_0 = const()[name = string("key_states_71_dilations_0"), val = tensor([1, 1])]; + int32 key_states_71_groups_0 = const()[name = string("key_states_71_groups_0"), val = int32(1)]; + tensor key_states_71 = conv(dilations = key_states_71_dilations_0, groups = key_states_71_groups_0, pad = key_states_71_pad_0, pad_type = key_states_71_pad_type_0, strides = key_states_71_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_5352_cast_fp16)[name = string("key_states_71")]; + string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; + tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; + tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; + int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; + tensor value_states_57 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_5352_cast_fp16)[name = string("value_states_57")]; + tensor var_5394 = const()[name = string("op_5394"), val = tensor([1, 16, 128, 64])]; + tensor var_5395 = reshape(shape = var_5394, x = query_states_57)[name = string("op_5395")]; + tensor var_5400 = const()[name = string("op_5400"), val = tensor([0, 1, 3, 2])]; + tensor var_5405 = const()[name = string("op_5405"), val = tensor([1, 8, 128, 64])]; + tensor var_5406 = reshape(shape = var_5405, x = key_states_71)[name = string("op_5406")]; + tensor var_5411 = const()[name = string("op_5411"), val = tensor([0, 1, 3, 2])]; + tensor var_5416 = const()[name = string("op_5416"), val = tensor([1, 8, 128, 64])]; + tensor var_5417 = reshape(shape = var_5416, x = value_states_57)[name = string("op_5417")]; + tensor var_5422 = const()[name = string("op_5422"), val = tensor([0, 1, 3, 2])]; + int32 var_5433 = const()[name = string("op_5433"), val = int32(-1)]; + fp16 const_244_promoted = const()[name = string("const_244_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_75 = transpose(perm = var_5400, x = var_5395)[name = string("transpose_187")]; + tensor var_5435 = mul(x = hidden_states_75, y = const_244_promoted)[name = string("op_5435")]; + bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; + tensor input_131 = concat(axis = var_5433, interleave = input_131_interleave_0, values = (hidden_states_75, var_5435))[name = string("input_131")]; + tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; + fp16 var_5430_to_fp16 = const()[name = string("op_5430_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_5430_to_fp16, x = input_131)[name = string("normed_117_cast_fp16")]; + tensor normed_119_begin_0 = const()[name = string("normed_119_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_119_end_0 = const()[name = string("normed_119_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_119_end_mask_0 = const()[name = string("normed_119_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_119 = slice_by_index(begin = normed_119_begin_0, end = normed_119_end_0, end_mask = normed_119_end_mask_0, x = normed_117_cast_fp16)[name = string("normed_119")]; + tensor const_247 = const()[name = string("const_247"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421258624)))]; + tensor q_15 = mul(x = normed_119, y = const_247)[name = string("q_15")]; + int32 var_5458 = const()[name = string("op_5458"), val = int32(-1)]; + fp16 const_248_promoted = const()[name = string("const_248_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_77 = transpose(perm = var_5411, x = var_5406)[name = string("transpose_186")]; + tensor var_5460 = mul(x = hidden_states_77, y = const_248_promoted)[name = string("op_5460")]; + bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; + tensor input_133 = concat(axis = var_5458, interleave = input_133_interleave_0, values = (hidden_states_77, var_5460))[name = string("input_133")]; + tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; + fp16 var_5455_to_fp16 = const()[name = string("op_5455_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_5455_to_fp16, x = input_133)[name = string("normed_121_cast_fp16")]; + tensor normed_123_begin_0 = const()[name = string("normed_123_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_123_end_0 = const()[name = string("normed_123_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_123_end_mask_0 = const()[name = string("normed_123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_123 = slice_by_index(begin = normed_123_begin_0, end = normed_123_end_0, end_mask = normed_123_end_mask_0, x = normed_121_cast_fp16)[name = string("normed_123")]; + tensor const_251 = const()[name = string("const_251"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421258944)))]; + tensor k_15 = mul(x = normed_123, y = const_251)[name = string("k_15")]; + tensor var_5486 = mul(x = q_15, y = cos_5)[name = string("op_5486")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15)[name = string("x1_29")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15)[name = string("x2_29")]; + fp16 const_254_promoted = const()[name = string("const_254_promoted"), val = fp16(-0x1p+0)]; + tensor var_5507 = mul(x = x2_29, y = const_254_promoted)[name = string("op_5507")]; + int32 var_5509 = const()[name = string("op_5509"), val = int32(-1)]; + bool var_5510_interleave_0 = const()[name = string("op_5510_interleave_0"), val = bool(false)]; + tensor var_5510 = concat(axis = var_5509, interleave = var_5510_interleave_0, values = (var_5507, x1_29))[name = string("op_5510")]; + tensor var_5511 = mul(x = var_5510, y = sin_5)[name = string("op_5511")]; + tensor query_states_59 = add(x = var_5486, y = var_5511)[name = string("query_states_59")]; + tensor var_5514 = mul(x = k_15, y = cos_5)[name = string("op_5514")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15)[name = string("x1_31")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15)[name = string("x2_31")]; + fp16 const_257_promoted = const()[name = string("const_257_promoted"), val = fp16(-0x1p+0)]; + tensor var_5535 = mul(x = x2_31, y = const_257_promoted)[name = string("op_5535")]; + int32 var_5537 = const()[name = string("op_5537"), val = int32(-1)]; + bool var_5538_interleave_0 = const()[name = string("op_5538_interleave_0"), val = bool(false)]; + tensor var_5538 = concat(axis = var_5537, interleave = var_5538_interleave_0, values = (var_5535, x1_31))[name = string("op_5538")]; + tensor var_5539 = mul(x = var_5538, y = sin_5)[name = string("op_5539")]; + tensor key_states_73 = add(x = var_5514, y = var_5539)[name = string("key_states_73")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; + int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; + bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; + tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; + tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; + tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_1781, concat_129_values3_0))[name = string("concat_129")]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = key_states_73, x = coreml_update_state_69)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_70")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([35])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([36])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; + tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; + tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; + int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; + bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; + tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_1781, concat_133_values3_0))[name = string("concat_133")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_59 = transpose(perm = var_5422, x = var_5417)[name = string("transpose_185")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_59, x = coreml_update_state_70)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_71")]; + tensor var_5610_begin_0 = const()[name = string("op_5610_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_5610_end_0 = const()[name = string("op_5610_end_0"), val = tensor([8, 8, 4096, 128])]; + tensor var_5610_end_mask_0 = const()[name = string("op_5610_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5610_cast_fp16 = slice_by_index(begin = var_5610_begin_0, end = var_5610_end_0, end_mask = var_5610_end_mask_0, x = coreml_update_state_71)[name = string("op_5610_cast_fp16")]; + tensor K_layer_cache_15_axes_0 = const()[name = string("K_layer_cache_15_axes_0"), val = tensor([0])]; + tensor K_layer_cache_15_cast_fp16 = squeeze(axes = K_layer_cache_15_axes_0, x = var_5610_cast_fp16)[name = string("K_layer_cache_15_cast_fp16")]; + tensor var_5617_begin_0 = const()[name = string("op_5617_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor var_5617_end_0 = const()[name = string("op_5617_end_0"), val = tensor([36, 8, 4096, 128])]; + tensor var_5617_end_mask_0 = const()[name = string("op_5617_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_5617_cast_fp16 = slice_by_index(begin = var_5617_begin_0, end = var_5617_end_0, end_mask = var_5617_end_mask_0, x = coreml_update_state_71)[name = string("op_5617_cast_fp16")]; + tensor V_layer_cache_15_axes_0 = const()[name = string("V_layer_cache_15_axes_0"), val = tensor([0])]; + tensor V_layer_cache_15_cast_fp16 = squeeze(axes = V_layer_cache_15_axes_0, x = var_5617_cast_fp16)[name = string("V_layer_cache_15_cast_fp16")]; + tensor x_115_axes_0 = const()[name = string("x_115_axes_0"), val = tensor([1])]; + tensor x_115_cast_fp16 = expand_dims(axes = x_115_axes_0, x = K_layer_cache_15_cast_fp16)[name = string("x_115_cast_fp16")]; + tensor var_5646 = const()[name = string("op_5646"), val = tensor([1, 2, 1, 1])]; + tensor x_117_cast_fp16 = tile(reps = var_5646, x = x_115_cast_fp16)[name = string("x_117_cast_fp16")]; + tensor var_5658 = const()[name = string("op_5658"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_77_cast_fp16 = reshape(shape = var_5658, x = x_117_cast_fp16)[name = string("key_states_77_cast_fp16")]; + tensor x_121_axes_0 = const()[name = string("x_121_axes_0"), val = tensor([1])]; + tensor x_121_cast_fp16 = expand_dims(axes = x_121_axes_0, x = V_layer_cache_15_cast_fp16)[name = string("x_121_cast_fp16")]; + tensor var_5666 = const()[name = string("op_5666"), val = tensor([1, 2, 1, 1])]; + tensor x_123_cast_fp16 = tile(reps = var_5666, x = x_121_cast_fp16)[name = string("x_123_cast_fp16")]; + bool var_5693_transpose_x_0 = const()[name = string("op_5693_transpose_x_0"), val = bool(false)]; + bool var_5693_transpose_y_0 = const()[name = string("op_5693_transpose_y_0"), val = bool(true)]; + tensor var_5693 = matmul(transpose_x = var_5693_transpose_x_0, transpose_y = var_5693_transpose_y_0, x = query_states_59, y = key_states_77_cast_fp16)[name = string("op_5693")]; + fp16 var_5694_to_fp16 = const()[name = string("op_5694_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_29_cast_fp16 = mul(x = var_5693, y = var_5694_to_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor attn_weights_31_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("attn_weights_31_cast_fp16")]; + int32 var_5729 = const()[name = string("op_5729"), val = int32(-1)]; + tensor var_5731_cast_fp16 = softmax(axis = var_5729, x = attn_weights_31_cast_fp16)[name = string("op_5731_cast_fp16")]; + tensor concat_138 = const()[name = string("concat_138"), val = tensor([16, 64, 4096])]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_5731_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor concat_139 = const()[name = string("concat_139"), val = tensor([16, 4096, 128])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_123_cast_fp16)[name = string("reshape_22_cast_fp16")]; + bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; + bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; + tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; + tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 16, 64, 128])]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor var_5743_perm_0 = const()[name = string("op_5743_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_5762 = const()[name = string("op_5762"), val = tensor([1, 64, 2048])]; + tensor var_5743_cast_fp16 = transpose(perm = var_5743_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_184")]; + tensor attn_output_75_cast_fp16 = reshape(shape = var_5762, x = var_5743_cast_fp16)[name = string("attn_output_75_cast_fp16")]; + tensor var_5767 = const()[name = string("op_5767"), val = tensor([0, 2, 1])]; + string var_5783_pad_type_0 = const()[name = string("op_5783_pad_type_0"), val = string("valid")]; + int32 var_5783_groups_0 = const()[name = string("op_5783_groups_0"), val = int32(1)]; + tensor var_5783_strides_0 = const()[name = string("op_5783_strides_0"), val = tensor([1])]; + tensor var_5783_pad_0 = const()[name = string("op_5783_pad_0"), val = tensor([0, 0])]; + tensor var_5783_dilations_0 = const()[name = string("op_5783_dilations_0"), val = tensor([1])]; + tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421259264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423356480))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_5768_cast_fp16 = transpose(perm = var_5767, x = attn_output_75_cast_fp16)[name = string("transpose_183")]; + tensor var_5783_cast_fp16 = conv(dilations = var_5783_dilations_0, groups = var_5783_groups_0, pad = var_5783_pad_0, pad_type = var_5783_pad_type_0, strides = var_5783_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_5768_cast_fp16)[name = string("op_5783_cast_fp16")]; + tensor var_5787 = const()[name = string("op_5787"), val = tensor([0, 2, 1])]; + tensor attn_output_79_cast_fp16 = transpose(perm = var_5787, x = var_5783_cast_fp16)[name = string("transpose_182")]; + tensor hidden_states_79_cast_fp16 = add(x = hidden_states_71_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; + int32 var_5800 = const()[name = string("op_5800"), val = int32(-1)]; + fp16 const_269_promoted_to_fp16 = const()[name = string("const_269_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5802_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = const_269_promoted_to_fp16)[name = string("op_5802_cast_fp16")]; + bool input_137_interleave_0 = const()[name = string("input_137_interleave_0"), val = bool(false)]; + tensor input_137_cast_fp16 = concat(axis = var_5800, interleave = input_137_interleave_0, values = (hidden_states_79_cast_fp16, var_5802_cast_fp16))[name = string("input_137_cast_fp16")]; + tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; + fp16 var_5797_to_fp16 = const()[name = string("op_5797_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_5797_to_fp16, x = input_137_cast_fp16)[name = string("normed_125_cast_fp16")]; + tensor normed_127_begin_0 = const()[name = string("normed_127_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_127_end_0 = const()[name = string("normed_127_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_127_end_mask_0 = const()[name = string("normed_127_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_127_cast_fp16 = slice_by_index(begin = normed_127_begin_0, end = normed_127_end_0, end_mask = normed_127_end_mask_0, x = normed_125_cast_fp16)[name = string("normed_127_cast_fp16")]; + tensor const_272_promoted_to_fp16 = const()[name = string("const_272_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423422080)))]; + tensor x_125_cast_fp16 = mul(x = normed_127_cast_fp16, y = const_272_promoted_to_fp16)[name = string("x_125_cast_fp16")]; + tensor var_5827 = const()[name = string("op_5827"), val = tensor([0, 2, 1])]; + tensor input_139_axes_0 = const()[name = string("input_139_axes_0"), val = tensor([2])]; + tensor var_5828 = transpose(perm = var_5827, x = x_125_cast_fp16)[name = string("transpose_181")]; + tensor input_139 = expand_dims(axes = input_139_axes_0, x = var_5828)[name = string("input_139")]; + string input_141_pad_type_0 = const()[name = string("input_141_pad_type_0"), val = string("valid")]; + tensor input_141_strides_0 = const()[name = string("input_141_strides_0"), val = tensor([1, 1])]; + tensor input_141_pad_0 = const()[name = string("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_141_dilations_0 = const()[name = string("input_141_dilations_0"), val = tensor([1, 1])]; + int32 input_141_groups_0 = const()[name = string("input_141_groups_0"), val = int32(1)]; + tensor input_141 = conv(dilations = input_141_dilations_0, groups = input_141_groups_0, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = input_141_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_139)[name = string("input_141")]; + string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; + tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; + tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; + int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; + tensor b_15 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_139)[name = string("b_15")]; + tensor c_15 = silu(x = input_141)[name = string("c_15")]; + tensor input_143 = mul(x = c_15, y = b_15)[name = string("input_143")]; + string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; + tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; + tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; + int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; + tensor e_15 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_143)[name = string("e_15")]; + tensor var_5850_axes_0 = const()[name = string("op_5850_axes_0"), val = tensor([2])]; + tensor var_5850 = squeeze(axes = var_5850_axes_0, x = e_15)[name = string("op_5850")]; + tensor var_5851 = const()[name = string("op_5851"), val = tensor([0, 2, 1])]; + tensor var_5852 = transpose(perm = var_5851, x = var_5850)[name = string("transpose_180")]; + tensor hidden_states_81_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = var_5852)[name = string("hidden_states_81_cast_fp16")]; + int32 var_5864 = const()[name = string("op_5864"), val = int32(-1)]; + fp16 const_273_promoted_to_fp16 = const()[name = string("const_273_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5866_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_273_promoted_to_fp16)[name = string("op_5866_cast_fp16")]; + bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; + tensor input_145_cast_fp16 = concat(axis = var_5864, interleave = input_145_interleave_0, values = (hidden_states_81_cast_fp16, var_5866_cast_fp16))[name = string("input_145_cast_fp16")]; + tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; + fp16 var_5861_to_fp16 = const()[name = string("op_5861_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_5861_to_fp16, x = input_145_cast_fp16)[name = string("normed_129_cast_fp16")]; + tensor normed_131_begin_0 = const()[name = string("normed_131_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_131_end_0 = const()[name = string("normed_131_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_131_end_mask_0 = const()[name = string("normed_131_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_131_cast_fp16 = slice_by_index(begin = normed_131_begin_0, end = normed_131_end_0, end_mask = normed_131_end_mask_0, x = normed_129_cast_fp16)[name = string("normed_131_cast_fp16")]; + tensor const_276_promoted_to_fp16 = const()[name = string("const_276_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423424192)))]; + tensor hidden_states_83_cast_fp16 = mul(x = normed_131_cast_fp16, y = const_276_promoted_to_fp16)[name = string("hidden_states_83_cast_fp16")]; + tensor var_5889 = const()[name = string("op_5889"), val = tensor([0, 2, 1])]; + tensor var_5892_axes_0 = const()[name = string("op_5892_axes_0"), val = tensor([2])]; + tensor var_5890_cast_fp16 = transpose(perm = var_5889, x = hidden_states_83_cast_fp16)[name = string("transpose_179")]; + tensor var_5892_cast_fp16 = expand_dims(axes = var_5892_axes_0, x = var_5890_cast_fp16)[name = string("op_5892_cast_fp16")]; + string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; + tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; + tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; + int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; + tensor query_states_65 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_5892_cast_fp16)[name = string("query_states_65")]; + string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; + tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; + tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; + int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; + tensor key_states_81 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_5892_cast_fp16)[name = string("key_states_81")]; + string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; + tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; + tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; + int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; + tensor value_states_65 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_5892_cast_fp16)[name = string("value_states_65")]; + tensor var_5934 = const()[name = string("op_5934"), val = tensor([1, 16, 128, 64])]; + tensor var_5935 = reshape(shape = var_5934, x = query_states_65)[name = string("op_5935")]; + tensor var_5940 = const()[name = string("op_5940"), val = tensor([0, 1, 3, 2])]; + tensor var_5945 = const()[name = string("op_5945"), val = tensor([1, 8, 128, 64])]; + tensor var_5946 = reshape(shape = var_5945, x = key_states_81)[name = string("op_5946")]; + tensor var_5951 = const()[name = string("op_5951"), val = tensor([0, 1, 3, 2])]; + tensor var_5956 = const()[name = string("op_5956"), val = tensor([1, 8, 128, 64])]; + tensor var_5957 = reshape(shape = var_5956, x = value_states_65)[name = string("op_5957")]; + tensor var_5962 = const()[name = string("op_5962"), val = tensor([0, 1, 3, 2])]; + int32 var_5973 = const()[name = string("op_5973"), val = int32(-1)]; + fp16 const_278_promoted = const()[name = string("const_278_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_85 = transpose(perm = var_5940, x = var_5935)[name = string("transpose_178")]; + tensor var_5975 = mul(x = hidden_states_85, y = const_278_promoted)[name = string("op_5975")]; + bool input_149_interleave_0 = const()[name = string("input_149_interleave_0"), val = bool(false)]; + tensor input_149 = concat(axis = var_5973, interleave = input_149_interleave_0, values = (hidden_states_85, var_5975))[name = string("input_149")]; + tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; + fp16 var_5970_to_fp16 = const()[name = string("op_5970_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_5970_to_fp16, x = input_149)[name = string("normed_133_cast_fp16")]; + tensor normed_135_begin_0 = const()[name = string("normed_135_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_135_end_0 = const()[name = string("normed_135_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_135_end_mask_0 = const()[name = string("normed_135_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_135 = slice_by_index(begin = normed_135_begin_0, end = normed_135_end_0, end_mask = normed_135_end_mask_0, x = normed_133_cast_fp16)[name = string("normed_135")]; + tensor const_281 = const()[name = string("const_281"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423426304)))]; + tensor q_17 = mul(x = normed_135, y = const_281)[name = string("q_17")]; + int32 var_5998 = const()[name = string("op_5998"), val = int32(-1)]; + fp16 const_282_promoted = const()[name = string("const_282_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_87 = transpose(perm = var_5951, x = var_5946)[name = string("transpose_177")]; + tensor var_6000 = mul(x = hidden_states_87, y = const_282_promoted)[name = string("op_6000")]; + bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; + tensor input_151 = concat(axis = var_5998, interleave = input_151_interleave_0, values = (hidden_states_87, var_6000))[name = string("input_151")]; + tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; + fp16 var_5995_to_fp16 = const()[name = string("op_5995_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_5995_to_fp16, x = input_151)[name = string("normed_137_cast_fp16")]; + tensor normed_139_begin_0 = const()[name = string("normed_139_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_139_end_0 = const()[name = string("normed_139_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_139_end_mask_0 = const()[name = string("normed_139_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_139 = slice_by_index(begin = normed_139_begin_0, end = normed_139_end_0, end_mask = normed_139_end_mask_0, x = normed_137_cast_fp16)[name = string("normed_139")]; + tensor const_285 = const()[name = string("const_285"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423426624)))]; + tensor k_17 = mul(x = normed_139, y = const_285)[name = string("k_17")]; + tensor var_6026 = mul(x = q_17, y = cos_5)[name = string("op_6026")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17)[name = string("x1_33")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17)[name = string("x2_33")]; + fp16 const_288_promoted = const()[name = string("const_288_promoted"), val = fp16(-0x1p+0)]; + tensor var_6047 = mul(x = x2_33, y = const_288_promoted)[name = string("op_6047")]; + int32 var_6049 = const()[name = string("op_6049"), val = int32(-1)]; + bool var_6050_interleave_0 = const()[name = string("op_6050_interleave_0"), val = bool(false)]; + tensor var_6050 = concat(axis = var_6049, interleave = var_6050_interleave_0, values = (var_6047, x1_33))[name = string("op_6050")]; + tensor var_6051 = mul(x = var_6050, y = sin_5)[name = string("op_6051")]; + tensor query_states_67 = add(x = var_6026, y = var_6051)[name = string("query_states_67")]; + tensor var_6054 = mul(x = k_17, y = cos_5)[name = string("op_6054")]; + tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17)[name = string("x1_35")]; + tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17)[name = string("x2_35")]; + fp16 const_291_promoted = const()[name = string("const_291_promoted"), val = fp16(-0x1p+0)]; + tensor var_6075 = mul(x = x2_35, y = const_291_promoted)[name = string("op_6075")]; + int32 var_6077 = const()[name = string("op_6077"), val = int32(-1)]; + bool var_6078_interleave_0 = const()[name = string("op_6078_interleave_0"), val = bool(false)]; + tensor var_6078 = concat(axis = var_6077, interleave = var_6078_interleave_0, values = (var_6075, x1_35))[name = string("op_6078")]; + tensor var_6079 = mul(x = var_6078, y = sin_5)[name = string("op_6079")]; + tensor key_states_83 = add(x = var_6054, y = var_6079)[name = string("key_states_83")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([8])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([9])]; + int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; + bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; + tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_146")]; + tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; + tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_100, concat_147_values1_0, var_1781, concat_147_values3_0))[name = string("concat_147")]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_0_internal_tensor_assign_17_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_0_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_17_stride_0, update = key_states_83, x = coreml_update_state_71)[name = string("model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_72")]; + tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([36])]; + tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; + tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; + tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([37])]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_150")]; + tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; + tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; + int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; + bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; + tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_106, concat_151_values1_0, var_1781, concat_151_values3_0))[name = string("concat_151")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_67 = transpose(perm = var_5962, x = var_5957)[name = string("transpose_176")]; + tensor model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_0_internal_tensor_assign_18_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_0_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_18_stride_0, update = value_states_67, x = coreml_update_state_72)[name = string("model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_73")]; + tensor var_6150_begin_0 = const()[name = string("op_6150_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor var_6150_end_0 = const()[name = string("op_6150_end_0"), val = tensor([9, 8, 4096, 128])]; + tensor var_6150_end_mask_0 = const()[name = string("op_6150_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6150_cast_fp16 = slice_by_index(begin = var_6150_begin_0, end = var_6150_end_0, end_mask = var_6150_end_mask_0, x = coreml_update_state_73)[name = string("op_6150_cast_fp16")]; + tensor K_layer_cache_17_axes_0 = const()[name = string("K_layer_cache_17_axes_0"), val = tensor([0])]; + tensor K_layer_cache_17_cast_fp16 = squeeze(axes = K_layer_cache_17_axes_0, x = var_6150_cast_fp16)[name = string("K_layer_cache_17_cast_fp16")]; + tensor var_6157_begin_0 = const()[name = string("op_6157_begin_0"), val = tensor([36, 0, 0, 0])]; + tensor var_6157_end_0 = const()[name = string("op_6157_end_0"), val = tensor([37, 8, 4096, 128])]; + tensor var_6157_end_mask_0 = const()[name = string("op_6157_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6157_cast_fp16 = slice_by_index(begin = var_6157_begin_0, end = var_6157_end_0, end_mask = var_6157_end_mask_0, x = coreml_update_state_73)[name = string("op_6157_cast_fp16")]; + tensor V_layer_cache_17_axes_0 = const()[name = string("V_layer_cache_17_axes_0"), val = tensor([0])]; + tensor V_layer_cache_17_cast_fp16 = squeeze(axes = V_layer_cache_17_axes_0, x = var_6157_cast_fp16)[name = string("V_layer_cache_17_cast_fp16")]; + tensor x_131_axes_0 = const()[name = string("x_131_axes_0"), val = tensor([1])]; + tensor x_131_cast_fp16 = expand_dims(axes = x_131_axes_0, x = K_layer_cache_17_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_6186 = const()[name = string("op_6186"), val = tensor([1, 2, 1, 1])]; + tensor x_133_cast_fp16 = tile(reps = var_6186, x = x_131_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_6198 = const()[name = string("op_6198"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_87_cast_fp16 = reshape(shape = var_6198, x = x_133_cast_fp16)[name = string("key_states_87_cast_fp16")]; + tensor x_137_axes_0 = const()[name = string("x_137_axes_0"), val = tensor([1])]; + tensor x_137_cast_fp16 = expand_dims(axes = x_137_axes_0, x = V_layer_cache_17_cast_fp16)[name = string("x_137_cast_fp16")]; + tensor var_6206 = const()[name = string("op_6206"), val = tensor([1, 2, 1, 1])]; + tensor x_139_cast_fp16 = tile(reps = var_6206, x = x_137_cast_fp16)[name = string("x_139_cast_fp16")]; + bool var_6233_transpose_x_0 = const()[name = string("op_6233_transpose_x_0"), val = bool(false)]; + bool var_6233_transpose_y_0 = const()[name = string("op_6233_transpose_y_0"), val = bool(true)]; + tensor var_6233 = matmul(transpose_x = var_6233_transpose_x_0, transpose_y = var_6233_transpose_y_0, x = query_states_67, y = key_states_87_cast_fp16)[name = string("op_6233")]; + fp16 var_6234_to_fp16 = const()[name = string("op_6234_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_33_cast_fp16 = mul(x = var_6233, y = var_6234_to_fp16)[name = string("attn_weights_33_cast_fp16")]; + tensor attn_weights_35_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = causal_mask)[name = string("attn_weights_35_cast_fp16")]; + int32 var_6269 = const()[name = string("op_6269"), val = int32(-1)]; + tensor var_6271_cast_fp16 = softmax(axis = var_6269, x = attn_weights_35_cast_fp16)[name = string("op_6271_cast_fp16")]; + tensor concat_156 = const()[name = string("concat_156"), val = tensor([16, 64, 4096])]; + tensor reshape_24_cast_fp16 = reshape(shape = concat_156, x = var_6271_cast_fp16)[name = string("reshape_24_cast_fp16")]; + tensor concat_157 = const()[name = string("concat_157"), val = tensor([16, 4096, 128])]; + tensor reshape_25_cast_fp16 = reshape(shape = concat_157, x = x_139_cast_fp16)[name = string("reshape_25_cast_fp16")]; + bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; + bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; + tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; + tensor concat_161 = const()[name = string("concat_161"), val = tensor([1, 16, 64, 128])]; + tensor reshape_26_cast_fp16 = reshape(shape = concat_161, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; + tensor var_6283_perm_0 = const()[name = string("op_6283_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_6302 = const()[name = string("op_6302"), val = tensor([1, 64, 2048])]; + tensor var_6283_cast_fp16 = transpose(perm = var_6283_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_175")]; + tensor attn_output_85_cast_fp16 = reshape(shape = var_6302, x = var_6283_cast_fp16)[name = string("attn_output_85_cast_fp16")]; + tensor var_6307 = const()[name = string("op_6307"), val = tensor([0, 2, 1])]; + string var_6323_pad_type_0 = const()[name = string("op_6323_pad_type_0"), val = string("valid")]; + int32 var_6323_groups_0 = const()[name = string("op_6323_groups_0"), val = int32(1)]; + tensor var_6323_strides_0 = const()[name = string("op_6323_strides_0"), val = tensor([1])]; + tensor var_6323_pad_0 = const()[name = string("op_6323_pad_0"), val = tensor([0, 0])]; + tensor var_6323_dilations_0 = const()[name = string("op_6323_dilations_0"), val = tensor([1])]; + tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423426944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425524160))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6308_cast_fp16 = transpose(perm = var_6307, x = attn_output_85_cast_fp16)[name = string("transpose_174")]; + tensor var_6323_cast_fp16 = conv(dilations = var_6323_dilations_0, groups = var_6323_groups_0, pad = var_6323_pad_0, pad_type = var_6323_pad_type_0, strides = var_6323_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_6308_cast_fp16)[name = string("op_6323_cast_fp16")]; + tensor var_6327 = const()[name = string("op_6327"), val = tensor([0, 2, 1])]; + tensor attn_output_89_cast_fp16 = transpose(perm = var_6327, x = var_6323_cast_fp16)[name = string("transpose_173")]; + tensor hidden_states_89_cast_fp16 = add(x = hidden_states_81_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; + int32 var_6340 = const()[name = string("op_6340"), val = int32(-1)]; + fp16 const_303_promoted_to_fp16 = const()[name = string("const_303_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6342_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = const_303_promoted_to_fp16)[name = string("op_6342_cast_fp16")]; + bool input_155_interleave_0 = const()[name = string("input_155_interleave_0"), val = bool(false)]; + tensor input_155_cast_fp16 = concat(axis = var_6340, interleave = input_155_interleave_0, values = (hidden_states_89_cast_fp16, var_6342_cast_fp16))[name = string("input_155_cast_fp16")]; + tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; + fp16 var_6337_to_fp16 = const()[name = string("op_6337_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_6337_to_fp16, x = input_155_cast_fp16)[name = string("normed_141_cast_fp16")]; + tensor normed_143_begin_0 = const()[name = string("normed_143_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_143_end_0 = const()[name = string("normed_143_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_143_end_mask_0 = const()[name = string("normed_143_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_143_cast_fp16 = slice_by_index(begin = normed_143_begin_0, end = normed_143_end_0, end_mask = normed_143_end_mask_0, x = normed_141_cast_fp16)[name = string("normed_143_cast_fp16")]; + tensor const_306_promoted_to_fp16 = const()[name = string("const_306_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425589760)))]; + tensor x_141_cast_fp16 = mul(x = normed_143_cast_fp16, y = const_306_promoted_to_fp16)[name = string("x_141_cast_fp16")]; + tensor var_6367 = const()[name = string("op_6367"), val = tensor([0, 2, 1])]; + tensor input_157_axes_0 = const()[name = string("input_157_axes_0"), val = tensor([2])]; + tensor var_6368 = transpose(perm = var_6367, x = x_141_cast_fp16)[name = string("transpose_172")]; + tensor input_157 = expand_dims(axes = input_157_axes_0, x = var_6368)[name = string("input_157")]; + string input_159_pad_type_0 = const()[name = string("input_159_pad_type_0"), val = string("valid")]; + tensor input_159_strides_0 = const()[name = string("input_159_strides_0"), val = tensor([1, 1])]; + tensor input_159_pad_0 = const()[name = string("input_159_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_159_dilations_0 = const()[name = string("input_159_dilations_0"), val = tensor([1, 1])]; + int32 input_159_groups_0 = const()[name = string("input_159_groups_0"), val = int32(1)]; + tensor input_159 = conv(dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_157)[name = string("input_159")]; + string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; + tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; + tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; + int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; + tensor b_17 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_157)[name = string("b_17")]; + tensor c_17 = silu(x = input_159)[name = string("c_17")]; + tensor input_161 = mul(x = c_17, y = b_17)[name = string("input_161")]; + string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; + tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; + tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; + int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; + tensor e_17 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_161)[name = string("e_17")]; + tensor var_6390_axes_0 = const()[name = string("op_6390_axes_0"), val = tensor([2])]; + tensor var_6390 = squeeze(axes = var_6390_axes_0, x = e_17)[name = string("op_6390")]; + tensor var_6391 = const()[name = string("op_6391"), val = tensor([0, 2, 1])]; + tensor var_6392 = transpose(perm = var_6391, x = var_6390)[name = string("transpose_171")]; + tensor hidden_states_91_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = var_6392)[name = string("hidden_states_91_cast_fp16")]; + int32 var_6404 = const()[name = string("op_6404"), val = int32(-1)]; + fp16 const_307_promoted_to_fp16 = const()[name = string("const_307_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6406_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_307_promoted_to_fp16)[name = string("op_6406_cast_fp16")]; + bool input_163_interleave_0 = const()[name = string("input_163_interleave_0"), val = bool(false)]; + tensor input_163_cast_fp16 = concat(axis = var_6404, interleave = input_163_interleave_0, values = (hidden_states_91_cast_fp16, var_6406_cast_fp16))[name = string("input_163_cast_fp16")]; + tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; + fp16 var_6401_to_fp16 = const()[name = string("op_6401_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_6401_to_fp16, x = input_163_cast_fp16)[name = string("normed_145_cast_fp16")]; + tensor normed_147_begin_0 = const()[name = string("normed_147_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_147_end_0 = const()[name = string("normed_147_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_147_end_mask_0 = const()[name = string("normed_147_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_147_cast_fp16 = slice_by_index(begin = normed_147_begin_0, end = normed_147_end_0, end_mask = normed_147_end_mask_0, x = normed_145_cast_fp16)[name = string("normed_147_cast_fp16")]; + tensor const_310_promoted_to_fp16 = const()[name = string("const_310_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425591872)))]; + tensor hidden_states_93_cast_fp16 = mul(x = normed_147_cast_fp16, y = const_310_promoted_to_fp16)[name = string("hidden_states_93_cast_fp16")]; + tensor var_6429 = const()[name = string("op_6429"), val = tensor([0, 2, 1])]; + tensor var_6432_axes_0 = const()[name = string("op_6432_axes_0"), val = tensor([2])]; + tensor var_6430_cast_fp16 = transpose(perm = var_6429, x = hidden_states_93_cast_fp16)[name = string("transpose_170")]; + tensor var_6432_cast_fp16 = expand_dims(axes = var_6432_axes_0, x = var_6430_cast_fp16)[name = string("op_6432_cast_fp16")]; + string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; + tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; + tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; + int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; + tensor query_states_73 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_6432_cast_fp16)[name = string("query_states_73")]; + string key_states_91_pad_type_0 = const()[name = string("key_states_91_pad_type_0"), val = string("valid")]; + tensor key_states_91_strides_0 = const()[name = string("key_states_91_strides_0"), val = tensor([1, 1])]; + tensor key_states_91_pad_0 = const()[name = string("key_states_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_91_dilations_0 = const()[name = string("key_states_91_dilations_0"), val = tensor([1, 1])]; + int32 key_states_91_groups_0 = const()[name = string("key_states_91_groups_0"), val = int32(1)]; + tensor key_states_91 = conv(dilations = key_states_91_dilations_0, groups = key_states_91_groups_0, pad = key_states_91_pad_0, pad_type = key_states_91_pad_type_0, strides = key_states_91_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_6432_cast_fp16)[name = string("key_states_91")]; + string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; + tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; + tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; + int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; + tensor value_states_73 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_6432_cast_fp16)[name = string("value_states_73")]; + tensor var_6474 = const()[name = string("op_6474"), val = tensor([1, 16, 128, 64])]; + tensor var_6475 = reshape(shape = var_6474, x = query_states_73)[name = string("op_6475")]; + tensor var_6480 = const()[name = string("op_6480"), val = tensor([0, 1, 3, 2])]; + tensor var_6485 = const()[name = string("op_6485"), val = tensor([1, 8, 128, 64])]; + tensor var_6486 = reshape(shape = var_6485, x = key_states_91)[name = string("op_6486")]; + tensor var_6491 = const()[name = string("op_6491"), val = tensor([0, 1, 3, 2])]; + tensor var_6496 = const()[name = string("op_6496"), val = tensor([1, 8, 128, 64])]; + tensor var_6497 = reshape(shape = var_6496, x = value_states_73)[name = string("op_6497")]; + tensor var_6502 = const()[name = string("op_6502"), val = tensor([0, 1, 3, 2])]; + int32 var_6513 = const()[name = string("op_6513"), val = int32(-1)]; + fp16 const_312_promoted = const()[name = string("const_312_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_95 = transpose(perm = var_6480, x = var_6475)[name = string("transpose_169")]; + tensor var_6515 = mul(x = hidden_states_95, y = const_312_promoted)[name = string("op_6515")]; + bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; + tensor input_167 = concat(axis = var_6513, interleave = input_167_interleave_0, values = (hidden_states_95, var_6515))[name = string("input_167")]; + tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; + fp16 var_6510_to_fp16 = const()[name = string("op_6510_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_6510_to_fp16, x = input_167)[name = string("normed_149_cast_fp16")]; + tensor normed_151_begin_0 = const()[name = string("normed_151_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_151_end_0 = const()[name = string("normed_151_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_151_end_mask_0 = const()[name = string("normed_151_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_151 = slice_by_index(begin = normed_151_begin_0, end = normed_151_end_0, end_mask = normed_151_end_mask_0, x = normed_149_cast_fp16)[name = string("normed_151")]; + tensor const_315 = const()[name = string("const_315"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425593984)))]; + tensor q_19 = mul(x = normed_151, y = const_315)[name = string("q_19")]; + int32 var_6538 = const()[name = string("op_6538"), val = int32(-1)]; + fp16 const_316_promoted = const()[name = string("const_316_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_97 = transpose(perm = var_6491, x = var_6486)[name = string("transpose_168")]; + tensor var_6540 = mul(x = hidden_states_97, y = const_316_promoted)[name = string("op_6540")]; + bool input_169_interleave_0 = const()[name = string("input_169_interleave_0"), val = bool(false)]; + tensor input_169 = concat(axis = var_6538, interleave = input_169_interleave_0, values = (hidden_states_97, var_6540))[name = string("input_169")]; + tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; + fp16 var_6535_to_fp16 = const()[name = string("op_6535_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_6535_to_fp16, x = input_169)[name = string("normed_153_cast_fp16")]; + tensor normed_155_begin_0 = const()[name = string("normed_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_155_end_0 = const()[name = string("normed_155_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_155_end_mask_0 = const()[name = string("normed_155_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_155 = slice_by_index(begin = normed_155_begin_0, end = normed_155_end_0, end_mask = normed_155_end_mask_0, x = normed_153_cast_fp16)[name = string("normed_155")]; + tensor const_319 = const()[name = string("const_319"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425594304)))]; + tensor k_19 = mul(x = normed_155, y = const_319)[name = string("k_19")]; + tensor var_6566 = mul(x = q_19, y = cos_5)[name = string("op_6566")]; + tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19)[name = string("x1_37")]; + tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19)[name = string("x2_37")]; + fp16 const_322_promoted = const()[name = string("const_322_promoted"), val = fp16(-0x1p+0)]; + tensor var_6587 = mul(x = x2_37, y = const_322_promoted)[name = string("op_6587")]; + int32 var_6589 = const()[name = string("op_6589"), val = int32(-1)]; + bool var_6590_interleave_0 = const()[name = string("op_6590_interleave_0"), val = bool(false)]; + tensor var_6590 = concat(axis = var_6589, interleave = var_6590_interleave_0, values = (var_6587, x1_37))[name = string("op_6590")]; + tensor var_6591 = mul(x = var_6590, y = sin_5)[name = string("op_6591")]; + tensor query_states_75 = add(x = var_6566, y = var_6591)[name = string("query_states_75")]; + tensor var_6594 = mul(x = k_19, y = cos_5)[name = string("op_6594")]; + tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19)[name = string("x1_39")]; + tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19)[name = string("x2_39")]; + fp16 const_325_promoted = const()[name = string("const_325_promoted"), val = fp16(-0x1p+0)]; + tensor var_6615 = mul(x = x2_39, y = const_325_promoted)[name = string("op_6615")]; + int32 var_6617 = const()[name = string("op_6617"), val = int32(-1)]; + bool var_6618_interleave_0 = const()[name = string("op_6618_interleave_0"), val = bool(false)]; + tensor var_6618 = concat(axis = var_6617, interleave = var_6618_interleave_0, values = (var_6615, x1_39))[name = string("op_6618")]; + tensor var_6619 = mul(x = var_6618, y = sin_5)[name = string("op_6619")]; + tensor key_states_93 = add(x = var_6594, y = var_6619)[name = string("key_states_93")]; + tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([9])]; + tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; + tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; + tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([10])]; + int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; + bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; + tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_164")]; + tensor concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor([0])]; + tensor concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor([0])]; + int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; + bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; + tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (expand_dims_112, concat_165_values1_0, var_1781, concat_165_values3_0))[name = string("concat_165")]; + tensor model_model_kv_cache_0_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_164, begin_mask = model_model_kv_cache_0_internal_tensor_assign_19_begin_mask_0, end = concat_165, end_mask = model_model_kv_cache_0_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_19_stride_0, update = key_states_93, x = coreml_update_state_73)[name = string("model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_74")]; + tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([37])]; + tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; + tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; + tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([38])]; + int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)]; + bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)]; + tensor concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_168")]; + tensor concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = tensor([0])]; + tensor concat_169_values3_0 = const()[name = string("concat_169_values3_0"), val = tensor([0])]; + int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; + bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; + tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (expand_dims_118, concat_169_values1_0, var_1781, concat_169_values3_0))[name = string("concat_169")]; + tensor model_model_kv_cache_0_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_75 = transpose(perm = var_6502, x = var_6497)[name = string("transpose_167")]; + tensor model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_168, begin_mask = model_model_kv_cache_0_internal_tensor_assign_20_begin_mask_0, end = concat_169, end_mask = model_model_kv_cache_0_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_20_stride_0, update = value_states_75, x = coreml_update_state_74)[name = string("model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_75")]; + tensor var_6690_begin_0 = const()[name = string("op_6690_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor var_6690_end_0 = const()[name = string("op_6690_end_0"), val = tensor([10, 8, 4096, 128])]; + tensor var_6690_end_mask_0 = const()[name = string("op_6690_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6690_cast_fp16 = slice_by_index(begin = var_6690_begin_0, end = var_6690_end_0, end_mask = var_6690_end_mask_0, x = coreml_update_state_75)[name = string("op_6690_cast_fp16")]; + tensor K_layer_cache_19_axes_0 = const()[name = string("K_layer_cache_19_axes_0"), val = tensor([0])]; + tensor K_layer_cache_19_cast_fp16 = squeeze(axes = K_layer_cache_19_axes_0, x = var_6690_cast_fp16)[name = string("K_layer_cache_19_cast_fp16")]; + tensor var_6697_begin_0 = const()[name = string("op_6697_begin_0"), val = tensor([37, 0, 0, 0])]; + tensor var_6697_end_0 = const()[name = string("op_6697_end_0"), val = tensor([38, 8, 4096, 128])]; + tensor var_6697_end_mask_0 = const()[name = string("op_6697_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_6697_cast_fp16 = slice_by_index(begin = var_6697_begin_0, end = var_6697_end_0, end_mask = var_6697_end_mask_0, x = coreml_update_state_75)[name = string("op_6697_cast_fp16")]; + tensor V_layer_cache_19_axes_0 = const()[name = string("V_layer_cache_19_axes_0"), val = tensor([0])]; + tensor V_layer_cache_19_cast_fp16 = squeeze(axes = V_layer_cache_19_axes_0, x = var_6697_cast_fp16)[name = string("V_layer_cache_19_cast_fp16")]; + tensor x_147_axes_0 = const()[name = string("x_147_axes_0"), val = tensor([1])]; + tensor x_147_cast_fp16 = expand_dims(axes = x_147_axes_0, x = K_layer_cache_19_cast_fp16)[name = string("x_147_cast_fp16")]; + tensor var_6726 = const()[name = string("op_6726"), val = tensor([1, 2, 1, 1])]; + tensor x_149_cast_fp16 = tile(reps = var_6726, x = x_147_cast_fp16)[name = string("x_149_cast_fp16")]; + tensor var_6738 = const()[name = string("op_6738"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_97_cast_fp16 = reshape(shape = var_6738, x = x_149_cast_fp16)[name = string("key_states_97_cast_fp16")]; + tensor x_153_axes_0 = const()[name = string("x_153_axes_0"), val = tensor([1])]; + tensor x_153_cast_fp16 = expand_dims(axes = x_153_axes_0, x = V_layer_cache_19_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_6746 = const()[name = string("op_6746"), val = tensor([1, 2, 1, 1])]; + tensor x_155_cast_fp16 = tile(reps = var_6746, x = x_153_cast_fp16)[name = string("x_155_cast_fp16")]; + bool var_6773_transpose_x_0 = const()[name = string("op_6773_transpose_x_0"), val = bool(false)]; + bool var_6773_transpose_y_0 = const()[name = string("op_6773_transpose_y_0"), val = bool(true)]; + tensor var_6773 = matmul(transpose_x = var_6773_transpose_x_0, transpose_y = var_6773_transpose_y_0, x = query_states_75, y = key_states_97_cast_fp16)[name = string("op_6773")]; + fp16 var_6774_to_fp16 = const()[name = string("op_6774_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_37_cast_fp16 = mul(x = var_6773, y = var_6774_to_fp16)[name = string("attn_weights_37_cast_fp16")]; + tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = causal_mask)[name = string("attn_weights_39_cast_fp16")]; + int32 var_6809 = const()[name = string("op_6809"), val = int32(-1)]; + tensor var_6811_cast_fp16 = softmax(axis = var_6809, x = attn_weights_39_cast_fp16)[name = string("op_6811_cast_fp16")]; + tensor concat_174 = const()[name = string("concat_174"), val = tensor([16, 64, 4096])]; + tensor reshape_27_cast_fp16 = reshape(shape = concat_174, x = var_6811_cast_fp16)[name = string("reshape_27_cast_fp16")]; + tensor concat_175 = const()[name = string("concat_175"), val = tensor([16, 4096, 128])]; + tensor reshape_28_cast_fp16 = reshape(shape = concat_175, x = x_155_cast_fp16)[name = string("reshape_28_cast_fp16")]; + bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; + bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(false)]; + tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = reshape_27_cast_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")]; + tensor concat_179 = const()[name = string("concat_179"), val = tensor([1, 16, 64, 128])]; + tensor reshape_29_cast_fp16 = reshape(shape = concat_179, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")]; + tensor var_6823_perm_0 = const()[name = string("op_6823_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_6842 = const()[name = string("op_6842"), val = tensor([1, 64, 2048])]; + tensor var_6823_cast_fp16 = transpose(perm = var_6823_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_166")]; + tensor attn_output_95_cast_fp16 = reshape(shape = var_6842, x = var_6823_cast_fp16)[name = string("attn_output_95_cast_fp16")]; + tensor var_6847 = const()[name = string("op_6847"), val = tensor([0, 2, 1])]; + string var_6863_pad_type_0 = const()[name = string("op_6863_pad_type_0"), val = string("valid")]; + int32 var_6863_groups_0 = const()[name = string("op_6863_groups_0"), val = int32(1)]; + tensor var_6863_strides_0 = const()[name = string("op_6863_strides_0"), val = tensor([1])]; + tensor var_6863_pad_0 = const()[name = string("op_6863_pad_0"), val = tensor([0, 0])]; + tensor var_6863_dilations_0 = const()[name = string("op_6863_dilations_0"), val = tensor([1])]; + tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425594624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427691840))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_6848_cast_fp16 = transpose(perm = var_6847, x = attn_output_95_cast_fp16)[name = string("transpose_165")]; + tensor var_6863_cast_fp16 = conv(dilations = var_6863_dilations_0, groups = var_6863_groups_0, pad = var_6863_pad_0, pad_type = var_6863_pad_type_0, strides = var_6863_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_6848_cast_fp16)[name = string("op_6863_cast_fp16")]; + tensor var_6867 = const()[name = string("op_6867"), val = tensor([0, 2, 1])]; + tensor attn_output_99_cast_fp16 = transpose(perm = var_6867, x = var_6863_cast_fp16)[name = string("transpose_164")]; + tensor hidden_states_99_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; + int32 var_6880 = const()[name = string("op_6880"), val = int32(-1)]; + fp16 const_337_promoted_to_fp16 = const()[name = string("const_337_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6882_cast_fp16 = mul(x = hidden_states_99_cast_fp16, y = const_337_promoted_to_fp16)[name = string("op_6882_cast_fp16")]; + bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; + tensor input_173_cast_fp16 = concat(axis = var_6880, interleave = input_173_interleave_0, values = (hidden_states_99_cast_fp16, var_6882_cast_fp16))[name = string("input_173_cast_fp16")]; + tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; + fp16 var_6877_to_fp16 = const()[name = string("op_6877_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_6877_to_fp16, x = input_173_cast_fp16)[name = string("normed_157_cast_fp16")]; + tensor normed_159_begin_0 = const()[name = string("normed_159_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_159_end_0 = const()[name = string("normed_159_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_159_end_mask_0 = const()[name = string("normed_159_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_159_cast_fp16 = slice_by_index(begin = normed_159_begin_0, end = normed_159_end_0, end_mask = normed_159_end_mask_0, x = normed_157_cast_fp16)[name = string("normed_159_cast_fp16")]; + tensor const_340_promoted_to_fp16 = const()[name = string("const_340_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427757440)))]; + tensor x_157_cast_fp16 = mul(x = normed_159_cast_fp16, y = const_340_promoted_to_fp16)[name = string("x_157_cast_fp16")]; + tensor var_6907 = const()[name = string("op_6907"), val = tensor([0, 2, 1])]; + tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; + tensor var_6908 = transpose(perm = var_6907, x = x_157_cast_fp16)[name = string("transpose_163")]; + tensor input_175 = expand_dims(axes = input_175_axes_0, x = var_6908)[name = string("input_175")]; + string input_177_pad_type_0 = const()[name = string("input_177_pad_type_0"), val = string("valid")]; + tensor input_177_strides_0 = const()[name = string("input_177_strides_0"), val = tensor([1, 1])]; + tensor input_177_pad_0 = const()[name = string("input_177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_177_dilations_0 = const()[name = string("input_177_dilations_0"), val = tensor([1, 1])]; + int32 input_177_groups_0 = const()[name = string("input_177_groups_0"), val = int32(1)]; + tensor input_177 = conv(dilations = input_177_dilations_0, groups = input_177_groups_0, pad = input_177_pad_0, pad_type = input_177_pad_type_0, strides = input_177_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_175)[name = string("input_177")]; + string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; + tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; + tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; + int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; + tensor b_19 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_175)[name = string("b_19")]; + tensor c_19 = silu(x = input_177)[name = string("c_19")]; + tensor input_179 = mul(x = c_19, y = b_19)[name = string("input_179")]; + string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; + tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; + tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; + int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; + tensor e_19 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_179)[name = string("e_19")]; + tensor var_6930_axes_0 = const()[name = string("op_6930_axes_0"), val = tensor([2])]; + tensor var_6930 = squeeze(axes = var_6930_axes_0, x = e_19)[name = string("op_6930")]; + tensor var_6931 = const()[name = string("op_6931"), val = tensor([0, 2, 1])]; + tensor var_6932 = transpose(perm = var_6931, x = var_6930)[name = string("transpose_162")]; + tensor hidden_states_101_cast_fp16 = add(x = hidden_states_99_cast_fp16, y = var_6932)[name = string("hidden_states_101_cast_fp16")]; + int32 var_6944 = const()[name = string("op_6944"), val = int32(-1)]; + fp16 const_341_promoted_to_fp16 = const()[name = string("const_341_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_6946_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = const_341_promoted_to_fp16)[name = string("op_6946_cast_fp16")]; + bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; + tensor input_181_cast_fp16 = concat(axis = var_6944, interleave = input_181_interleave_0, values = (hidden_states_101_cast_fp16, var_6946_cast_fp16))[name = string("input_181_cast_fp16")]; + tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; + fp16 var_6941_to_fp16 = const()[name = string("op_6941_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_6941_to_fp16, x = input_181_cast_fp16)[name = string("normed_161_cast_fp16")]; + tensor normed_163_begin_0 = const()[name = string("normed_163_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_163_end_0 = const()[name = string("normed_163_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_163_end_mask_0 = const()[name = string("normed_163_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_163_cast_fp16 = slice_by_index(begin = normed_163_begin_0, end = normed_163_end_0, end_mask = normed_163_end_mask_0, x = normed_161_cast_fp16)[name = string("normed_163_cast_fp16")]; + tensor const_344_promoted_to_fp16 = const()[name = string("const_344_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427759552)))]; + tensor hidden_states_103_cast_fp16 = mul(x = normed_163_cast_fp16, y = const_344_promoted_to_fp16)[name = string("hidden_states_103_cast_fp16")]; + tensor var_6969 = const()[name = string("op_6969"), val = tensor([0, 2, 1])]; + tensor var_6972_axes_0 = const()[name = string("op_6972_axes_0"), val = tensor([2])]; + tensor var_6970_cast_fp16 = transpose(perm = var_6969, x = hidden_states_103_cast_fp16)[name = string("transpose_161")]; + tensor var_6972_cast_fp16 = expand_dims(axes = var_6972_axes_0, x = var_6970_cast_fp16)[name = string("op_6972_cast_fp16")]; + string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; + tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; + tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; + int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; + tensor query_states_81 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_6972_cast_fp16)[name = string("query_states_81")]; + string key_states_101_pad_type_0 = const()[name = string("key_states_101_pad_type_0"), val = string("valid")]; + tensor key_states_101_strides_0 = const()[name = string("key_states_101_strides_0"), val = tensor([1, 1])]; + tensor key_states_101_pad_0 = const()[name = string("key_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_101_dilations_0 = const()[name = string("key_states_101_dilations_0"), val = tensor([1, 1])]; + int32 key_states_101_groups_0 = const()[name = string("key_states_101_groups_0"), val = int32(1)]; + tensor key_states_101 = conv(dilations = key_states_101_dilations_0, groups = key_states_101_groups_0, pad = key_states_101_pad_0, pad_type = key_states_101_pad_type_0, strides = key_states_101_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_6972_cast_fp16)[name = string("key_states_101")]; + string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; + tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; + tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; + int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; + tensor value_states_81 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_6972_cast_fp16)[name = string("value_states_81")]; + tensor var_7014 = const()[name = string("op_7014"), val = tensor([1, 16, 128, 64])]; + tensor var_7015 = reshape(shape = var_7014, x = query_states_81)[name = string("op_7015")]; + tensor var_7020 = const()[name = string("op_7020"), val = tensor([0, 1, 3, 2])]; + tensor var_7025 = const()[name = string("op_7025"), val = tensor([1, 8, 128, 64])]; + tensor var_7026 = reshape(shape = var_7025, x = key_states_101)[name = string("op_7026")]; + tensor var_7031 = const()[name = string("op_7031"), val = tensor([0, 1, 3, 2])]; + tensor var_7036 = const()[name = string("op_7036"), val = tensor([1, 8, 128, 64])]; + tensor var_7037 = reshape(shape = var_7036, x = value_states_81)[name = string("op_7037")]; + tensor var_7042 = const()[name = string("op_7042"), val = tensor([0, 1, 3, 2])]; + int32 var_7053 = const()[name = string("op_7053"), val = int32(-1)]; + fp16 const_346_promoted = const()[name = string("const_346_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_105 = transpose(perm = var_7020, x = var_7015)[name = string("transpose_160")]; + tensor var_7055 = mul(x = hidden_states_105, y = const_346_promoted)[name = string("op_7055")]; + bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; + tensor input_185 = concat(axis = var_7053, interleave = input_185_interleave_0, values = (hidden_states_105, var_7055))[name = string("input_185")]; + tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; + fp16 var_7050_to_fp16 = const()[name = string("op_7050_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_7050_to_fp16, x = input_185)[name = string("normed_165_cast_fp16")]; + tensor normed_167_begin_0 = const()[name = string("normed_167_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_167_end_0 = const()[name = string("normed_167_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_167_end_mask_0 = const()[name = string("normed_167_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_167 = slice_by_index(begin = normed_167_begin_0, end = normed_167_end_0, end_mask = normed_167_end_mask_0, x = normed_165_cast_fp16)[name = string("normed_167")]; + tensor const_349 = const()[name = string("const_349"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427761664)))]; + tensor q_21 = mul(x = normed_167, y = const_349)[name = string("q_21")]; + int32 var_7078 = const()[name = string("op_7078"), val = int32(-1)]; + fp16 const_350_promoted = const()[name = string("const_350_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_107 = transpose(perm = var_7031, x = var_7026)[name = string("transpose_159")]; + tensor var_7080 = mul(x = hidden_states_107, y = const_350_promoted)[name = string("op_7080")]; + bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; + tensor input_187 = concat(axis = var_7078, interleave = input_187_interleave_0, values = (hidden_states_107, var_7080))[name = string("input_187")]; + tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; + fp16 var_7075_to_fp16 = const()[name = string("op_7075_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_7075_to_fp16, x = input_187)[name = string("normed_169_cast_fp16")]; + tensor normed_171_begin_0 = const()[name = string("normed_171_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_171_end_0 = const()[name = string("normed_171_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_171_end_mask_0 = const()[name = string("normed_171_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_171 = slice_by_index(begin = normed_171_begin_0, end = normed_171_end_0, end_mask = normed_171_end_mask_0, x = normed_169_cast_fp16)[name = string("normed_171")]; + tensor const_353 = const()[name = string("const_353"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427761984)))]; + tensor k_21 = mul(x = normed_171, y = const_353)[name = string("k_21")]; + tensor var_7106 = mul(x = q_21, y = cos_5)[name = string("op_7106")]; + tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21)[name = string("x1_41")]; + tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21)[name = string("x2_41")]; + fp16 const_356_promoted = const()[name = string("const_356_promoted"), val = fp16(-0x1p+0)]; + tensor var_7127 = mul(x = x2_41, y = const_356_promoted)[name = string("op_7127")]; + int32 var_7129 = const()[name = string("op_7129"), val = int32(-1)]; + bool var_7130_interleave_0 = const()[name = string("op_7130_interleave_0"), val = bool(false)]; + tensor var_7130 = concat(axis = var_7129, interleave = var_7130_interleave_0, values = (var_7127, x1_41))[name = string("op_7130")]; + tensor var_7131 = mul(x = var_7130, y = sin_5)[name = string("op_7131")]; + tensor query_states_83 = add(x = var_7106, y = var_7131)[name = string("query_states_83")]; + tensor var_7134 = mul(x = k_21, y = cos_5)[name = string("op_7134")]; + tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21)[name = string("x1_43")]; + tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21)[name = string("x2_43")]; + fp16 const_359_promoted = const()[name = string("const_359_promoted"), val = fp16(-0x1p+0)]; + tensor var_7155 = mul(x = x2_43, y = const_359_promoted)[name = string("op_7155")]; + int32 var_7157 = const()[name = string("op_7157"), val = int32(-1)]; + bool var_7158_interleave_0 = const()[name = string("op_7158_interleave_0"), val = bool(false)]; + tensor var_7158 = concat(axis = var_7157, interleave = var_7158_interleave_0, values = (var_7155, x1_43))[name = string("op_7158")]; + tensor var_7159 = mul(x = var_7158, y = sin_5)[name = string("op_7159")]; + tensor key_states_103 = add(x = var_7134, y = var_7159)[name = string("key_states_103")]; + tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([10])]; + tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; + tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; + tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([11])]; + int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; + bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; + tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_182")]; + tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; + tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; + int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; + bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; + tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_124, concat_183_values1_0, var_1781, concat_183_values3_0))[name = string("concat_183")]; + tensor model_model_kv_cache_0_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_0_internal_tensor_assign_21_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_0_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_21_stride_0, update = key_states_103, x = coreml_update_state_75)[name = string("model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_76")]; + tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([38])]; + tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; + tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; + tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([39])]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_186")]; + tensor concat_187_values1_0 = const()[name = string("concat_187_values1_0"), val = tensor([0])]; + tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; + int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; + bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; + tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_130, concat_187_values1_0, var_1781, concat_187_values3_0))[name = string("concat_187")]; + tensor model_model_kv_cache_0_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_83 = transpose(perm = var_7042, x = var_7037)[name = string("transpose_158")]; + tensor model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_186, begin_mask = model_model_kv_cache_0_internal_tensor_assign_22_begin_mask_0, end = concat_187, end_mask = model_model_kv_cache_0_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_22_stride_0, update = value_states_83, x = coreml_update_state_76)[name = string("model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_77")]; + tensor var_7230_begin_0 = const()[name = string("op_7230_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor var_7230_end_0 = const()[name = string("op_7230_end_0"), val = tensor([11, 8, 4096, 128])]; + tensor var_7230_end_mask_0 = const()[name = string("op_7230_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7230_cast_fp16 = slice_by_index(begin = var_7230_begin_0, end = var_7230_end_0, end_mask = var_7230_end_mask_0, x = coreml_update_state_77)[name = string("op_7230_cast_fp16")]; + tensor K_layer_cache_21_axes_0 = const()[name = string("K_layer_cache_21_axes_0"), val = tensor([0])]; + tensor K_layer_cache_21_cast_fp16 = squeeze(axes = K_layer_cache_21_axes_0, x = var_7230_cast_fp16)[name = string("K_layer_cache_21_cast_fp16")]; + tensor var_7237_begin_0 = const()[name = string("op_7237_begin_0"), val = tensor([38, 0, 0, 0])]; + tensor var_7237_end_0 = const()[name = string("op_7237_end_0"), val = tensor([39, 8, 4096, 128])]; + tensor var_7237_end_mask_0 = const()[name = string("op_7237_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7237_cast_fp16 = slice_by_index(begin = var_7237_begin_0, end = var_7237_end_0, end_mask = var_7237_end_mask_0, x = coreml_update_state_77)[name = string("op_7237_cast_fp16")]; + tensor V_layer_cache_21_axes_0 = const()[name = string("V_layer_cache_21_axes_0"), val = tensor([0])]; + tensor V_layer_cache_21_cast_fp16 = squeeze(axes = V_layer_cache_21_axes_0, x = var_7237_cast_fp16)[name = string("V_layer_cache_21_cast_fp16")]; + tensor x_163_axes_0 = const()[name = string("x_163_axes_0"), val = tensor([1])]; + tensor x_163_cast_fp16 = expand_dims(axes = x_163_axes_0, x = K_layer_cache_21_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor var_7266 = const()[name = string("op_7266"), val = tensor([1, 2, 1, 1])]; + tensor x_165_cast_fp16 = tile(reps = var_7266, x = x_163_cast_fp16)[name = string("x_165_cast_fp16")]; + tensor var_7278 = const()[name = string("op_7278"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_107_cast_fp16 = reshape(shape = var_7278, x = x_165_cast_fp16)[name = string("key_states_107_cast_fp16")]; + tensor x_169_axes_0 = const()[name = string("x_169_axes_0"), val = tensor([1])]; + tensor x_169_cast_fp16 = expand_dims(axes = x_169_axes_0, x = V_layer_cache_21_cast_fp16)[name = string("x_169_cast_fp16")]; + tensor var_7286 = const()[name = string("op_7286"), val = tensor([1, 2, 1, 1])]; + tensor x_171_cast_fp16 = tile(reps = var_7286, x = x_169_cast_fp16)[name = string("x_171_cast_fp16")]; + bool var_7313_transpose_x_0 = const()[name = string("op_7313_transpose_x_0"), val = bool(false)]; + bool var_7313_transpose_y_0 = const()[name = string("op_7313_transpose_y_0"), val = bool(true)]; + tensor var_7313 = matmul(transpose_x = var_7313_transpose_x_0, transpose_y = var_7313_transpose_y_0, x = query_states_83, y = key_states_107_cast_fp16)[name = string("op_7313")]; + fp16 var_7314_to_fp16 = const()[name = string("op_7314_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_41_cast_fp16 = mul(x = var_7313, y = var_7314_to_fp16)[name = string("attn_weights_41_cast_fp16")]; + tensor attn_weights_43_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = causal_mask)[name = string("attn_weights_43_cast_fp16")]; + int32 var_7349 = const()[name = string("op_7349"), val = int32(-1)]; + tensor var_7351_cast_fp16 = softmax(axis = var_7349, x = attn_weights_43_cast_fp16)[name = string("op_7351_cast_fp16")]; + tensor concat_192 = const()[name = string("concat_192"), val = tensor([16, 64, 4096])]; + tensor reshape_30_cast_fp16 = reshape(shape = concat_192, x = var_7351_cast_fp16)[name = string("reshape_30_cast_fp16")]; + tensor concat_193 = const()[name = string("concat_193"), val = tensor([16, 4096, 128])]; + tensor reshape_31_cast_fp16 = reshape(shape = concat_193, x = x_171_cast_fp16)[name = string("reshape_31_cast_fp16")]; + bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; + bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(false)]; + tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = reshape_30_cast_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")]; + tensor concat_197 = const()[name = string("concat_197"), val = tensor([1, 16, 64, 128])]; + tensor reshape_32_cast_fp16 = reshape(shape = concat_197, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")]; + tensor var_7363_perm_0 = const()[name = string("op_7363_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_7382 = const()[name = string("op_7382"), val = tensor([1, 64, 2048])]; + tensor var_7363_cast_fp16 = transpose(perm = var_7363_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_157")]; + tensor attn_output_105_cast_fp16 = reshape(shape = var_7382, x = var_7363_cast_fp16)[name = string("attn_output_105_cast_fp16")]; + tensor var_7387 = const()[name = string("op_7387"), val = tensor([0, 2, 1])]; + string var_7403_pad_type_0 = const()[name = string("op_7403_pad_type_0"), val = string("valid")]; + int32 var_7403_groups_0 = const()[name = string("op_7403_groups_0"), val = int32(1)]; + tensor var_7403_strides_0 = const()[name = string("op_7403_strides_0"), val = tensor([1])]; + tensor var_7403_pad_0 = const()[name = string("op_7403_pad_0"), val = tensor([0, 0])]; + tensor var_7403_dilations_0 = const()[name = string("op_7403_dilations_0"), val = tensor([1])]; + tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427762304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429859520))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7388_cast_fp16 = transpose(perm = var_7387, x = attn_output_105_cast_fp16)[name = string("transpose_156")]; + tensor var_7403_cast_fp16 = conv(dilations = var_7403_dilations_0, groups = var_7403_groups_0, pad = var_7403_pad_0, pad_type = var_7403_pad_type_0, strides = var_7403_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_7388_cast_fp16)[name = string("op_7403_cast_fp16")]; + tensor var_7407 = const()[name = string("op_7407"), val = tensor([0, 2, 1])]; + tensor attn_output_109_cast_fp16 = transpose(perm = var_7407, x = var_7403_cast_fp16)[name = string("transpose_155")]; + tensor hidden_states_109_cast_fp16 = add(x = hidden_states_101_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; + int32 var_7420 = const()[name = string("op_7420"), val = int32(-1)]; + fp16 const_371_promoted_to_fp16 = const()[name = string("const_371_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7422_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = const_371_promoted_to_fp16)[name = string("op_7422_cast_fp16")]; + bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; + tensor input_191_cast_fp16 = concat(axis = var_7420, interleave = input_191_interleave_0, values = (hidden_states_109_cast_fp16, var_7422_cast_fp16))[name = string("input_191_cast_fp16")]; + tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; + fp16 var_7417_to_fp16 = const()[name = string("op_7417_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_7417_to_fp16, x = input_191_cast_fp16)[name = string("normed_173_cast_fp16")]; + tensor normed_175_begin_0 = const()[name = string("normed_175_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_175_end_0 = const()[name = string("normed_175_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_175_end_mask_0 = const()[name = string("normed_175_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_175_cast_fp16 = slice_by_index(begin = normed_175_begin_0, end = normed_175_end_0, end_mask = normed_175_end_mask_0, x = normed_173_cast_fp16)[name = string("normed_175_cast_fp16")]; + tensor const_374_promoted_to_fp16 = const()[name = string("const_374_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429925120)))]; + tensor x_173_cast_fp16 = mul(x = normed_175_cast_fp16, y = const_374_promoted_to_fp16)[name = string("x_173_cast_fp16")]; + tensor var_7447 = const()[name = string("op_7447"), val = tensor([0, 2, 1])]; + tensor input_193_axes_0 = const()[name = string("input_193_axes_0"), val = tensor([2])]; + tensor var_7448 = transpose(perm = var_7447, x = x_173_cast_fp16)[name = string("transpose_154")]; + tensor input_193 = expand_dims(axes = input_193_axes_0, x = var_7448)[name = string("input_193")]; + string input_195_pad_type_0 = const()[name = string("input_195_pad_type_0"), val = string("valid")]; + tensor input_195_strides_0 = const()[name = string("input_195_strides_0"), val = tensor([1, 1])]; + tensor input_195_pad_0 = const()[name = string("input_195_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_195_dilations_0 = const()[name = string("input_195_dilations_0"), val = tensor([1, 1])]; + int32 input_195_groups_0 = const()[name = string("input_195_groups_0"), val = int32(1)]; + tensor input_195 = conv(dilations = input_195_dilations_0, groups = input_195_groups_0, pad = input_195_pad_0, pad_type = input_195_pad_type_0, strides = input_195_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_193)[name = string("input_195")]; + string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; + tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; + tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; + int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; + tensor b_21 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_193)[name = string("b_21")]; + tensor c_21 = silu(x = input_195)[name = string("c_21")]; + tensor input_197 = mul(x = c_21, y = b_21)[name = string("input_197")]; + string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; + tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; + tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; + int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; + tensor e_21 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_197)[name = string("e_21")]; + tensor var_7470_axes_0 = const()[name = string("op_7470_axes_0"), val = tensor([2])]; + tensor var_7470 = squeeze(axes = var_7470_axes_0, x = e_21)[name = string("op_7470")]; + tensor var_7471 = const()[name = string("op_7471"), val = tensor([0, 2, 1])]; + tensor var_7472 = transpose(perm = var_7471, x = var_7470)[name = string("transpose_153")]; + tensor hidden_states_111_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = var_7472)[name = string("hidden_states_111_cast_fp16")]; + int32 var_7484 = const()[name = string("op_7484"), val = int32(-1)]; + fp16 const_375_promoted_to_fp16 = const()[name = string("const_375_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7486_cast_fp16 = mul(x = hidden_states_111_cast_fp16, y = const_375_promoted_to_fp16)[name = string("op_7486_cast_fp16")]; + bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; + tensor input_199_cast_fp16 = concat(axis = var_7484, interleave = input_199_interleave_0, values = (hidden_states_111_cast_fp16, var_7486_cast_fp16))[name = string("input_199_cast_fp16")]; + tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; + fp16 var_7481_to_fp16 = const()[name = string("op_7481_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_7481_to_fp16, x = input_199_cast_fp16)[name = string("normed_177_cast_fp16")]; + tensor normed_179_begin_0 = const()[name = string("normed_179_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_179_end_0 = const()[name = string("normed_179_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_179_end_mask_0 = const()[name = string("normed_179_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_179_cast_fp16 = slice_by_index(begin = normed_179_begin_0, end = normed_179_end_0, end_mask = normed_179_end_mask_0, x = normed_177_cast_fp16)[name = string("normed_179_cast_fp16")]; + tensor const_378_promoted_to_fp16 = const()[name = string("const_378_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429927232)))]; + tensor hidden_states_113_cast_fp16 = mul(x = normed_179_cast_fp16, y = const_378_promoted_to_fp16)[name = string("hidden_states_113_cast_fp16")]; + tensor var_7509 = const()[name = string("op_7509"), val = tensor([0, 2, 1])]; + tensor var_7512_axes_0 = const()[name = string("op_7512_axes_0"), val = tensor([2])]; + tensor var_7510_cast_fp16 = transpose(perm = var_7509, x = hidden_states_113_cast_fp16)[name = string("transpose_152")]; + tensor var_7512_cast_fp16 = expand_dims(axes = var_7512_axes_0, x = var_7510_cast_fp16)[name = string("op_7512_cast_fp16")]; + string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; + tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; + tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; + int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; + tensor query_states_89 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_7512_cast_fp16)[name = string("query_states_89")]; + string key_states_111_pad_type_0 = const()[name = string("key_states_111_pad_type_0"), val = string("valid")]; + tensor key_states_111_strides_0 = const()[name = string("key_states_111_strides_0"), val = tensor([1, 1])]; + tensor key_states_111_pad_0 = const()[name = string("key_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_111_dilations_0 = const()[name = string("key_states_111_dilations_0"), val = tensor([1, 1])]; + int32 key_states_111_groups_0 = const()[name = string("key_states_111_groups_0"), val = int32(1)]; + tensor key_states_111 = conv(dilations = key_states_111_dilations_0, groups = key_states_111_groups_0, pad = key_states_111_pad_0, pad_type = key_states_111_pad_type_0, strides = key_states_111_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_7512_cast_fp16)[name = string("key_states_111")]; + string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; + tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; + tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; + int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; + tensor value_states_89 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_7512_cast_fp16)[name = string("value_states_89")]; + tensor var_7554 = const()[name = string("op_7554"), val = tensor([1, 16, 128, 64])]; + tensor var_7555 = reshape(shape = var_7554, x = query_states_89)[name = string("op_7555")]; + tensor var_7560 = const()[name = string("op_7560"), val = tensor([0, 1, 3, 2])]; + tensor var_7565 = const()[name = string("op_7565"), val = tensor([1, 8, 128, 64])]; + tensor var_7566 = reshape(shape = var_7565, x = key_states_111)[name = string("op_7566")]; + tensor var_7571 = const()[name = string("op_7571"), val = tensor([0, 1, 3, 2])]; + tensor var_7576 = const()[name = string("op_7576"), val = tensor([1, 8, 128, 64])]; + tensor var_7577 = reshape(shape = var_7576, x = value_states_89)[name = string("op_7577")]; + tensor var_7582 = const()[name = string("op_7582"), val = tensor([0, 1, 3, 2])]; + int32 var_7593 = const()[name = string("op_7593"), val = int32(-1)]; + fp16 const_380_promoted = const()[name = string("const_380_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_115 = transpose(perm = var_7560, x = var_7555)[name = string("transpose_151")]; + tensor var_7595 = mul(x = hidden_states_115, y = const_380_promoted)[name = string("op_7595")]; + bool input_203_interleave_0 = const()[name = string("input_203_interleave_0"), val = bool(false)]; + tensor input_203 = concat(axis = var_7593, interleave = input_203_interleave_0, values = (hidden_states_115, var_7595))[name = string("input_203")]; + tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; + fp16 var_7590_to_fp16 = const()[name = string("op_7590_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_7590_to_fp16, x = input_203)[name = string("normed_181_cast_fp16")]; + tensor normed_183_begin_0 = const()[name = string("normed_183_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_183_end_0 = const()[name = string("normed_183_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_183_end_mask_0 = const()[name = string("normed_183_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_183 = slice_by_index(begin = normed_183_begin_0, end = normed_183_end_0, end_mask = normed_183_end_mask_0, x = normed_181_cast_fp16)[name = string("normed_183")]; + tensor const_383 = const()[name = string("const_383"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429929344)))]; + tensor q_23 = mul(x = normed_183, y = const_383)[name = string("q_23")]; + int32 var_7618 = const()[name = string("op_7618"), val = int32(-1)]; + fp16 const_384_promoted = const()[name = string("const_384_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_117 = transpose(perm = var_7571, x = var_7566)[name = string("transpose_150")]; + tensor var_7620 = mul(x = hidden_states_117, y = const_384_promoted)[name = string("op_7620")]; + bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; + tensor input_205 = concat(axis = var_7618, interleave = input_205_interleave_0, values = (hidden_states_117, var_7620))[name = string("input_205")]; + tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; + fp16 var_7615_to_fp16 = const()[name = string("op_7615_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_7615_to_fp16, x = input_205)[name = string("normed_185_cast_fp16")]; + tensor normed_187_begin_0 = const()[name = string("normed_187_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_187_end_0 = const()[name = string("normed_187_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_187_end_mask_0 = const()[name = string("normed_187_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_187 = slice_by_index(begin = normed_187_begin_0, end = normed_187_end_0, end_mask = normed_187_end_mask_0, x = normed_185_cast_fp16)[name = string("normed_187")]; + tensor const_387 = const()[name = string("const_387"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429929664)))]; + tensor k_23 = mul(x = normed_187, y = const_387)[name = string("k_23")]; + tensor var_7646 = mul(x = q_23, y = cos_5)[name = string("op_7646")]; + tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23)[name = string("x1_45")]; + tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23)[name = string("x2_45")]; + fp16 const_390_promoted = const()[name = string("const_390_promoted"), val = fp16(-0x1p+0)]; + tensor var_7667 = mul(x = x2_45, y = const_390_promoted)[name = string("op_7667")]; + int32 var_7669 = const()[name = string("op_7669"), val = int32(-1)]; + bool var_7670_interleave_0 = const()[name = string("op_7670_interleave_0"), val = bool(false)]; + tensor var_7670 = concat(axis = var_7669, interleave = var_7670_interleave_0, values = (var_7667, x1_45))[name = string("op_7670")]; + tensor var_7671 = mul(x = var_7670, y = sin_5)[name = string("op_7671")]; + tensor query_states_91 = add(x = var_7646, y = var_7671)[name = string("query_states_91")]; + tensor var_7674 = mul(x = k_23, y = cos_5)[name = string("op_7674")]; + tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23)[name = string("x1_47")]; + tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23)[name = string("x2_47")]; + fp16 const_393_promoted = const()[name = string("const_393_promoted"), val = fp16(-0x1p+0)]; + tensor var_7695 = mul(x = x2_47, y = const_393_promoted)[name = string("op_7695")]; + int32 var_7697 = const()[name = string("op_7697"), val = int32(-1)]; + bool var_7698_interleave_0 = const()[name = string("op_7698_interleave_0"), val = bool(false)]; + tensor var_7698 = concat(axis = var_7697, interleave = var_7698_interleave_0, values = (var_7695, x1_47))[name = string("op_7698")]; + tensor var_7699 = mul(x = var_7698, y = sin_5)[name = string("op_7699")]; + tensor key_states_113 = add(x = var_7674, y = var_7699)[name = string("key_states_113")]; + tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([11])]; + tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; + tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; + tensor expand_dims_136 = const()[name = string("expand_dims_136"), val = tensor([12])]; + int32 concat_200_axis_0 = const()[name = string("concat_200_axis_0"), val = int32(0)]; + bool concat_200_interleave_0 = const()[name = string("concat_200_interleave_0"), val = bool(false)]; + tensor concat_200 = concat(axis = concat_200_axis_0, interleave = concat_200_interleave_0, values = (expand_dims_132, expand_dims_133, current_pos, expand_dims_135))[name = string("concat_200")]; + tensor concat_201_values1_0 = const()[name = string("concat_201_values1_0"), val = tensor([0])]; + tensor concat_201_values3_0 = const()[name = string("concat_201_values3_0"), val = tensor([0])]; + int32 concat_201_axis_0 = const()[name = string("concat_201_axis_0"), val = int32(0)]; + bool concat_201_interleave_0 = const()[name = string("concat_201_interleave_0"), val = bool(false)]; + tensor concat_201 = concat(axis = concat_201_axis_0, interleave = concat_201_interleave_0, values = (expand_dims_136, concat_201_values1_0, var_1781, concat_201_values3_0))[name = string("concat_201")]; + tensor model_model_kv_cache_0_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_200, begin_mask = model_model_kv_cache_0_internal_tensor_assign_23_begin_mask_0, end = concat_201, end_mask = model_model_kv_cache_0_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_23_stride_0, update = key_states_113, x = coreml_update_state_77)[name = string("model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_78")]; + tensor expand_dims_138 = const()[name = string("expand_dims_138"), val = tensor([39])]; + tensor expand_dims_139 = const()[name = string("expand_dims_139"), val = tensor([0])]; + tensor expand_dims_141 = const()[name = string("expand_dims_141"), val = tensor([0])]; + tensor expand_dims_142 = const()[name = string("expand_dims_142"), val = tensor([40])]; + int32 concat_204_axis_0 = const()[name = string("concat_204_axis_0"), val = int32(0)]; + bool concat_204_interleave_0 = const()[name = string("concat_204_interleave_0"), val = bool(false)]; + tensor concat_204 = concat(axis = concat_204_axis_0, interleave = concat_204_interleave_0, values = (expand_dims_138, expand_dims_139, current_pos, expand_dims_141))[name = string("concat_204")]; + tensor concat_205_values1_0 = const()[name = string("concat_205_values1_0"), val = tensor([0])]; + tensor concat_205_values3_0 = const()[name = string("concat_205_values3_0"), val = tensor([0])]; + int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; + bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; + tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (expand_dims_142, concat_205_values1_0, var_1781, concat_205_values3_0))[name = string("concat_205")]; + tensor model_model_kv_cache_0_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_91 = transpose(perm = var_7582, x = var_7577)[name = string("transpose_149")]; + tensor model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_204, begin_mask = model_model_kv_cache_0_internal_tensor_assign_24_begin_mask_0, end = concat_205, end_mask = model_model_kv_cache_0_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_24_stride_0, update = value_states_91, x = coreml_update_state_78)[name = string("model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_79")]; + tensor var_7770_begin_0 = const()[name = string("op_7770_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor var_7770_end_0 = const()[name = string("op_7770_end_0"), val = tensor([12, 8, 4096, 128])]; + tensor var_7770_end_mask_0 = const()[name = string("op_7770_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7770_cast_fp16 = slice_by_index(begin = var_7770_begin_0, end = var_7770_end_0, end_mask = var_7770_end_mask_0, x = coreml_update_state_79)[name = string("op_7770_cast_fp16")]; + tensor K_layer_cache_23_axes_0 = const()[name = string("K_layer_cache_23_axes_0"), val = tensor([0])]; + tensor K_layer_cache_23_cast_fp16 = squeeze(axes = K_layer_cache_23_axes_0, x = var_7770_cast_fp16)[name = string("K_layer_cache_23_cast_fp16")]; + tensor var_7777_begin_0 = const()[name = string("op_7777_begin_0"), val = tensor([39, 0, 0, 0])]; + tensor var_7777_end_0 = const()[name = string("op_7777_end_0"), val = tensor([40, 8, 4096, 128])]; + tensor var_7777_end_mask_0 = const()[name = string("op_7777_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_7777_cast_fp16 = slice_by_index(begin = var_7777_begin_0, end = var_7777_end_0, end_mask = var_7777_end_mask_0, x = coreml_update_state_79)[name = string("op_7777_cast_fp16")]; + tensor V_layer_cache_23_axes_0 = const()[name = string("V_layer_cache_23_axes_0"), val = tensor([0])]; + tensor V_layer_cache_23_cast_fp16 = squeeze(axes = V_layer_cache_23_axes_0, x = var_7777_cast_fp16)[name = string("V_layer_cache_23_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_23_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_7806 = const()[name = string("op_7806"), val = tensor([1, 2, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_7806, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_7818 = const()[name = string("op_7818"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_117_cast_fp16 = reshape(shape = var_7818, x = x_181_cast_fp16)[name = string("key_states_117_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_23_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_7826 = const()[name = string("op_7826"), val = tensor([1, 2, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_7826, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + bool var_7853_transpose_x_0 = const()[name = string("op_7853_transpose_x_0"), val = bool(false)]; + bool var_7853_transpose_y_0 = const()[name = string("op_7853_transpose_y_0"), val = bool(true)]; + tensor var_7853 = matmul(transpose_x = var_7853_transpose_x_0, transpose_y = var_7853_transpose_y_0, x = query_states_91, y = key_states_117_cast_fp16)[name = string("op_7853")]; + fp16 var_7854_to_fp16 = const()[name = string("op_7854_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_45_cast_fp16 = mul(x = var_7853, y = var_7854_to_fp16)[name = string("attn_weights_45_cast_fp16")]; + tensor attn_weights_47_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask)[name = string("attn_weights_47_cast_fp16")]; + int32 var_7889 = const()[name = string("op_7889"), val = int32(-1)]; + tensor var_7891_cast_fp16 = softmax(axis = var_7889, x = attn_weights_47_cast_fp16)[name = string("op_7891_cast_fp16")]; + tensor concat_210 = const()[name = string("concat_210"), val = tensor([16, 64, 4096])]; + tensor reshape_33_cast_fp16 = reshape(shape = concat_210, x = var_7891_cast_fp16)[name = string("reshape_33_cast_fp16")]; + tensor concat_211 = const()[name = string("concat_211"), val = tensor([16, 4096, 128])]; + tensor reshape_34_cast_fp16 = reshape(shape = concat_211, x = x_187_cast_fp16)[name = string("reshape_34_cast_fp16")]; + bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; + bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(false)]; + tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = reshape_33_cast_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")]; + tensor concat_215 = const()[name = string("concat_215"), val = tensor([1, 16, 64, 128])]; + tensor reshape_35_cast_fp16 = reshape(shape = concat_215, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")]; + tensor var_7903_perm_0 = const()[name = string("op_7903_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_7922 = const()[name = string("op_7922"), val = tensor([1, 64, 2048])]; + tensor var_7903_cast_fp16 = transpose(perm = var_7903_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_148")]; + tensor attn_output_115_cast_fp16 = reshape(shape = var_7922, x = var_7903_cast_fp16)[name = string("attn_output_115_cast_fp16")]; + tensor var_7927 = const()[name = string("op_7927"), val = tensor([0, 2, 1])]; + string var_7943_pad_type_0 = const()[name = string("op_7943_pad_type_0"), val = string("valid")]; + int32 var_7943_groups_0 = const()[name = string("op_7943_groups_0"), val = int32(1)]; + tensor var_7943_strides_0 = const()[name = string("op_7943_strides_0"), val = tensor([1])]; + tensor var_7943_pad_0 = const()[name = string("op_7943_pad_0"), val = tensor([0, 0])]; + tensor var_7943_dilations_0 = const()[name = string("op_7943_dilations_0"), val = tensor([1])]; + tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429929984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432027200))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_7928_cast_fp16 = transpose(perm = var_7927, x = attn_output_115_cast_fp16)[name = string("transpose_147")]; + tensor var_7943_cast_fp16 = conv(dilations = var_7943_dilations_0, groups = var_7943_groups_0, pad = var_7943_pad_0, pad_type = var_7943_pad_type_0, strides = var_7943_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_7928_cast_fp16)[name = string("op_7943_cast_fp16")]; + tensor var_7947 = const()[name = string("op_7947"), val = tensor([0, 2, 1])]; + tensor attn_output_119_cast_fp16 = transpose(perm = var_7947, x = var_7943_cast_fp16)[name = string("transpose_146")]; + tensor hidden_states_119_cast_fp16 = add(x = hidden_states_111_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; + int32 var_7960 = const()[name = string("op_7960"), val = int32(-1)]; + fp16 const_405_promoted_to_fp16 = const()[name = string("const_405_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_7962_cast_fp16 = mul(x = hidden_states_119_cast_fp16, y = const_405_promoted_to_fp16)[name = string("op_7962_cast_fp16")]; + bool input_209_interleave_0 = const()[name = string("input_209_interleave_0"), val = bool(false)]; + tensor input_209_cast_fp16 = concat(axis = var_7960, interleave = input_209_interleave_0, values = (hidden_states_119_cast_fp16, var_7962_cast_fp16))[name = string("input_209_cast_fp16")]; + tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; + fp16 var_7957_to_fp16 = const()[name = string("op_7957_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_7957_to_fp16, x = input_209_cast_fp16)[name = string("normed_189_cast_fp16")]; + tensor normed_191_begin_0 = const()[name = string("normed_191_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_191_end_0 = const()[name = string("normed_191_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_191_end_mask_0 = const()[name = string("normed_191_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_191_cast_fp16 = slice_by_index(begin = normed_191_begin_0, end = normed_191_end_0, end_mask = normed_191_end_mask_0, x = normed_189_cast_fp16)[name = string("normed_191_cast_fp16")]; + tensor const_408_promoted_to_fp16 = const()[name = string("const_408_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432092800)))]; + tensor x_189_cast_fp16 = mul(x = normed_191_cast_fp16, y = const_408_promoted_to_fp16)[name = string("x_189_cast_fp16")]; + tensor var_7987 = const()[name = string("op_7987"), val = tensor([0, 2, 1])]; + tensor input_211_axes_0 = const()[name = string("input_211_axes_0"), val = tensor([2])]; + tensor var_7988 = transpose(perm = var_7987, x = x_189_cast_fp16)[name = string("transpose_145")]; + tensor input_211 = expand_dims(axes = input_211_axes_0, x = var_7988)[name = string("input_211")]; + string input_213_pad_type_0 = const()[name = string("input_213_pad_type_0"), val = string("valid")]; + tensor input_213_strides_0 = const()[name = string("input_213_strides_0"), val = tensor([1, 1])]; + tensor input_213_pad_0 = const()[name = string("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_213_dilations_0 = const()[name = string("input_213_dilations_0"), val = tensor([1, 1])]; + int32 input_213_groups_0 = const()[name = string("input_213_groups_0"), val = int32(1)]; + tensor input_213 = conv(dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_211)[name = string("input_213")]; + string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; + tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; + tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; + int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; + tensor b_23 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_211)[name = string("b_23")]; + tensor c_23 = silu(x = input_213)[name = string("c_23")]; + tensor input_215 = mul(x = c_23, y = b_23)[name = string("input_215")]; + string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; + tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; + tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; + int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; + tensor e_23 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_215)[name = string("e_23")]; + tensor var_8010_axes_0 = const()[name = string("op_8010_axes_0"), val = tensor([2])]; + tensor var_8010 = squeeze(axes = var_8010_axes_0, x = e_23)[name = string("op_8010")]; + tensor var_8011 = const()[name = string("op_8011"), val = tensor([0, 2, 1])]; + tensor var_8012 = transpose(perm = var_8011, x = var_8010)[name = string("transpose_144")]; + tensor hidden_states_121_cast_fp16 = add(x = hidden_states_119_cast_fp16, y = var_8012)[name = string("hidden_states_121_cast_fp16")]; + int32 var_8024 = const()[name = string("op_8024"), val = int32(-1)]; + fp16 const_409_promoted_to_fp16 = const()[name = string("const_409_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8026_cast_fp16 = mul(x = hidden_states_121_cast_fp16, y = const_409_promoted_to_fp16)[name = string("op_8026_cast_fp16")]; + bool input_217_interleave_0 = const()[name = string("input_217_interleave_0"), val = bool(false)]; + tensor input_217_cast_fp16 = concat(axis = var_8024, interleave = input_217_interleave_0, values = (hidden_states_121_cast_fp16, var_8026_cast_fp16))[name = string("input_217_cast_fp16")]; + tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; + fp16 var_8021_to_fp16 = const()[name = string("op_8021_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_8021_to_fp16, x = input_217_cast_fp16)[name = string("normed_193_cast_fp16")]; + tensor normed_195_begin_0 = const()[name = string("normed_195_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_195_end_0 = const()[name = string("normed_195_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_195_end_mask_0 = const()[name = string("normed_195_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_195_cast_fp16 = slice_by_index(begin = normed_195_begin_0, end = normed_195_end_0, end_mask = normed_195_end_mask_0, x = normed_193_cast_fp16)[name = string("normed_195_cast_fp16")]; + tensor const_412_promoted_to_fp16 = const()[name = string("const_412_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432094912)))]; + tensor hidden_states_123_cast_fp16 = mul(x = normed_195_cast_fp16, y = const_412_promoted_to_fp16)[name = string("hidden_states_123_cast_fp16")]; + tensor var_8049 = const()[name = string("op_8049"), val = tensor([0, 2, 1])]; + tensor var_8052_axes_0 = const()[name = string("op_8052_axes_0"), val = tensor([2])]; + tensor var_8050_cast_fp16 = transpose(perm = var_8049, x = hidden_states_123_cast_fp16)[name = string("transpose_143")]; + tensor var_8052_cast_fp16 = expand_dims(axes = var_8052_axes_0, x = var_8050_cast_fp16)[name = string("op_8052_cast_fp16")]; + string query_states_97_pad_type_0 = const()[name = string("query_states_97_pad_type_0"), val = string("valid")]; + tensor query_states_97_strides_0 = const()[name = string("query_states_97_strides_0"), val = tensor([1, 1])]; + tensor query_states_97_pad_0 = const()[name = string("query_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_97_dilations_0 = const()[name = string("query_states_97_dilations_0"), val = tensor([1, 1])]; + int32 query_states_97_groups_0 = const()[name = string("query_states_97_groups_0"), val = int32(1)]; + tensor query_states_97 = conv(dilations = query_states_97_dilations_0, groups = query_states_97_groups_0, pad = query_states_97_pad_0, pad_type = query_states_97_pad_type_0, strides = query_states_97_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_8052_cast_fp16)[name = string("query_states_97")]; + string key_states_121_pad_type_0 = const()[name = string("key_states_121_pad_type_0"), val = string("valid")]; + tensor key_states_121_strides_0 = const()[name = string("key_states_121_strides_0"), val = tensor([1, 1])]; + tensor key_states_121_pad_0 = const()[name = string("key_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_121_dilations_0 = const()[name = string("key_states_121_dilations_0"), val = tensor([1, 1])]; + int32 key_states_121_groups_0 = const()[name = string("key_states_121_groups_0"), val = int32(1)]; + tensor key_states_121 = conv(dilations = key_states_121_dilations_0, groups = key_states_121_groups_0, pad = key_states_121_pad_0, pad_type = key_states_121_pad_type_0, strides = key_states_121_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_8052_cast_fp16)[name = string("key_states_121")]; + string value_states_97_pad_type_0 = const()[name = string("value_states_97_pad_type_0"), val = string("valid")]; + tensor value_states_97_strides_0 = const()[name = string("value_states_97_strides_0"), val = tensor([1, 1])]; + tensor value_states_97_pad_0 = const()[name = string("value_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_97_dilations_0 = const()[name = string("value_states_97_dilations_0"), val = tensor([1, 1])]; + int32 value_states_97_groups_0 = const()[name = string("value_states_97_groups_0"), val = int32(1)]; + tensor value_states_97 = conv(dilations = value_states_97_dilations_0, groups = value_states_97_groups_0, pad = value_states_97_pad_0, pad_type = value_states_97_pad_type_0, strides = value_states_97_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_8052_cast_fp16)[name = string("value_states_97")]; + tensor var_8094 = const()[name = string("op_8094"), val = tensor([1, 16, 128, 64])]; + tensor var_8095 = reshape(shape = var_8094, x = query_states_97)[name = string("op_8095")]; + tensor var_8100 = const()[name = string("op_8100"), val = tensor([0, 1, 3, 2])]; + tensor var_8105 = const()[name = string("op_8105"), val = tensor([1, 8, 128, 64])]; + tensor var_8106 = reshape(shape = var_8105, x = key_states_121)[name = string("op_8106")]; + tensor var_8111 = const()[name = string("op_8111"), val = tensor([0, 1, 3, 2])]; + tensor var_8116 = const()[name = string("op_8116"), val = tensor([1, 8, 128, 64])]; + tensor var_8117 = reshape(shape = var_8116, x = value_states_97)[name = string("op_8117")]; + tensor var_8122 = const()[name = string("op_8122"), val = tensor([0, 1, 3, 2])]; + int32 var_8133 = const()[name = string("op_8133"), val = int32(-1)]; + fp16 const_414_promoted = const()[name = string("const_414_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_125 = transpose(perm = var_8100, x = var_8095)[name = string("transpose_142")]; + tensor var_8135 = mul(x = hidden_states_125, y = const_414_promoted)[name = string("op_8135")]; + bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; + tensor input_221 = concat(axis = var_8133, interleave = input_221_interleave_0, values = (hidden_states_125, var_8135))[name = string("input_221")]; + tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; + fp16 var_8130_to_fp16 = const()[name = string("op_8130_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_8130_to_fp16, x = input_221)[name = string("normed_197_cast_fp16")]; + tensor normed_199_begin_0 = const()[name = string("normed_199_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_199_end_0 = const()[name = string("normed_199_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_199_end_mask_0 = const()[name = string("normed_199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_199 = slice_by_index(begin = normed_199_begin_0, end = normed_199_end_0, end_mask = normed_199_end_mask_0, x = normed_197_cast_fp16)[name = string("normed_199")]; + tensor const_417 = const()[name = string("const_417"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432097024)))]; + tensor q_25 = mul(x = normed_199, y = const_417)[name = string("q_25")]; + int32 var_8158 = const()[name = string("op_8158"), val = int32(-1)]; + fp16 const_418_promoted = const()[name = string("const_418_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_127 = transpose(perm = var_8111, x = var_8106)[name = string("transpose_141")]; + tensor var_8160 = mul(x = hidden_states_127, y = const_418_promoted)[name = string("op_8160")]; + bool input_223_interleave_0 = const()[name = string("input_223_interleave_0"), val = bool(false)]; + tensor input_223 = concat(axis = var_8158, interleave = input_223_interleave_0, values = (hidden_states_127, var_8160))[name = string("input_223")]; + tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; + fp16 var_8155_to_fp16 = const()[name = string("op_8155_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_8155_to_fp16, x = input_223)[name = string("normed_201_cast_fp16")]; + tensor normed_203_begin_0 = const()[name = string("normed_203_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_203_end_0 = const()[name = string("normed_203_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_203_end_mask_0 = const()[name = string("normed_203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_203 = slice_by_index(begin = normed_203_begin_0, end = normed_203_end_0, end_mask = normed_203_end_mask_0, x = normed_201_cast_fp16)[name = string("normed_203")]; + tensor const_421 = const()[name = string("const_421"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432097344)))]; + tensor k_25 = mul(x = normed_203, y = const_421)[name = string("k_25")]; + tensor var_8186 = mul(x = q_25, y = cos_5)[name = string("op_8186")]; + tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25)[name = string("x1_49")]; + tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25)[name = string("x2_49")]; + fp16 const_424_promoted = const()[name = string("const_424_promoted"), val = fp16(-0x1p+0)]; + tensor var_8207 = mul(x = x2_49, y = const_424_promoted)[name = string("op_8207")]; + int32 var_8209 = const()[name = string("op_8209"), val = int32(-1)]; + bool var_8210_interleave_0 = const()[name = string("op_8210_interleave_0"), val = bool(false)]; + tensor var_8210 = concat(axis = var_8209, interleave = var_8210_interleave_0, values = (var_8207, x1_49))[name = string("op_8210")]; + tensor var_8211 = mul(x = var_8210, y = sin_5)[name = string("op_8211")]; + tensor query_states_99 = add(x = var_8186, y = var_8211)[name = string("query_states_99")]; + tensor var_8214 = mul(x = k_25, y = cos_5)[name = string("op_8214")]; + tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25)[name = string("x1_51")]; + tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25)[name = string("x2_51")]; + fp16 const_427_promoted = const()[name = string("const_427_promoted"), val = fp16(-0x1p+0)]; + tensor var_8235 = mul(x = x2_51, y = const_427_promoted)[name = string("op_8235")]; + int32 var_8237 = const()[name = string("op_8237"), val = int32(-1)]; + bool var_8238_interleave_0 = const()[name = string("op_8238_interleave_0"), val = bool(false)]; + tensor var_8238 = concat(axis = var_8237, interleave = var_8238_interleave_0, values = (var_8235, x1_51))[name = string("op_8238")]; + tensor var_8239 = mul(x = var_8238, y = sin_5)[name = string("op_8239")]; + tensor key_states_123 = add(x = var_8214, y = var_8239)[name = string("key_states_123")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([12])]; + tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; + tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; + tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([13])]; + int32 concat_218_axis_0 = const()[name = string("concat_218_axis_0"), val = int32(0)]; + bool concat_218_interleave_0 = const()[name = string("concat_218_interleave_0"), val = bool(false)]; + tensor concat_218 = concat(axis = concat_218_axis_0, interleave = concat_218_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_218")]; + tensor concat_219_values1_0 = const()[name = string("concat_219_values1_0"), val = tensor([0])]; + tensor concat_219_values3_0 = const()[name = string("concat_219_values3_0"), val = tensor([0])]; + int32 concat_219_axis_0 = const()[name = string("concat_219_axis_0"), val = int32(0)]; + bool concat_219_interleave_0 = const()[name = string("concat_219_interleave_0"), val = bool(false)]; + tensor concat_219 = concat(axis = concat_219_axis_0, interleave = concat_219_interleave_0, values = (expand_dims_148, concat_219_values1_0, var_1781, concat_219_values3_0))[name = string("concat_219")]; + tensor model_model_kv_cache_0_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_218, begin_mask = model_model_kv_cache_0_internal_tensor_assign_25_begin_mask_0, end = concat_219, end_mask = model_model_kv_cache_0_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_25_stride_0, update = key_states_123, x = coreml_update_state_79)[name = string("model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_80")]; + tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([40])]; + tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; + tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; + tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([41])]; + int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; + bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; + tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_222")]; + tensor concat_223_values1_0 = const()[name = string("concat_223_values1_0"), val = tensor([0])]; + tensor concat_223_values3_0 = const()[name = string("concat_223_values3_0"), val = tensor([0])]; + int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; + bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; + tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (expand_dims_154, concat_223_values1_0, var_1781, concat_223_values3_0))[name = string("concat_223")]; + tensor model_model_kv_cache_0_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_99 = transpose(perm = var_8122, x = var_8117)[name = string("transpose_140")]; + tensor model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_222, begin_mask = model_model_kv_cache_0_internal_tensor_assign_26_begin_mask_0, end = concat_223, end_mask = model_model_kv_cache_0_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_26_stride_0, update = value_states_99, x = coreml_update_state_80)[name = string("model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_81")]; + tensor var_8310_begin_0 = const()[name = string("op_8310_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor var_8310_end_0 = const()[name = string("op_8310_end_0"), val = tensor([13, 8, 4096, 128])]; + tensor var_8310_end_mask_0 = const()[name = string("op_8310_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8310_cast_fp16 = slice_by_index(begin = var_8310_begin_0, end = var_8310_end_0, end_mask = var_8310_end_mask_0, x = coreml_update_state_81)[name = string("op_8310_cast_fp16")]; + tensor K_layer_cache_25_axes_0 = const()[name = string("K_layer_cache_25_axes_0"), val = tensor([0])]; + tensor K_layer_cache_25_cast_fp16 = squeeze(axes = K_layer_cache_25_axes_0, x = var_8310_cast_fp16)[name = string("K_layer_cache_25_cast_fp16")]; + tensor var_8317_begin_0 = const()[name = string("op_8317_begin_0"), val = tensor([40, 0, 0, 0])]; + tensor var_8317_end_0 = const()[name = string("op_8317_end_0"), val = tensor([41, 8, 4096, 128])]; + tensor var_8317_end_mask_0 = const()[name = string("op_8317_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8317_cast_fp16 = slice_by_index(begin = var_8317_begin_0, end = var_8317_end_0, end_mask = var_8317_end_mask_0, x = coreml_update_state_81)[name = string("op_8317_cast_fp16")]; + tensor V_layer_cache_25_axes_0 = const()[name = string("V_layer_cache_25_axes_0"), val = tensor([0])]; + tensor V_layer_cache_25_cast_fp16 = squeeze(axes = V_layer_cache_25_axes_0, x = var_8317_cast_fp16)[name = string("V_layer_cache_25_cast_fp16")]; + tensor x_195_axes_0 = const()[name = string("x_195_axes_0"), val = tensor([1])]; + tensor x_195_cast_fp16 = expand_dims(axes = x_195_axes_0, x = K_layer_cache_25_cast_fp16)[name = string("x_195_cast_fp16")]; + tensor var_8346 = const()[name = string("op_8346"), val = tensor([1, 2, 1, 1])]; + tensor x_197_cast_fp16 = tile(reps = var_8346, x = x_195_cast_fp16)[name = string("x_197_cast_fp16")]; + tensor var_8358 = const()[name = string("op_8358"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_127_cast_fp16 = reshape(shape = var_8358, x = x_197_cast_fp16)[name = string("key_states_127_cast_fp16")]; + tensor x_201_axes_0 = const()[name = string("x_201_axes_0"), val = tensor([1])]; + tensor x_201_cast_fp16 = expand_dims(axes = x_201_axes_0, x = V_layer_cache_25_cast_fp16)[name = string("x_201_cast_fp16")]; + tensor var_8366 = const()[name = string("op_8366"), val = tensor([1, 2, 1, 1])]; + tensor x_203_cast_fp16 = tile(reps = var_8366, x = x_201_cast_fp16)[name = string("x_203_cast_fp16")]; + bool var_8393_transpose_x_0 = const()[name = string("op_8393_transpose_x_0"), val = bool(false)]; + bool var_8393_transpose_y_0 = const()[name = string("op_8393_transpose_y_0"), val = bool(true)]; + tensor var_8393 = matmul(transpose_x = var_8393_transpose_x_0, transpose_y = var_8393_transpose_y_0, x = query_states_99, y = key_states_127_cast_fp16)[name = string("op_8393")]; + fp16 var_8394_to_fp16 = const()[name = string("op_8394_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_49_cast_fp16 = mul(x = var_8393, y = var_8394_to_fp16)[name = string("attn_weights_49_cast_fp16")]; + tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = causal_mask)[name = string("attn_weights_51_cast_fp16")]; + int32 var_8429 = const()[name = string("op_8429"), val = int32(-1)]; + tensor var_8431_cast_fp16 = softmax(axis = var_8429, x = attn_weights_51_cast_fp16)[name = string("op_8431_cast_fp16")]; + tensor concat_228 = const()[name = string("concat_228"), val = tensor([16, 64, 4096])]; + tensor reshape_36_cast_fp16 = reshape(shape = concat_228, x = var_8431_cast_fp16)[name = string("reshape_36_cast_fp16")]; + tensor concat_229 = const()[name = string("concat_229"), val = tensor([16, 4096, 128])]; + tensor reshape_37_cast_fp16 = reshape(shape = concat_229, x = x_203_cast_fp16)[name = string("reshape_37_cast_fp16")]; + bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; + bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(false)]; + tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = reshape_36_cast_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")]; + tensor concat_233 = const()[name = string("concat_233"), val = tensor([1, 16, 64, 128])]; + tensor reshape_38_cast_fp16 = reshape(shape = concat_233, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")]; + tensor var_8443_perm_0 = const()[name = string("op_8443_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_8462 = const()[name = string("op_8462"), val = tensor([1, 64, 2048])]; + tensor var_8443_cast_fp16 = transpose(perm = var_8443_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_139")]; + tensor attn_output_125_cast_fp16 = reshape(shape = var_8462, x = var_8443_cast_fp16)[name = string("attn_output_125_cast_fp16")]; + tensor var_8467 = const()[name = string("op_8467"), val = tensor([0, 2, 1])]; + string var_8483_pad_type_0 = const()[name = string("op_8483_pad_type_0"), val = string("valid")]; + int32 var_8483_groups_0 = const()[name = string("op_8483_groups_0"), val = int32(1)]; + tensor var_8483_strides_0 = const()[name = string("op_8483_strides_0"), val = tensor([1])]; + tensor var_8483_pad_0 = const()[name = string("op_8483_pad_0"), val = tensor([0, 0])]; + tensor var_8483_dilations_0 = const()[name = string("op_8483_dilations_0"), val = tensor([1])]; + tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432097664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434194880))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_8468_cast_fp16 = transpose(perm = var_8467, x = attn_output_125_cast_fp16)[name = string("transpose_138")]; + tensor var_8483_cast_fp16 = conv(dilations = var_8483_dilations_0, groups = var_8483_groups_0, pad = var_8483_pad_0, pad_type = var_8483_pad_type_0, strides = var_8483_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_8468_cast_fp16)[name = string("op_8483_cast_fp16")]; + tensor var_8487 = const()[name = string("op_8487"), val = tensor([0, 2, 1])]; + tensor attn_output_129_cast_fp16 = transpose(perm = var_8487, x = var_8483_cast_fp16)[name = string("transpose_137")]; + tensor hidden_states_129_cast_fp16 = add(x = hidden_states_121_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; + int32 var_8500 = const()[name = string("op_8500"), val = int32(-1)]; + fp16 const_439_promoted_to_fp16 = const()[name = string("const_439_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8502_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = const_439_promoted_to_fp16)[name = string("op_8502_cast_fp16")]; + bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; + tensor input_227_cast_fp16 = concat(axis = var_8500, interleave = input_227_interleave_0, values = (hidden_states_129_cast_fp16, var_8502_cast_fp16))[name = string("input_227_cast_fp16")]; + tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; + fp16 var_8497_to_fp16 = const()[name = string("op_8497_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_8497_to_fp16, x = input_227_cast_fp16)[name = string("normed_205_cast_fp16")]; + tensor normed_207_begin_0 = const()[name = string("normed_207_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_207_end_0 = const()[name = string("normed_207_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_207_end_mask_0 = const()[name = string("normed_207_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_207_cast_fp16 = slice_by_index(begin = normed_207_begin_0, end = normed_207_end_0, end_mask = normed_207_end_mask_0, x = normed_205_cast_fp16)[name = string("normed_207_cast_fp16")]; + tensor const_442_promoted_to_fp16 = const()[name = string("const_442_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434260480)))]; + tensor x_205_cast_fp16 = mul(x = normed_207_cast_fp16, y = const_442_promoted_to_fp16)[name = string("x_205_cast_fp16")]; + tensor var_8527 = const()[name = string("op_8527"), val = tensor([0, 2, 1])]; + tensor input_229_axes_0 = const()[name = string("input_229_axes_0"), val = tensor([2])]; + tensor var_8528 = transpose(perm = var_8527, x = x_205_cast_fp16)[name = string("transpose_136")]; + tensor input_229 = expand_dims(axes = input_229_axes_0, x = var_8528)[name = string("input_229")]; + string input_231_pad_type_0 = const()[name = string("input_231_pad_type_0"), val = string("valid")]; + tensor input_231_strides_0 = const()[name = string("input_231_strides_0"), val = tensor([1, 1])]; + tensor input_231_pad_0 = const()[name = string("input_231_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_231_dilations_0 = const()[name = string("input_231_dilations_0"), val = tensor([1, 1])]; + int32 input_231_groups_0 = const()[name = string("input_231_groups_0"), val = int32(1)]; + tensor input_231 = conv(dilations = input_231_dilations_0, groups = input_231_groups_0, pad = input_231_pad_0, pad_type = input_231_pad_type_0, strides = input_231_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_229)[name = string("input_231")]; + string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; + tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; + tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; + int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; + tensor b_25 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_229)[name = string("b_25")]; + tensor c_25 = silu(x = input_231)[name = string("c_25")]; + tensor input_233 = mul(x = c_25, y = b_25)[name = string("input_233")]; + string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; + tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; + tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; + int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; + tensor e_25 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_233)[name = string("e_25")]; + tensor var_8550_axes_0 = const()[name = string("op_8550_axes_0"), val = tensor([2])]; + tensor var_8550 = squeeze(axes = var_8550_axes_0, x = e_25)[name = string("op_8550")]; + tensor var_8551 = const()[name = string("op_8551"), val = tensor([0, 2, 1])]; + tensor var_8552 = transpose(perm = var_8551, x = var_8550)[name = string("transpose_135")]; + tensor hidden_states_131_cast_fp16 = add(x = hidden_states_129_cast_fp16, y = var_8552)[name = string("hidden_states_131_cast_fp16")]; + int32 var_8564 = const()[name = string("op_8564"), val = int32(-1)]; + fp16 const_443_promoted_to_fp16 = const()[name = string("const_443_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_8566_cast_fp16 = mul(x = hidden_states_131_cast_fp16, y = const_443_promoted_to_fp16)[name = string("op_8566_cast_fp16")]; + bool input_235_interleave_0 = const()[name = string("input_235_interleave_0"), val = bool(false)]; + tensor input_235_cast_fp16 = concat(axis = var_8564, interleave = input_235_interleave_0, values = (hidden_states_131_cast_fp16, var_8566_cast_fp16))[name = string("input_235_cast_fp16")]; + tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; + fp16 var_8561_to_fp16 = const()[name = string("op_8561_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_8561_to_fp16, x = input_235_cast_fp16)[name = string("normed_209_cast_fp16")]; + tensor normed_211_begin_0 = const()[name = string("normed_211_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_211_end_0 = const()[name = string("normed_211_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_211_end_mask_0 = const()[name = string("normed_211_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_211_cast_fp16 = slice_by_index(begin = normed_211_begin_0, end = normed_211_end_0, end_mask = normed_211_end_mask_0, x = normed_209_cast_fp16)[name = string("normed_211_cast_fp16")]; + tensor const_446_promoted_to_fp16 = const()[name = string("const_446_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434262592)))]; + tensor hidden_states_133_cast_fp16 = mul(x = normed_211_cast_fp16, y = const_446_promoted_to_fp16)[name = string("hidden_states_133_cast_fp16")]; + tensor var_8589 = const()[name = string("op_8589"), val = tensor([0, 2, 1])]; + tensor var_8592_axes_0 = const()[name = string("op_8592_axes_0"), val = tensor([2])]; + tensor var_8590_cast_fp16 = transpose(perm = var_8589, x = hidden_states_133_cast_fp16)[name = string("transpose_134")]; + tensor var_8592_cast_fp16 = expand_dims(axes = var_8592_axes_0, x = var_8590_cast_fp16)[name = string("op_8592_cast_fp16")]; + string query_states_105_pad_type_0 = const()[name = string("query_states_105_pad_type_0"), val = string("valid")]; + tensor query_states_105_strides_0 = const()[name = string("query_states_105_strides_0"), val = tensor([1, 1])]; + tensor query_states_105_pad_0 = const()[name = string("query_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_105_dilations_0 = const()[name = string("query_states_105_dilations_0"), val = tensor([1, 1])]; + int32 query_states_105_groups_0 = const()[name = string("query_states_105_groups_0"), val = int32(1)]; + tensor query_states_105 = conv(dilations = query_states_105_dilations_0, groups = query_states_105_groups_0, pad = query_states_105_pad_0, pad_type = query_states_105_pad_type_0, strides = query_states_105_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_8592_cast_fp16)[name = string("query_states_105")]; + string key_states_131_pad_type_0 = const()[name = string("key_states_131_pad_type_0"), val = string("valid")]; + tensor key_states_131_strides_0 = const()[name = string("key_states_131_strides_0"), val = tensor([1, 1])]; + tensor key_states_131_pad_0 = const()[name = string("key_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_131_dilations_0 = const()[name = string("key_states_131_dilations_0"), val = tensor([1, 1])]; + int32 key_states_131_groups_0 = const()[name = string("key_states_131_groups_0"), val = int32(1)]; + tensor key_states_131 = conv(dilations = key_states_131_dilations_0, groups = key_states_131_groups_0, pad = key_states_131_pad_0, pad_type = key_states_131_pad_type_0, strides = key_states_131_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_8592_cast_fp16)[name = string("key_states_131")]; + string value_states_105_pad_type_0 = const()[name = string("value_states_105_pad_type_0"), val = string("valid")]; + tensor value_states_105_strides_0 = const()[name = string("value_states_105_strides_0"), val = tensor([1, 1])]; + tensor value_states_105_pad_0 = const()[name = string("value_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_105_dilations_0 = const()[name = string("value_states_105_dilations_0"), val = tensor([1, 1])]; + int32 value_states_105_groups_0 = const()[name = string("value_states_105_groups_0"), val = int32(1)]; + tensor value_states_105 = conv(dilations = value_states_105_dilations_0, groups = value_states_105_groups_0, pad = value_states_105_pad_0, pad_type = value_states_105_pad_type_0, strides = value_states_105_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_8592_cast_fp16)[name = string("value_states_105")]; + tensor var_8634 = const()[name = string("op_8634"), val = tensor([1, 16, 128, 64])]; + tensor var_8635 = reshape(shape = var_8634, x = query_states_105)[name = string("op_8635")]; + tensor var_8640 = const()[name = string("op_8640"), val = tensor([0, 1, 3, 2])]; + tensor var_8645 = const()[name = string("op_8645"), val = tensor([1, 8, 128, 64])]; + tensor var_8646 = reshape(shape = var_8645, x = key_states_131)[name = string("op_8646")]; + tensor var_8651 = const()[name = string("op_8651"), val = tensor([0, 1, 3, 2])]; + tensor var_8656 = const()[name = string("op_8656"), val = tensor([1, 8, 128, 64])]; + tensor var_8657 = reshape(shape = var_8656, x = value_states_105)[name = string("op_8657")]; + tensor var_8662 = const()[name = string("op_8662"), val = tensor([0, 1, 3, 2])]; + int32 var_8673 = const()[name = string("op_8673"), val = int32(-1)]; + fp16 const_448_promoted = const()[name = string("const_448_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_135 = transpose(perm = var_8640, x = var_8635)[name = string("transpose_133")]; + tensor var_8675 = mul(x = hidden_states_135, y = const_448_promoted)[name = string("op_8675")]; + bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; + tensor input_239 = concat(axis = var_8673, interleave = input_239_interleave_0, values = (hidden_states_135, var_8675))[name = string("input_239")]; + tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; + fp16 var_8670_to_fp16 = const()[name = string("op_8670_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_8670_to_fp16, x = input_239)[name = string("normed_213_cast_fp16")]; + tensor normed_215_begin_0 = const()[name = string("normed_215_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_215_end_0 = const()[name = string("normed_215_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_215_end_mask_0 = const()[name = string("normed_215_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_215 = slice_by_index(begin = normed_215_begin_0, end = normed_215_end_0, end_mask = normed_215_end_mask_0, x = normed_213_cast_fp16)[name = string("normed_215")]; + tensor const_451 = const()[name = string("const_451"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434264704)))]; + tensor q_27 = mul(x = normed_215, y = const_451)[name = string("q_27")]; + int32 var_8698 = const()[name = string("op_8698"), val = int32(-1)]; + fp16 const_452_promoted = const()[name = string("const_452_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_137 = transpose(perm = var_8651, x = var_8646)[name = string("transpose_132")]; + tensor var_8700 = mul(x = hidden_states_137, y = const_452_promoted)[name = string("op_8700")]; + bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; + tensor input_241 = concat(axis = var_8698, interleave = input_241_interleave_0, values = (hidden_states_137, var_8700))[name = string("input_241")]; + tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; + fp16 var_8695_to_fp16 = const()[name = string("op_8695_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_8695_to_fp16, x = input_241)[name = string("normed_217_cast_fp16")]; + tensor normed_219_begin_0 = const()[name = string("normed_219_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_219_end_0 = const()[name = string("normed_219_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_219_end_mask_0 = const()[name = string("normed_219_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_219 = slice_by_index(begin = normed_219_begin_0, end = normed_219_end_0, end_mask = normed_219_end_mask_0, x = normed_217_cast_fp16)[name = string("normed_219")]; + tensor const_455 = const()[name = string("const_455"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434265024)))]; + tensor k_27 = mul(x = normed_219, y = const_455)[name = string("k_27")]; + tensor var_8726 = mul(x = q_27, y = cos_5)[name = string("op_8726")]; + tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27)[name = string("x1_53")]; + tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27)[name = string("x2_53")]; + fp16 const_458_promoted = const()[name = string("const_458_promoted"), val = fp16(-0x1p+0)]; + tensor var_8747 = mul(x = x2_53, y = const_458_promoted)[name = string("op_8747")]; + int32 var_8749 = const()[name = string("op_8749"), val = int32(-1)]; + bool var_8750_interleave_0 = const()[name = string("op_8750_interleave_0"), val = bool(false)]; + tensor var_8750 = concat(axis = var_8749, interleave = var_8750_interleave_0, values = (var_8747, x1_53))[name = string("op_8750")]; + tensor var_8751 = mul(x = var_8750, y = sin_5)[name = string("op_8751")]; + tensor query_states_107 = add(x = var_8726, y = var_8751)[name = string("query_states_107")]; + tensor var_8754 = mul(x = k_27, y = cos_5)[name = string("op_8754")]; + tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27)[name = string("x1_55")]; + tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27)[name = string("x2_55")]; + fp16 const_461_promoted = const()[name = string("const_461_promoted"), val = fp16(-0x1p+0)]; + tensor var_8775 = mul(x = x2_55, y = const_461_promoted)[name = string("op_8775")]; + int32 var_8777 = const()[name = string("op_8777"), val = int32(-1)]; + bool var_8778_interleave_0 = const()[name = string("op_8778_interleave_0"), val = bool(false)]; + tensor var_8778 = concat(axis = var_8777, interleave = var_8778_interleave_0, values = (var_8775, x1_55))[name = string("op_8778")]; + tensor var_8779 = mul(x = var_8778, y = sin_5)[name = string("op_8779")]; + tensor key_states_133 = add(x = var_8754, y = var_8779)[name = string("key_states_133")]; + tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([13])]; + tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; + tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; + tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([14])]; + int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; + bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; + tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_236")]; + tensor concat_237_values1_0 = const()[name = string("concat_237_values1_0"), val = tensor([0])]; + tensor concat_237_values3_0 = const()[name = string("concat_237_values3_0"), val = tensor([0])]; + int32 concat_237_axis_0 = const()[name = string("concat_237_axis_0"), val = int32(0)]; + bool concat_237_interleave_0 = const()[name = string("concat_237_interleave_0"), val = bool(false)]; + tensor concat_237 = concat(axis = concat_237_axis_0, interleave = concat_237_interleave_0, values = (expand_dims_160, concat_237_values1_0, var_1781, concat_237_values3_0))[name = string("concat_237")]; + tensor model_model_kv_cache_0_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_236, begin_mask = model_model_kv_cache_0_internal_tensor_assign_27_begin_mask_0, end = concat_237, end_mask = model_model_kv_cache_0_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_27_stride_0, update = key_states_133, x = coreml_update_state_81)[name = string("model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_82")]; + tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([41])]; + tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; + tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; + tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([42])]; + int32 concat_240_axis_0 = const()[name = string("concat_240_axis_0"), val = int32(0)]; + bool concat_240_interleave_0 = const()[name = string("concat_240_interleave_0"), val = bool(false)]; + tensor concat_240 = concat(axis = concat_240_axis_0, interleave = concat_240_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_240")]; + tensor concat_241_values1_0 = const()[name = string("concat_241_values1_0"), val = tensor([0])]; + tensor concat_241_values3_0 = const()[name = string("concat_241_values3_0"), val = tensor([0])]; + int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; + bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; + tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (expand_dims_166, concat_241_values1_0, var_1781, concat_241_values3_0))[name = string("concat_241")]; + tensor model_model_kv_cache_0_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_107 = transpose(perm = var_8662, x = var_8657)[name = string("transpose_131")]; + tensor model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_240, begin_mask = model_model_kv_cache_0_internal_tensor_assign_28_begin_mask_0, end = concat_241, end_mask = model_model_kv_cache_0_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_28_stride_0, update = value_states_107, x = coreml_update_state_82)[name = string("model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_83")]; + tensor var_8850_begin_0 = const()[name = string("op_8850_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor var_8850_end_0 = const()[name = string("op_8850_end_0"), val = tensor([14, 8, 4096, 128])]; + tensor var_8850_end_mask_0 = const()[name = string("op_8850_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8850_cast_fp16 = slice_by_index(begin = var_8850_begin_0, end = var_8850_end_0, end_mask = var_8850_end_mask_0, x = coreml_update_state_83)[name = string("op_8850_cast_fp16")]; + tensor K_layer_cache_27_axes_0 = const()[name = string("K_layer_cache_27_axes_0"), val = tensor([0])]; + tensor K_layer_cache_27_cast_fp16 = squeeze(axes = K_layer_cache_27_axes_0, x = var_8850_cast_fp16)[name = string("K_layer_cache_27_cast_fp16")]; + tensor var_8857_begin_0 = const()[name = string("op_8857_begin_0"), val = tensor([41, 0, 0, 0])]; + tensor var_8857_end_0 = const()[name = string("op_8857_end_0"), val = tensor([42, 8, 4096, 128])]; + tensor var_8857_end_mask_0 = const()[name = string("op_8857_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_8857_cast_fp16 = slice_by_index(begin = var_8857_begin_0, end = var_8857_end_0, end_mask = var_8857_end_mask_0, x = coreml_update_state_83)[name = string("op_8857_cast_fp16")]; + tensor V_layer_cache_27_axes_0 = const()[name = string("V_layer_cache_27_axes_0"), val = tensor([0])]; + tensor V_layer_cache_27_cast_fp16 = squeeze(axes = V_layer_cache_27_axes_0, x = var_8857_cast_fp16)[name = string("V_layer_cache_27_cast_fp16")]; + tensor x_211_axes_0 = const()[name = string("x_211_axes_0"), val = tensor([1])]; + tensor x_211_cast_fp16 = expand_dims(axes = x_211_axes_0, x = K_layer_cache_27_cast_fp16)[name = string("x_211_cast_fp16")]; + tensor var_8886 = const()[name = string("op_8886"), val = tensor([1, 2, 1, 1])]; + tensor x_213_cast_fp16 = tile(reps = var_8886, x = x_211_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_8898 = const()[name = string("op_8898"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_137_cast_fp16 = reshape(shape = var_8898, x = x_213_cast_fp16)[name = string("key_states_137_cast_fp16")]; + tensor x_217_axes_0 = const()[name = string("x_217_axes_0"), val = tensor([1])]; + tensor x_217_cast_fp16 = expand_dims(axes = x_217_axes_0, x = V_layer_cache_27_cast_fp16)[name = string("x_217_cast_fp16")]; + tensor var_8906 = const()[name = string("op_8906"), val = tensor([1, 2, 1, 1])]; + tensor x_219_cast_fp16 = tile(reps = var_8906, x = x_217_cast_fp16)[name = string("x_219_cast_fp16")]; + bool var_8933_transpose_x_0 = const()[name = string("op_8933_transpose_x_0"), val = bool(false)]; + bool var_8933_transpose_y_0 = const()[name = string("op_8933_transpose_y_0"), val = bool(true)]; + tensor var_8933 = matmul(transpose_x = var_8933_transpose_x_0, transpose_y = var_8933_transpose_y_0, x = query_states_107, y = key_states_137_cast_fp16)[name = string("op_8933")]; + fp16 var_8934_to_fp16 = const()[name = string("op_8934_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_53_cast_fp16 = mul(x = var_8933, y = var_8934_to_fp16)[name = string("attn_weights_53_cast_fp16")]; + tensor attn_weights_55_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = causal_mask)[name = string("attn_weights_55_cast_fp16")]; + int32 var_8969 = const()[name = string("op_8969"), val = int32(-1)]; + tensor var_8971_cast_fp16 = softmax(axis = var_8969, x = attn_weights_55_cast_fp16)[name = string("op_8971_cast_fp16")]; + tensor concat_246 = const()[name = string("concat_246"), val = tensor([16, 64, 4096])]; + tensor reshape_39_cast_fp16 = reshape(shape = concat_246, x = var_8971_cast_fp16)[name = string("reshape_39_cast_fp16")]; + tensor concat_247 = const()[name = string("concat_247"), val = tensor([16, 4096, 128])]; + tensor reshape_40_cast_fp16 = reshape(shape = concat_247, x = x_219_cast_fp16)[name = string("reshape_40_cast_fp16")]; + bool matmul_13_transpose_x_0 = const()[name = string("matmul_13_transpose_x_0"), val = bool(false)]; + bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(false)]; + tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = reshape_39_cast_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")]; + tensor concat_251 = const()[name = string("concat_251"), val = tensor([1, 16, 64, 128])]; + tensor reshape_41_cast_fp16 = reshape(shape = concat_251, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")]; + tensor var_8983_perm_0 = const()[name = string("op_8983_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_9002 = const()[name = string("op_9002"), val = tensor([1, 64, 2048])]; + tensor var_8983_cast_fp16 = transpose(perm = var_8983_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_130")]; + tensor attn_output_135_cast_fp16 = reshape(shape = var_9002, x = var_8983_cast_fp16)[name = string("attn_output_135_cast_fp16")]; + tensor var_9007 = const()[name = string("op_9007"), val = tensor([0, 2, 1])]; + string var_9023_pad_type_0 = const()[name = string("op_9023_pad_type_0"), val = string("valid")]; + int32 var_9023_groups_0 = const()[name = string("op_9023_groups_0"), val = int32(1)]; + tensor var_9023_strides_0 = const()[name = string("op_9023_strides_0"), val = tensor([1])]; + tensor var_9023_pad_0 = const()[name = string("op_9023_pad_0"), val = tensor([0, 0])]; + tensor var_9023_dilations_0 = const()[name = string("op_9023_dilations_0"), val = tensor([1])]; + tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434265344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436362560))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9008_cast_fp16 = transpose(perm = var_9007, x = attn_output_135_cast_fp16)[name = string("transpose_129")]; + tensor var_9023_cast_fp16 = conv(dilations = var_9023_dilations_0, groups = var_9023_groups_0, pad = var_9023_pad_0, pad_type = var_9023_pad_type_0, strides = var_9023_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_9008_cast_fp16)[name = string("op_9023_cast_fp16")]; + tensor var_9027 = const()[name = string("op_9027"), val = tensor([0, 2, 1])]; + tensor attn_output_139_cast_fp16 = transpose(perm = var_9027, x = var_9023_cast_fp16)[name = string("transpose_128")]; + tensor hidden_states_139_cast_fp16 = add(x = hidden_states_131_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; + int32 var_9040 = const()[name = string("op_9040"), val = int32(-1)]; + fp16 const_473_promoted_to_fp16 = const()[name = string("const_473_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9042_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = const_473_promoted_to_fp16)[name = string("op_9042_cast_fp16")]; + bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; + tensor input_245_cast_fp16 = concat(axis = var_9040, interleave = input_245_interleave_0, values = (hidden_states_139_cast_fp16, var_9042_cast_fp16))[name = string("input_245_cast_fp16")]; + tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; + fp16 var_9037_to_fp16 = const()[name = string("op_9037_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_9037_to_fp16, x = input_245_cast_fp16)[name = string("normed_221_cast_fp16")]; + tensor normed_223_begin_0 = const()[name = string("normed_223_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_223_end_0 = const()[name = string("normed_223_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_223_end_mask_0 = const()[name = string("normed_223_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_223_cast_fp16 = slice_by_index(begin = normed_223_begin_0, end = normed_223_end_0, end_mask = normed_223_end_mask_0, x = normed_221_cast_fp16)[name = string("normed_223_cast_fp16")]; + tensor const_476_promoted_to_fp16 = const()[name = string("const_476_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436428160)))]; + tensor x_221_cast_fp16 = mul(x = normed_223_cast_fp16, y = const_476_promoted_to_fp16)[name = string("x_221_cast_fp16")]; + tensor var_9067 = const()[name = string("op_9067"), val = tensor([0, 2, 1])]; + tensor input_247_axes_0 = const()[name = string("input_247_axes_0"), val = tensor([2])]; + tensor var_9068 = transpose(perm = var_9067, x = x_221_cast_fp16)[name = string("transpose_127")]; + tensor input_247 = expand_dims(axes = input_247_axes_0, x = var_9068)[name = string("input_247")]; + string input_249_pad_type_0 = const()[name = string("input_249_pad_type_0"), val = string("valid")]; + tensor input_249_strides_0 = const()[name = string("input_249_strides_0"), val = tensor([1, 1])]; + tensor input_249_pad_0 = const()[name = string("input_249_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_249_dilations_0 = const()[name = string("input_249_dilations_0"), val = tensor([1, 1])]; + int32 input_249_groups_0 = const()[name = string("input_249_groups_0"), val = int32(1)]; + tensor input_249 = conv(dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_247)[name = string("input_249")]; + string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; + tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; + tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; + int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; + tensor b_27 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_247)[name = string("b_27")]; + tensor c_27 = silu(x = input_249)[name = string("c_27")]; + tensor input_251 = mul(x = c_27, y = b_27)[name = string("input_251")]; + string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; + tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; + tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; + int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; + tensor e_27 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_251)[name = string("e_27")]; + tensor var_9090_axes_0 = const()[name = string("op_9090_axes_0"), val = tensor([2])]; + tensor var_9090 = squeeze(axes = var_9090_axes_0, x = e_27)[name = string("op_9090")]; + tensor var_9091 = const()[name = string("op_9091"), val = tensor([0, 2, 1])]; + tensor var_9092 = transpose(perm = var_9091, x = var_9090)[name = string("transpose_126")]; + tensor hidden_states_141_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = var_9092)[name = string("hidden_states_141_cast_fp16")]; + int32 var_9104 = const()[name = string("op_9104"), val = int32(-1)]; + fp16 const_477_promoted_to_fp16 = const()[name = string("const_477_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9106_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_477_promoted_to_fp16)[name = string("op_9106_cast_fp16")]; + bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; + tensor input_253_cast_fp16 = concat(axis = var_9104, interleave = input_253_interleave_0, values = (hidden_states_141_cast_fp16, var_9106_cast_fp16))[name = string("input_253_cast_fp16")]; + tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; + fp16 var_9101_to_fp16 = const()[name = string("op_9101_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_9101_to_fp16, x = input_253_cast_fp16)[name = string("normed_225_cast_fp16")]; + tensor normed_227_begin_0 = const()[name = string("normed_227_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_227_end_0 = const()[name = string("normed_227_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_227_end_mask_0 = const()[name = string("normed_227_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_227_cast_fp16 = slice_by_index(begin = normed_227_begin_0, end = normed_227_end_0, end_mask = normed_227_end_mask_0, x = normed_225_cast_fp16)[name = string("normed_227_cast_fp16")]; + tensor const_480_promoted_to_fp16 = const()[name = string("const_480_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436430272)))]; + tensor hidden_states_143_cast_fp16 = mul(x = normed_227_cast_fp16, y = const_480_promoted_to_fp16)[name = string("hidden_states_143_cast_fp16")]; + tensor var_9129 = const()[name = string("op_9129"), val = tensor([0, 2, 1])]; + tensor var_9132_axes_0 = const()[name = string("op_9132_axes_0"), val = tensor([2])]; + tensor var_9130_cast_fp16 = transpose(perm = var_9129, x = hidden_states_143_cast_fp16)[name = string("transpose_125")]; + tensor var_9132_cast_fp16 = expand_dims(axes = var_9132_axes_0, x = var_9130_cast_fp16)[name = string("op_9132_cast_fp16")]; + string query_states_113_pad_type_0 = const()[name = string("query_states_113_pad_type_0"), val = string("valid")]; + tensor query_states_113_strides_0 = const()[name = string("query_states_113_strides_0"), val = tensor([1, 1])]; + tensor query_states_113_pad_0 = const()[name = string("query_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_113_dilations_0 = const()[name = string("query_states_113_dilations_0"), val = tensor([1, 1])]; + int32 query_states_113_groups_0 = const()[name = string("query_states_113_groups_0"), val = int32(1)]; + tensor query_states_113 = conv(dilations = query_states_113_dilations_0, groups = query_states_113_groups_0, pad = query_states_113_pad_0, pad_type = query_states_113_pad_type_0, strides = query_states_113_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_9132_cast_fp16)[name = string("query_states_113")]; + string key_states_141_pad_type_0 = const()[name = string("key_states_141_pad_type_0"), val = string("valid")]; + tensor key_states_141_strides_0 = const()[name = string("key_states_141_strides_0"), val = tensor([1, 1])]; + tensor key_states_141_pad_0 = const()[name = string("key_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_141_dilations_0 = const()[name = string("key_states_141_dilations_0"), val = tensor([1, 1])]; + int32 key_states_141_groups_0 = const()[name = string("key_states_141_groups_0"), val = int32(1)]; + tensor key_states_141 = conv(dilations = key_states_141_dilations_0, groups = key_states_141_groups_0, pad = key_states_141_pad_0, pad_type = key_states_141_pad_type_0, strides = key_states_141_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_9132_cast_fp16)[name = string("key_states_141")]; + string value_states_113_pad_type_0 = const()[name = string("value_states_113_pad_type_0"), val = string("valid")]; + tensor value_states_113_strides_0 = const()[name = string("value_states_113_strides_0"), val = tensor([1, 1])]; + tensor value_states_113_pad_0 = const()[name = string("value_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_113_dilations_0 = const()[name = string("value_states_113_dilations_0"), val = tensor([1, 1])]; + int32 value_states_113_groups_0 = const()[name = string("value_states_113_groups_0"), val = int32(1)]; + tensor value_states_113 = conv(dilations = value_states_113_dilations_0, groups = value_states_113_groups_0, pad = value_states_113_pad_0, pad_type = value_states_113_pad_type_0, strides = value_states_113_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_9132_cast_fp16)[name = string("value_states_113")]; + tensor var_9174 = const()[name = string("op_9174"), val = tensor([1, 16, 128, 64])]; + tensor var_9175 = reshape(shape = var_9174, x = query_states_113)[name = string("op_9175")]; + tensor var_9180 = const()[name = string("op_9180"), val = tensor([0, 1, 3, 2])]; + tensor var_9185 = const()[name = string("op_9185"), val = tensor([1, 8, 128, 64])]; + tensor var_9186 = reshape(shape = var_9185, x = key_states_141)[name = string("op_9186")]; + tensor var_9191 = const()[name = string("op_9191"), val = tensor([0, 1, 3, 2])]; + tensor var_9196 = const()[name = string("op_9196"), val = tensor([1, 8, 128, 64])]; + tensor var_9197 = reshape(shape = var_9196, x = value_states_113)[name = string("op_9197")]; + tensor var_9202 = const()[name = string("op_9202"), val = tensor([0, 1, 3, 2])]; + int32 var_9213 = const()[name = string("op_9213"), val = int32(-1)]; + fp16 const_482_promoted = const()[name = string("const_482_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_145 = transpose(perm = var_9180, x = var_9175)[name = string("transpose_124")]; + tensor var_9215 = mul(x = hidden_states_145, y = const_482_promoted)[name = string("op_9215")]; + bool input_257_interleave_0 = const()[name = string("input_257_interleave_0"), val = bool(false)]; + tensor input_257 = concat(axis = var_9213, interleave = input_257_interleave_0, values = (hidden_states_145, var_9215))[name = string("input_257")]; + tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; + fp16 var_9210_to_fp16 = const()[name = string("op_9210_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_9210_to_fp16, x = input_257)[name = string("normed_229_cast_fp16")]; + tensor normed_231_begin_0 = const()[name = string("normed_231_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_231_end_0 = const()[name = string("normed_231_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_231_end_mask_0 = const()[name = string("normed_231_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_231 = slice_by_index(begin = normed_231_begin_0, end = normed_231_end_0, end_mask = normed_231_end_mask_0, x = normed_229_cast_fp16)[name = string("normed_231")]; + tensor const_485 = const()[name = string("const_485"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436432384)))]; + tensor q_29 = mul(x = normed_231, y = const_485)[name = string("q_29")]; + int32 var_9238 = const()[name = string("op_9238"), val = int32(-1)]; + fp16 const_486_promoted = const()[name = string("const_486_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_147 = transpose(perm = var_9191, x = var_9186)[name = string("transpose_123")]; + tensor var_9240 = mul(x = hidden_states_147, y = const_486_promoted)[name = string("op_9240")]; + bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; + tensor input_259 = concat(axis = var_9238, interleave = input_259_interleave_0, values = (hidden_states_147, var_9240))[name = string("input_259")]; + tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; + fp16 var_9235_to_fp16 = const()[name = string("op_9235_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_9235_to_fp16, x = input_259)[name = string("normed_233_cast_fp16")]; + tensor normed_235_begin_0 = const()[name = string("normed_235_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_235_end_0 = const()[name = string("normed_235_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_235_end_mask_0 = const()[name = string("normed_235_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_235 = slice_by_index(begin = normed_235_begin_0, end = normed_235_end_0, end_mask = normed_235_end_mask_0, x = normed_233_cast_fp16)[name = string("normed_235")]; + tensor const_489 = const()[name = string("const_489"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436432704)))]; + tensor k_29 = mul(x = normed_235, y = const_489)[name = string("k_29")]; + tensor var_9266 = mul(x = q_29, y = cos_5)[name = string("op_9266")]; + tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29)[name = string("x1_57")]; + tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29)[name = string("x2_57")]; + fp16 const_492_promoted = const()[name = string("const_492_promoted"), val = fp16(-0x1p+0)]; + tensor var_9287 = mul(x = x2_57, y = const_492_promoted)[name = string("op_9287")]; + int32 var_9289 = const()[name = string("op_9289"), val = int32(-1)]; + bool var_9290_interleave_0 = const()[name = string("op_9290_interleave_0"), val = bool(false)]; + tensor var_9290 = concat(axis = var_9289, interleave = var_9290_interleave_0, values = (var_9287, x1_57))[name = string("op_9290")]; + tensor var_9291 = mul(x = var_9290, y = sin_5)[name = string("op_9291")]; + tensor query_states_115 = add(x = var_9266, y = var_9291)[name = string("query_states_115")]; + tensor var_9294 = mul(x = k_29, y = cos_5)[name = string("op_9294")]; + tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29)[name = string("x1_59")]; + tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29)[name = string("x2_59")]; + fp16 const_495_promoted = const()[name = string("const_495_promoted"), val = fp16(-0x1p+0)]; + tensor var_9315 = mul(x = x2_59, y = const_495_promoted)[name = string("op_9315")]; + int32 var_9317 = const()[name = string("op_9317"), val = int32(-1)]; + bool var_9318_interleave_0 = const()[name = string("op_9318_interleave_0"), val = bool(false)]; + tensor var_9318 = concat(axis = var_9317, interleave = var_9318_interleave_0, values = (var_9315, x1_59))[name = string("op_9318")]; + tensor var_9319 = mul(x = var_9318, y = sin_5)[name = string("op_9319")]; + tensor key_states_143 = add(x = var_9294, y = var_9319)[name = string("key_states_143")]; + tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([14])]; + tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; + tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; + tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([15])]; + int32 concat_254_axis_0 = const()[name = string("concat_254_axis_0"), val = int32(0)]; + bool concat_254_interleave_0 = const()[name = string("concat_254_interleave_0"), val = bool(false)]; + tensor concat_254 = concat(axis = concat_254_axis_0, interleave = concat_254_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_254")]; + tensor concat_255_values1_0 = const()[name = string("concat_255_values1_0"), val = tensor([0])]; + tensor concat_255_values3_0 = const()[name = string("concat_255_values3_0"), val = tensor([0])]; + int32 concat_255_axis_0 = const()[name = string("concat_255_axis_0"), val = int32(0)]; + bool concat_255_interleave_0 = const()[name = string("concat_255_interleave_0"), val = bool(false)]; + tensor concat_255 = concat(axis = concat_255_axis_0, interleave = concat_255_interleave_0, values = (expand_dims_172, concat_255_values1_0, var_1781, concat_255_values3_0))[name = string("concat_255")]; + tensor model_model_kv_cache_0_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_254, begin_mask = model_model_kv_cache_0_internal_tensor_assign_29_begin_mask_0, end = concat_255, end_mask = model_model_kv_cache_0_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_29_stride_0, update = key_states_143, x = coreml_update_state_83)[name = string("model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_84")]; + tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([42])]; + tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; + tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; + tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([43])]; + int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; + bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; + tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_258")]; + tensor concat_259_values1_0 = const()[name = string("concat_259_values1_0"), val = tensor([0])]; + tensor concat_259_values3_0 = const()[name = string("concat_259_values3_0"), val = tensor([0])]; + int32 concat_259_axis_0 = const()[name = string("concat_259_axis_0"), val = int32(0)]; + bool concat_259_interleave_0 = const()[name = string("concat_259_interleave_0"), val = bool(false)]; + tensor concat_259 = concat(axis = concat_259_axis_0, interleave = concat_259_interleave_0, values = (expand_dims_178, concat_259_values1_0, var_1781, concat_259_values3_0))[name = string("concat_259")]; + tensor model_model_kv_cache_0_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_115 = transpose(perm = var_9202, x = var_9197)[name = string("transpose_122")]; + tensor model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_258, begin_mask = model_model_kv_cache_0_internal_tensor_assign_30_begin_mask_0, end = concat_259, end_mask = model_model_kv_cache_0_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_30_stride_0, update = value_states_115, x = coreml_update_state_84)[name = string("model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_85")]; + tensor var_9390_begin_0 = const()[name = string("op_9390_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor var_9390_end_0 = const()[name = string("op_9390_end_0"), val = tensor([15, 8, 4096, 128])]; + tensor var_9390_end_mask_0 = const()[name = string("op_9390_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9390_cast_fp16 = slice_by_index(begin = var_9390_begin_0, end = var_9390_end_0, end_mask = var_9390_end_mask_0, x = coreml_update_state_85)[name = string("op_9390_cast_fp16")]; + tensor K_layer_cache_29_axes_0 = const()[name = string("K_layer_cache_29_axes_0"), val = tensor([0])]; + tensor K_layer_cache_29_cast_fp16 = squeeze(axes = K_layer_cache_29_axes_0, x = var_9390_cast_fp16)[name = string("K_layer_cache_29_cast_fp16")]; + tensor var_9397_begin_0 = const()[name = string("op_9397_begin_0"), val = tensor([42, 0, 0, 0])]; + tensor var_9397_end_0 = const()[name = string("op_9397_end_0"), val = tensor([43, 8, 4096, 128])]; + tensor var_9397_end_mask_0 = const()[name = string("op_9397_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9397_cast_fp16 = slice_by_index(begin = var_9397_begin_0, end = var_9397_end_0, end_mask = var_9397_end_mask_0, x = coreml_update_state_85)[name = string("op_9397_cast_fp16")]; + tensor V_layer_cache_29_axes_0 = const()[name = string("V_layer_cache_29_axes_0"), val = tensor([0])]; + tensor V_layer_cache_29_cast_fp16 = squeeze(axes = V_layer_cache_29_axes_0, x = var_9397_cast_fp16)[name = string("V_layer_cache_29_cast_fp16")]; + tensor x_227_axes_0 = const()[name = string("x_227_axes_0"), val = tensor([1])]; + tensor x_227_cast_fp16 = expand_dims(axes = x_227_axes_0, x = K_layer_cache_29_cast_fp16)[name = string("x_227_cast_fp16")]; + tensor var_9426 = const()[name = string("op_9426"), val = tensor([1, 2, 1, 1])]; + tensor x_229_cast_fp16 = tile(reps = var_9426, x = x_227_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_9438 = const()[name = string("op_9438"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_147_cast_fp16 = reshape(shape = var_9438, x = x_229_cast_fp16)[name = string("key_states_147_cast_fp16")]; + tensor x_233_axes_0 = const()[name = string("x_233_axes_0"), val = tensor([1])]; + tensor x_233_cast_fp16 = expand_dims(axes = x_233_axes_0, x = V_layer_cache_29_cast_fp16)[name = string("x_233_cast_fp16")]; + tensor var_9446 = const()[name = string("op_9446"), val = tensor([1, 2, 1, 1])]; + tensor x_235_cast_fp16 = tile(reps = var_9446, x = x_233_cast_fp16)[name = string("x_235_cast_fp16")]; + bool var_9473_transpose_x_0 = const()[name = string("op_9473_transpose_x_0"), val = bool(false)]; + bool var_9473_transpose_y_0 = const()[name = string("op_9473_transpose_y_0"), val = bool(true)]; + tensor var_9473 = matmul(transpose_x = var_9473_transpose_x_0, transpose_y = var_9473_transpose_y_0, x = query_states_115, y = key_states_147_cast_fp16)[name = string("op_9473")]; + fp16 var_9474_to_fp16 = const()[name = string("op_9474_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_57_cast_fp16 = mul(x = var_9473, y = var_9474_to_fp16)[name = string("attn_weights_57_cast_fp16")]; + tensor attn_weights_59_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = causal_mask)[name = string("attn_weights_59_cast_fp16")]; + int32 var_9509 = const()[name = string("op_9509"), val = int32(-1)]; + tensor var_9511_cast_fp16 = softmax(axis = var_9509, x = attn_weights_59_cast_fp16)[name = string("op_9511_cast_fp16")]; + tensor concat_264 = const()[name = string("concat_264"), val = tensor([16, 64, 4096])]; + tensor reshape_42_cast_fp16 = reshape(shape = concat_264, x = var_9511_cast_fp16)[name = string("reshape_42_cast_fp16")]; + tensor concat_265 = const()[name = string("concat_265"), val = tensor([16, 4096, 128])]; + tensor reshape_43_cast_fp16 = reshape(shape = concat_265, x = x_235_cast_fp16)[name = string("reshape_43_cast_fp16")]; + bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; + bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(false)]; + tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = reshape_42_cast_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")]; + tensor concat_269 = const()[name = string("concat_269"), val = tensor([1, 16, 64, 128])]; + tensor reshape_44_cast_fp16 = reshape(shape = concat_269, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")]; + tensor var_9523_perm_0 = const()[name = string("op_9523_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_9542 = const()[name = string("op_9542"), val = tensor([1, 64, 2048])]; + tensor var_9523_cast_fp16 = transpose(perm = var_9523_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_121")]; + tensor attn_output_145_cast_fp16 = reshape(shape = var_9542, x = var_9523_cast_fp16)[name = string("attn_output_145_cast_fp16")]; + tensor var_9547 = const()[name = string("op_9547"), val = tensor([0, 2, 1])]; + string var_9563_pad_type_0 = const()[name = string("op_9563_pad_type_0"), val = string("valid")]; + int32 var_9563_groups_0 = const()[name = string("op_9563_groups_0"), val = int32(1)]; + tensor var_9563_strides_0 = const()[name = string("op_9563_strides_0"), val = tensor([1])]; + tensor var_9563_pad_0 = const()[name = string("op_9563_pad_0"), val = tensor([0, 0])]; + tensor var_9563_dilations_0 = const()[name = string("op_9563_dilations_0"), val = tensor([1])]; + tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436433024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438530240))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_9548_cast_fp16 = transpose(perm = var_9547, x = attn_output_145_cast_fp16)[name = string("transpose_120")]; + tensor var_9563_cast_fp16 = conv(dilations = var_9563_dilations_0, groups = var_9563_groups_0, pad = var_9563_pad_0, pad_type = var_9563_pad_type_0, strides = var_9563_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_9548_cast_fp16)[name = string("op_9563_cast_fp16")]; + tensor var_9567 = const()[name = string("op_9567"), val = tensor([0, 2, 1])]; + tensor attn_output_149_cast_fp16 = transpose(perm = var_9567, x = var_9563_cast_fp16)[name = string("transpose_119")]; + tensor hidden_states_149_cast_fp16 = add(x = hidden_states_141_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + int32 var_9580 = const()[name = string("op_9580"), val = int32(-1)]; + fp16 const_507_promoted_to_fp16 = const()[name = string("const_507_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9582_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = const_507_promoted_to_fp16)[name = string("op_9582_cast_fp16")]; + bool input_263_interleave_0 = const()[name = string("input_263_interleave_0"), val = bool(false)]; + tensor input_263_cast_fp16 = concat(axis = var_9580, interleave = input_263_interleave_0, values = (hidden_states_149_cast_fp16, var_9582_cast_fp16))[name = string("input_263_cast_fp16")]; + tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; + fp16 var_9577_to_fp16 = const()[name = string("op_9577_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_9577_to_fp16, x = input_263_cast_fp16)[name = string("normed_237_cast_fp16")]; + tensor normed_239_begin_0 = const()[name = string("normed_239_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_239_end_0 = const()[name = string("normed_239_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_239_end_mask_0 = const()[name = string("normed_239_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_239_cast_fp16 = slice_by_index(begin = normed_239_begin_0, end = normed_239_end_0, end_mask = normed_239_end_mask_0, x = normed_237_cast_fp16)[name = string("normed_239_cast_fp16")]; + tensor const_510_promoted_to_fp16 = const()[name = string("const_510_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438595840)))]; + tensor x_237_cast_fp16 = mul(x = normed_239_cast_fp16, y = const_510_promoted_to_fp16)[name = string("x_237_cast_fp16")]; + tensor var_9607 = const()[name = string("op_9607"), val = tensor([0, 2, 1])]; + tensor input_265_axes_0 = const()[name = string("input_265_axes_0"), val = tensor([2])]; + tensor var_9608 = transpose(perm = var_9607, x = x_237_cast_fp16)[name = string("transpose_118")]; + tensor input_265 = expand_dims(axes = input_265_axes_0, x = var_9608)[name = string("input_265")]; + string input_267_pad_type_0 = const()[name = string("input_267_pad_type_0"), val = string("valid")]; + tensor input_267_strides_0 = const()[name = string("input_267_strides_0"), val = tensor([1, 1])]; + tensor input_267_pad_0 = const()[name = string("input_267_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_267_dilations_0 = const()[name = string("input_267_dilations_0"), val = tensor([1, 1])]; + int32 input_267_groups_0 = const()[name = string("input_267_groups_0"), val = int32(1)]; + tensor input_267 = conv(dilations = input_267_dilations_0, groups = input_267_groups_0, pad = input_267_pad_0, pad_type = input_267_pad_type_0, strides = input_267_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_265)[name = string("input_267")]; + string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; + tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; + tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; + int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; + tensor b_29 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_265)[name = string("b_29")]; + tensor c_29 = silu(x = input_267)[name = string("c_29")]; + tensor input_269 = mul(x = c_29, y = b_29)[name = string("input_269")]; + string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; + tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; + tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; + int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; + tensor e_29 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_269)[name = string("e_29")]; + tensor var_9630_axes_0 = const()[name = string("op_9630_axes_0"), val = tensor([2])]; + tensor var_9630 = squeeze(axes = var_9630_axes_0, x = e_29)[name = string("op_9630")]; + tensor var_9631 = const()[name = string("op_9631"), val = tensor([0, 2, 1])]; + tensor var_9632 = transpose(perm = var_9631, x = var_9630)[name = string("transpose_117")]; + tensor hidden_states_151_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = var_9632)[name = string("hidden_states_151_cast_fp16")]; + int32 var_9644 = const()[name = string("op_9644"), val = int32(-1)]; + fp16 const_511_promoted_to_fp16 = const()[name = string("const_511_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_9646_cast_fp16 = mul(x = hidden_states_151_cast_fp16, y = const_511_promoted_to_fp16)[name = string("op_9646_cast_fp16")]; + bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; + tensor input_271_cast_fp16 = concat(axis = var_9644, interleave = input_271_interleave_0, values = (hidden_states_151_cast_fp16, var_9646_cast_fp16))[name = string("input_271_cast_fp16")]; + tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; + fp16 var_9641_to_fp16 = const()[name = string("op_9641_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_9641_to_fp16, x = input_271_cast_fp16)[name = string("normed_241_cast_fp16")]; + tensor normed_243_begin_0 = const()[name = string("normed_243_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_243_end_0 = const()[name = string("normed_243_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_243_end_mask_0 = const()[name = string("normed_243_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_243_cast_fp16 = slice_by_index(begin = normed_243_begin_0, end = normed_243_end_0, end_mask = normed_243_end_mask_0, x = normed_241_cast_fp16)[name = string("normed_243_cast_fp16")]; + tensor const_514_promoted_to_fp16 = const()[name = string("const_514_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438597952)))]; + tensor hidden_states_153_cast_fp16 = mul(x = normed_243_cast_fp16, y = const_514_promoted_to_fp16)[name = string("hidden_states_153_cast_fp16")]; + tensor var_9669 = const()[name = string("op_9669"), val = tensor([0, 2, 1])]; + tensor var_9672_axes_0 = const()[name = string("op_9672_axes_0"), val = tensor([2])]; + tensor var_9670_cast_fp16 = transpose(perm = var_9669, x = hidden_states_153_cast_fp16)[name = string("transpose_116")]; + tensor var_9672_cast_fp16 = expand_dims(axes = var_9672_axes_0, x = var_9670_cast_fp16)[name = string("op_9672_cast_fp16")]; + string query_states_121_pad_type_0 = const()[name = string("query_states_121_pad_type_0"), val = string("valid")]; + tensor query_states_121_strides_0 = const()[name = string("query_states_121_strides_0"), val = tensor([1, 1])]; + tensor query_states_121_pad_0 = const()[name = string("query_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_121_dilations_0 = const()[name = string("query_states_121_dilations_0"), val = tensor([1, 1])]; + int32 query_states_121_groups_0 = const()[name = string("query_states_121_groups_0"), val = int32(1)]; + tensor query_states_121 = conv(dilations = query_states_121_dilations_0, groups = query_states_121_groups_0, pad = query_states_121_pad_0, pad_type = query_states_121_pad_type_0, strides = query_states_121_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_9672_cast_fp16)[name = string("query_states_121")]; + string key_states_151_pad_type_0 = const()[name = string("key_states_151_pad_type_0"), val = string("valid")]; + tensor key_states_151_strides_0 = const()[name = string("key_states_151_strides_0"), val = tensor([1, 1])]; + tensor key_states_151_pad_0 = const()[name = string("key_states_151_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_151_dilations_0 = const()[name = string("key_states_151_dilations_0"), val = tensor([1, 1])]; + int32 key_states_151_groups_0 = const()[name = string("key_states_151_groups_0"), val = int32(1)]; + tensor key_states_151 = conv(dilations = key_states_151_dilations_0, groups = key_states_151_groups_0, pad = key_states_151_pad_0, pad_type = key_states_151_pad_type_0, strides = key_states_151_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_9672_cast_fp16)[name = string("key_states_151")]; + string value_states_121_pad_type_0 = const()[name = string("value_states_121_pad_type_0"), val = string("valid")]; + tensor value_states_121_strides_0 = const()[name = string("value_states_121_strides_0"), val = tensor([1, 1])]; + tensor value_states_121_pad_0 = const()[name = string("value_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_121_dilations_0 = const()[name = string("value_states_121_dilations_0"), val = tensor([1, 1])]; + int32 value_states_121_groups_0 = const()[name = string("value_states_121_groups_0"), val = int32(1)]; + tensor value_states_121 = conv(dilations = value_states_121_dilations_0, groups = value_states_121_groups_0, pad = value_states_121_pad_0, pad_type = value_states_121_pad_type_0, strides = value_states_121_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_9672_cast_fp16)[name = string("value_states_121")]; + tensor var_9714 = const()[name = string("op_9714"), val = tensor([1, 16, 128, 64])]; + tensor var_9715 = reshape(shape = var_9714, x = query_states_121)[name = string("op_9715")]; + tensor var_9720 = const()[name = string("op_9720"), val = tensor([0, 1, 3, 2])]; + tensor var_9725 = const()[name = string("op_9725"), val = tensor([1, 8, 128, 64])]; + tensor var_9726 = reshape(shape = var_9725, x = key_states_151)[name = string("op_9726")]; + tensor var_9731 = const()[name = string("op_9731"), val = tensor([0, 1, 3, 2])]; + tensor var_9736 = const()[name = string("op_9736"), val = tensor([1, 8, 128, 64])]; + tensor var_9737 = reshape(shape = var_9736, x = value_states_121)[name = string("op_9737")]; + tensor var_9742 = const()[name = string("op_9742"), val = tensor([0, 1, 3, 2])]; + int32 var_9753 = const()[name = string("op_9753"), val = int32(-1)]; + fp16 const_516_promoted = const()[name = string("const_516_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_155 = transpose(perm = var_9720, x = var_9715)[name = string("transpose_115")]; + tensor var_9755 = mul(x = hidden_states_155, y = const_516_promoted)[name = string("op_9755")]; + bool input_275_interleave_0 = const()[name = string("input_275_interleave_0"), val = bool(false)]; + tensor input_275 = concat(axis = var_9753, interleave = input_275_interleave_0, values = (hidden_states_155, var_9755))[name = string("input_275")]; + tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; + fp16 var_9750_to_fp16 = const()[name = string("op_9750_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_9750_to_fp16, x = input_275)[name = string("normed_245_cast_fp16")]; + tensor normed_247_begin_0 = const()[name = string("normed_247_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_247_end_0 = const()[name = string("normed_247_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_247_end_mask_0 = const()[name = string("normed_247_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_247 = slice_by_index(begin = normed_247_begin_0, end = normed_247_end_0, end_mask = normed_247_end_mask_0, x = normed_245_cast_fp16)[name = string("normed_247")]; + tensor const_519 = const()[name = string("const_519"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438600064)))]; + tensor q_31 = mul(x = normed_247, y = const_519)[name = string("q_31")]; + int32 var_9778 = const()[name = string("op_9778"), val = int32(-1)]; + fp16 const_520_promoted = const()[name = string("const_520_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_157 = transpose(perm = var_9731, x = var_9726)[name = string("transpose_114")]; + tensor var_9780 = mul(x = hidden_states_157, y = const_520_promoted)[name = string("op_9780")]; + bool input_277_interleave_0 = const()[name = string("input_277_interleave_0"), val = bool(false)]; + tensor input_277 = concat(axis = var_9778, interleave = input_277_interleave_0, values = (hidden_states_157, var_9780))[name = string("input_277")]; + tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; + fp16 var_9775_to_fp16 = const()[name = string("op_9775_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_9775_to_fp16, x = input_277)[name = string("normed_249_cast_fp16")]; + tensor normed_251_begin_0 = const()[name = string("normed_251_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_251_end_0 = const()[name = string("normed_251_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_251_end_mask_0 = const()[name = string("normed_251_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_251 = slice_by_index(begin = normed_251_begin_0, end = normed_251_end_0, end_mask = normed_251_end_mask_0, x = normed_249_cast_fp16)[name = string("normed_251")]; + tensor const_523 = const()[name = string("const_523"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438600384)))]; + tensor k_31 = mul(x = normed_251, y = const_523)[name = string("k_31")]; + tensor var_9806 = mul(x = q_31, y = cos_5)[name = string("op_9806")]; + tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31)[name = string("x1_61")]; + tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31)[name = string("x2_61")]; + fp16 const_526_promoted = const()[name = string("const_526_promoted"), val = fp16(-0x1p+0)]; + tensor var_9827 = mul(x = x2_61, y = const_526_promoted)[name = string("op_9827")]; + int32 var_9829 = const()[name = string("op_9829"), val = int32(-1)]; + bool var_9830_interleave_0 = const()[name = string("op_9830_interleave_0"), val = bool(false)]; + tensor var_9830 = concat(axis = var_9829, interleave = var_9830_interleave_0, values = (var_9827, x1_61))[name = string("op_9830")]; + tensor var_9831 = mul(x = var_9830, y = sin_5)[name = string("op_9831")]; + tensor query_states_123 = add(x = var_9806, y = var_9831)[name = string("query_states_123")]; + tensor var_9834 = mul(x = k_31, y = cos_5)[name = string("op_9834")]; + tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_63 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31)[name = string("x1_63")]; + tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_63 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31)[name = string("x2_63")]; + fp16 const_529_promoted = const()[name = string("const_529_promoted"), val = fp16(-0x1p+0)]; + tensor var_9855 = mul(x = x2_63, y = const_529_promoted)[name = string("op_9855")]; + int32 var_9857 = const()[name = string("op_9857"), val = int32(-1)]; + bool var_9858_interleave_0 = const()[name = string("op_9858_interleave_0"), val = bool(false)]; + tensor var_9858 = concat(axis = var_9857, interleave = var_9858_interleave_0, values = (var_9855, x1_63))[name = string("op_9858")]; + tensor var_9859 = mul(x = var_9858, y = sin_5)[name = string("op_9859")]; + tensor key_states_153 = add(x = var_9834, y = var_9859)[name = string("key_states_153")]; + tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([15])]; + tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; + tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; + tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([16])]; + int32 concat_272_axis_0 = const()[name = string("concat_272_axis_0"), val = int32(0)]; + bool concat_272_interleave_0 = const()[name = string("concat_272_interleave_0"), val = bool(false)]; + tensor concat_272 = concat(axis = concat_272_axis_0, interleave = concat_272_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_272")]; + tensor concat_273_values1_0 = const()[name = string("concat_273_values1_0"), val = tensor([0])]; + tensor concat_273_values3_0 = const()[name = string("concat_273_values3_0"), val = tensor([0])]; + int32 concat_273_axis_0 = const()[name = string("concat_273_axis_0"), val = int32(0)]; + bool concat_273_interleave_0 = const()[name = string("concat_273_interleave_0"), val = bool(false)]; + tensor concat_273 = concat(axis = concat_273_axis_0, interleave = concat_273_interleave_0, values = (expand_dims_184, concat_273_values1_0, var_1781, concat_273_values3_0))[name = string("concat_273")]; + tensor model_model_kv_cache_0_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_272, begin_mask = model_model_kv_cache_0_internal_tensor_assign_31_begin_mask_0, end = concat_273, end_mask = model_model_kv_cache_0_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_31_stride_0, update = key_states_153, x = coreml_update_state_85)[name = string("model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_86")]; + tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([43])]; + tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; + tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; + tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([44])]; + int32 concat_276_axis_0 = const()[name = string("concat_276_axis_0"), val = int32(0)]; + bool concat_276_interleave_0 = const()[name = string("concat_276_interleave_0"), val = bool(false)]; + tensor concat_276 = concat(axis = concat_276_axis_0, interleave = concat_276_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_276")]; + tensor concat_277_values1_0 = const()[name = string("concat_277_values1_0"), val = tensor([0])]; + tensor concat_277_values3_0 = const()[name = string("concat_277_values3_0"), val = tensor([0])]; + int32 concat_277_axis_0 = const()[name = string("concat_277_axis_0"), val = int32(0)]; + bool concat_277_interleave_0 = const()[name = string("concat_277_interleave_0"), val = bool(false)]; + tensor concat_277 = concat(axis = concat_277_axis_0, interleave = concat_277_interleave_0, values = (expand_dims_190, concat_277_values1_0, var_1781, concat_277_values3_0))[name = string("concat_277")]; + tensor model_model_kv_cache_0_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_123 = transpose(perm = var_9742, x = var_9737)[name = string("transpose_113")]; + tensor model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_276, begin_mask = model_model_kv_cache_0_internal_tensor_assign_32_begin_mask_0, end = concat_277, end_mask = model_model_kv_cache_0_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_32_stride_0, update = value_states_123, x = coreml_update_state_86)[name = string("model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_87")]; + tensor var_9930_begin_0 = const()[name = string("op_9930_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor var_9930_end_0 = const()[name = string("op_9930_end_0"), val = tensor([16, 8, 4096, 128])]; + tensor var_9930_end_mask_0 = const()[name = string("op_9930_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9930_cast_fp16 = slice_by_index(begin = var_9930_begin_0, end = var_9930_end_0, end_mask = var_9930_end_mask_0, x = coreml_update_state_87)[name = string("op_9930_cast_fp16")]; + tensor K_layer_cache_31_axes_0 = const()[name = string("K_layer_cache_31_axes_0"), val = tensor([0])]; + tensor K_layer_cache_31_cast_fp16 = squeeze(axes = K_layer_cache_31_axes_0, x = var_9930_cast_fp16)[name = string("K_layer_cache_31_cast_fp16")]; + tensor var_9937_begin_0 = const()[name = string("op_9937_begin_0"), val = tensor([43, 0, 0, 0])]; + tensor var_9937_end_0 = const()[name = string("op_9937_end_0"), val = tensor([44, 8, 4096, 128])]; + tensor var_9937_end_mask_0 = const()[name = string("op_9937_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_9937_cast_fp16 = slice_by_index(begin = var_9937_begin_0, end = var_9937_end_0, end_mask = var_9937_end_mask_0, x = coreml_update_state_87)[name = string("op_9937_cast_fp16")]; + tensor V_layer_cache_31_axes_0 = const()[name = string("V_layer_cache_31_axes_0"), val = tensor([0])]; + tensor V_layer_cache_31_cast_fp16 = squeeze(axes = V_layer_cache_31_axes_0, x = var_9937_cast_fp16)[name = string("V_layer_cache_31_cast_fp16")]; + tensor x_243_axes_0 = const()[name = string("x_243_axes_0"), val = tensor([1])]; + tensor x_243_cast_fp16 = expand_dims(axes = x_243_axes_0, x = K_layer_cache_31_cast_fp16)[name = string("x_243_cast_fp16")]; + tensor var_9966 = const()[name = string("op_9966"), val = tensor([1, 2, 1, 1])]; + tensor x_245_cast_fp16 = tile(reps = var_9966, x = x_243_cast_fp16)[name = string("x_245_cast_fp16")]; + tensor var_9978 = const()[name = string("op_9978"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_157_cast_fp16 = reshape(shape = var_9978, x = x_245_cast_fp16)[name = string("key_states_157_cast_fp16")]; + tensor x_249_axes_0 = const()[name = string("x_249_axes_0"), val = tensor([1])]; + tensor x_249_cast_fp16 = expand_dims(axes = x_249_axes_0, x = V_layer_cache_31_cast_fp16)[name = string("x_249_cast_fp16")]; + tensor var_9986 = const()[name = string("op_9986"), val = tensor([1, 2, 1, 1])]; + tensor x_251_cast_fp16 = tile(reps = var_9986, x = x_249_cast_fp16)[name = string("x_251_cast_fp16")]; + bool var_10013_transpose_x_0 = const()[name = string("op_10013_transpose_x_0"), val = bool(false)]; + bool var_10013_transpose_y_0 = const()[name = string("op_10013_transpose_y_0"), val = bool(true)]; + tensor var_10013 = matmul(transpose_x = var_10013_transpose_x_0, transpose_y = var_10013_transpose_y_0, x = query_states_123, y = key_states_157_cast_fp16)[name = string("op_10013")]; + fp16 var_10014_to_fp16 = const()[name = string("op_10014_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_61_cast_fp16 = mul(x = var_10013, y = var_10014_to_fp16)[name = string("attn_weights_61_cast_fp16")]; + tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = causal_mask)[name = string("attn_weights_63_cast_fp16")]; + int32 var_10049 = const()[name = string("op_10049"), val = int32(-1)]; + tensor var_10051_cast_fp16 = softmax(axis = var_10049, x = attn_weights_63_cast_fp16)[name = string("op_10051_cast_fp16")]; + tensor concat_282 = const()[name = string("concat_282"), val = tensor([16, 64, 4096])]; + tensor reshape_45_cast_fp16 = reshape(shape = concat_282, x = var_10051_cast_fp16)[name = string("reshape_45_cast_fp16")]; + tensor concat_283 = const()[name = string("concat_283"), val = tensor([16, 4096, 128])]; + tensor reshape_46_cast_fp16 = reshape(shape = concat_283, x = x_251_cast_fp16)[name = string("reshape_46_cast_fp16")]; + bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; + bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(false)]; + tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = reshape_45_cast_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")]; + tensor concat_287 = const()[name = string("concat_287"), val = tensor([1, 16, 64, 128])]; + tensor reshape_47_cast_fp16 = reshape(shape = concat_287, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")]; + tensor var_10063_perm_0 = const()[name = string("op_10063_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_10082 = const()[name = string("op_10082"), val = tensor([1, 64, 2048])]; + tensor var_10063_cast_fp16 = transpose(perm = var_10063_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_112")]; + tensor attn_output_155_cast_fp16 = reshape(shape = var_10082, x = var_10063_cast_fp16)[name = string("attn_output_155_cast_fp16")]; + tensor var_10087 = const()[name = string("op_10087"), val = tensor([0, 2, 1])]; + string var_10103_pad_type_0 = const()[name = string("op_10103_pad_type_0"), val = string("valid")]; + int32 var_10103_groups_0 = const()[name = string("op_10103_groups_0"), val = int32(1)]; + tensor var_10103_strides_0 = const()[name = string("op_10103_strides_0"), val = tensor([1])]; + tensor var_10103_pad_0 = const()[name = string("op_10103_pad_0"), val = tensor([0, 0])]; + tensor var_10103_dilations_0 = const()[name = string("op_10103_dilations_0"), val = tensor([1])]; + tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438600704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440697920))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10088_cast_fp16 = transpose(perm = var_10087, x = attn_output_155_cast_fp16)[name = string("transpose_111")]; + tensor var_10103_cast_fp16 = conv(dilations = var_10103_dilations_0, groups = var_10103_groups_0, pad = var_10103_pad_0, pad_type = var_10103_pad_type_0, strides = var_10103_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_10088_cast_fp16)[name = string("op_10103_cast_fp16")]; + tensor var_10107 = const()[name = string("op_10107"), val = tensor([0, 2, 1])]; + tensor attn_output_159_cast_fp16 = transpose(perm = var_10107, x = var_10103_cast_fp16)[name = string("transpose_110")]; + tensor hidden_states_159_cast_fp16 = add(x = hidden_states_151_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_159_cast_fp16")]; + int32 var_10120 = const()[name = string("op_10120"), val = int32(-1)]; + fp16 const_541_promoted_to_fp16 = const()[name = string("const_541_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10122_cast_fp16 = mul(x = hidden_states_159_cast_fp16, y = const_541_promoted_to_fp16)[name = string("op_10122_cast_fp16")]; + bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; + tensor input_281_cast_fp16 = concat(axis = var_10120, interleave = input_281_interleave_0, values = (hidden_states_159_cast_fp16, var_10122_cast_fp16))[name = string("input_281_cast_fp16")]; + tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; + fp16 var_10117_to_fp16 = const()[name = string("op_10117_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_10117_to_fp16, x = input_281_cast_fp16)[name = string("normed_253_cast_fp16")]; + tensor normed_255_begin_0 = const()[name = string("normed_255_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_255_end_0 = const()[name = string("normed_255_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_255_end_mask_0 = const()[name = string("normed_255_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_255_cast_fp16 = slice_by_index(begin = normed_255_begin_0, end = normed_255_end_0, end_mask = normed_255_end_mask_0, x = normed_253_cast_fp16)[name = string("normed_255_cast_fp16")]; + tensor const_544_promoted_to_fp16 = const()[name = string("const_544_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440763520)))]; + tensor x_253_cast_fp16 = mul(x = normed_255_cast_fp16, y = const_544_promoted_to_fp16)[name = string("x_253_cast_fp16")]; + tensor var_10147 = const()[name = string("op_10147"), val = tensor([0, 2, 1])]; + tensor input_283_axes_0 = const()[name = string("input_283_axes_0"), val = tensor([2])]; + tensor var_10148 = transpose(perm = var_10147, x = x_253_cast_fp16)[name = string("transpose_109")]; + tensor input_283 = expand_dims(axes = input_283_axes_0, x = var_10148)[name = string("input_283")]; + string input_285_pad_type_0 = const()[name = string("input_285_pad_type_0"), val = string("valid")]; + tensor input_285_strides_0 = const()[name = string("input_285_strides_0"), val = tensor([1, 1])]; + tensor input_285_pad_0 = const()[name = string("input_285_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_285_dilations_0 = const()[name = string("input_285_dilations_0"), val = tensor([1, 1])]; + int32 input_285_groups_0 = const()[name = string("input_285_groups_0"), val = int32(1)]; + tensor input_285 = conv(dilations = input_285_dilations_0, groups = input_285_groups_0, pad = input_285_pad_0, pad_type = input_285_pad_type_0, strides = input_285_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_283)[name = string("input_285")]; + string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; + tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; + tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; + int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; + tensor b_31 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_283)[name = string("b_31")]; + tensor c_31 = silu(x = input_285)[name = string("c_31")]; + tensor input_287 = mul(x = c_31, y = b_31)[name = string("input_287")]; + string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; + tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; + tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; + int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; + tensor e_31 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_287)[name = string("e_31")]; + tensor var_10170_axes_0 = const()[name = string("op_10170_axes_0"), val = tensor([2])]; + tensor var_10170 = squeeze(axes = var_10170_axes_0, x = e_31)[name = string("op_10170")]; + tensor var_10171 = const()[name = string("op_10171"), val = tensor([0, 2, 1])]; + tensor var_10172 = transpose(perm = var_10171, x = var_10170)[name = string("transpose_108")]; + tensor hidden_states_161_cast_fp16 = add(x = hidden_states_159_cast_fp16, y = var_10172)[name = string("hidden_states_161_cast_fp16")]; + int32 var_10184 = const()[name = string("op_10184"), val = int32(-1)]; + fp16 const_545_promoted_to_fp16 = const()[name = string("const_545_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10186_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_545_promoted_to_fp16)[name = string("op_10186_cast_fp16")]; + bool input_289_interleave_0 = const()[name = string("input_289_interleave_0"), val = bool(false)]; + tensor input_289_cast_fp16 = concat(axis = var_10184, interleave = input_289_interleave_0, values = (hidden_states_161_cast_fp16, var_10186_cast_fp16))[name = string("input_289_cast_fp16")]; + tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; + fp16 var_10181_to_fp16 = const()[name = string("op_10181_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_10181_to_fp16, x = input_289_cast_fp16)[name = string("normed_257_cast_fp16")]; + tensor normed_259_begin_0 = const()[name = string("normed_259_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_259_end_0 = const()[name = string("normed_259_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_259_end_mask_0 = const()[name = string("normed_259_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_259_cast_fp16 = slice_by_index(begin = normed_259_begin_0, end = normed_259_end_0, end_mask = normed_259_end_mask_0, x = normed_257_cast_fp16)[name = string("normed_259_cast_fp16")]; + tensor const_548_promoted_to_fp16 = const()[name = string("const_548_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440765632)))]; + tensor hidden_states_163_cast_fp16 = mul(x = normed_259_cast_fp16, y = const_548_promoted_to_fp16)[name = string("hidden_states_163_cast_fp16")]; + tensor var_10209 = const()[name = string("op_10209"), val = tensor([0, 2, 1])]; + tensor var_10212_axes_0 = const()[name = string("op_10212_axes_0"), val = tensor([2])]; + tensor var_10210_cast_fp16 = transpose(perm = var_10209, x = hidden_states_163_cast_fp16)[name = string("transpose_107")]; + tensor var_10212_cast_fp16 = expand_dims(axes = var_10212_axes_0, x = var_10210_cast_fp16)[name = string("op_10212_cast_fp16")]; + string query_states_129_pad_type_0 = const()[name = string("query_states_129_pad_type_0"), val = string("valid")]; + tensor query_states_129_strides_0 = const()[name = string("query_states_129_strides_0"), val = tensor([1, 1])]; + tensor query_states_129_pad_0 = const()[name = string("query_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_129_dilations_0 = const()[name = string("query_states_129_dilations_0"), val = tensor([1, 1])]; + int32 query_states_129_groups_0 = const()[name = string("query_states_129_groups_0"), val = int32(1)]; + tensor query_states_129 = conv(dilations = query_states_129_dilations_0, groups = query_states_129_groups_0, pad = query_states_129_pad_0, pad_type = query_states_129_pad_type_0, strides = query_states_129_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_10212_cast_fp16)[name = string("query_states_129")]; + string key_states_161_pad_type_0 = const()[name = string("key_states_161_pad_type_0"), val = string("valid")]; + tensor key_states_161_strides_0 = const()[name = string("key_states_161_strides_0"), val = tensor([1, 1])]; + tensor key_states_161_pad_0 = const()[name = string("key_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_161_dilations_0 = const()[name = string("key_states_161_dilations_0"), val = tensor([1, 1])]; + int32 key_states_161_groups_0 = const()[name = string("key_states_161_groups_0"), val = int32(1)]; + tensor key_states_161 = conv(dilations = key_states_161_dilations_0, groups = key_states_161_groups_0, pad = key_states_161_pad_0, pad_type = key_states_161_pad_type_0, strides = key_states_161_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_10212_cast_fp16)[name = string("key_states_161")]; + string value_states_129_pad_type_0 = const()[name = string("value_states_129_pad_type_0"), val = string("valid")]; + tensor value_states_129_strides_0 = const()[name = string("value_states_129_strides_0"), val = tensor([1, 1])]; + tensor value_states_129_pad_0 = const()[name = string("value_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_129_dilations_0 = const()[name = string("value_states_129_dilations_0"), val = tensor([1, 1])]; + int32 value_states_129_groups_0 = const()[name = string("value_states_129_groups_0"), val = int32(1)]; + tensor value_states_129 = conv(dilations = value_states_129_dilations_0, groups = value_states_129_groups_0, pad = value_states_129_pad_0, pad_type = value_states_129_pad_type_0, strides = value_states_129_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_10212_cast_fp16)[name = string("value_states_129")]; + tensor var_10254 = const()[name = string("op_10254"), val = tensor([1, 16, 128, 64])]; + tensor var_10255 = reshape(shape = var_10254, x = query_states_129)[name = string("op_10255")]; + tensor var_10260 = const()[name = string("op_10260"), val = tensor([0, 1, 3, 2])]; + tensor var_10265 = const()[name = string("op_10265"), val = tensor([1, 8, 128, 64])]; + tensor var_10266 = reshape(shape = var_10265, x = key_states_161)[name = string("op_10266")]; + tensor var_10271 = const()[name = string("op_10271"), val = tensor([0, 1, 3, 2])]; + tensor var_10276 = const()[name = string("op_10276"), val = tensor([1, 8, 128, 64])]; + tensor var_10277 = reshape(shape = var_10276, x = value_states_129)[name = string("op_10277")]; + tensor var_10282 = const()[name = string("op_10282"), val = tensor([0, 1, 3, 2])]; + int32 var_10293 = const()[name = string("op_10293"), val = int32(-1)]; + fp16 const_550_promoted = const()[name = string("const_550_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_165 = transpose(perm = var_10260, x = var_10255)[name = string("transpose_106")]; + tensor var_10295 = mul(x = hidden_states_165, y = const_550_promoted)[name = string("op_10295")]; + bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; + tensor input_293 = concat(axis = var_10293, interleave = input_293_interleave_0, values = (hidden_states_165, var_10295))[name = string("input_293")]; + tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; + fp16 var_10290_to_fp16 = const()[name = string("op_10290_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_10290_to_fp16, x = input_293)[name = string("normed_261_cast_fp16")]; + tensor normed_263_begin_0 = const()[name = string("normed_263_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_263_end_0 = const()[name = string("normed_263_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_263_end_mask_0 = const()[name = string("normed_263_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_263 = slice_by_index(begin = normed_263_begin_0, end = normed_263_end_0, end_mask = normed_263_end_mask_0, x = normed_261_cast_fp16)[name = string("normed_263")]; + tensor const_553 = const()[name = string("const_553"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440767744)))]; + tensor q_33 = mul(x = normed_263, y = const_553)[name = string("q_33")]; + int32 var_10318 = const()[name = string("op_10318"), val = int32(-1)]; + fp16 const_554_promoted = const()[name = string("const_554_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_167 = transpose(perm = var_10271, x = var_10266)[name = string("transpose_105")]; + tensor var_10320 = mul(x = hidden_states_167, y = const_554_promoted)[name = string("op_10320")]; + bool input_295_interleave_0 = const()[name = string("input_295_interleave_0"), val = bool(false)]; + tensor input_295 = concat(axis = var_10318, interleave = input_295_interleave_0, values = (hidden_states_167, var_10320))[name = string("input_295")]; + tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; + fp16 var_10315_to_fp16 = const()[name = string("op_10315_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_10315_to_fp16, x = input_295)[name = string("normed_265_cast_fp16")]; + tensor normed_267_begin_0 = const()[name = string("normed_267_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_267_end_0 = const()[name = string("normed_267_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_267_end_mask_0 = const()[name = string("normed_267_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_267 = slice_by_index(begin = normed_267_begin_0, end = normed_267_end_0, end_mask = normed_267_end_mask_0, x = normed_265_cast_fp16)[name = string("normed_267")]; + tensor const_557 = const()[name = string("const_557"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440768064)))]; + tensor k_33 = mul(x = normed_267, y = const_557)[name = string("k_33")]; + tensor var_10346 = mul(x = q_33, y = cos_5)[name = string("op_10346")]; + tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_65 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33)[name = string("x1_65")]; + tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_65 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33)[name = string("x2_65")]; + fp16 const_560_promoted = const()[name = string("const_560_promoted"), val = fp16(-0x1p+0)]; + tensor var_10367 = mul(x = x2_65, y = const_560_promoted)[name = string("op_10367")]; + int32 var_10369 = const()[name = string("op_10369"), val = int32(-1)]; + bool var_10370_interleave_0 = const()[name = string("op_10370_interleave_0"), val = bool(false)]; + tensor var_10370 = concat(axis = var_10369, interleave = var_10370_interleave_0, values = (var_10367, x1_65))[name = string("op_10370")]; + tensor var_10371 = mul(x = var_10370, y = sin_5)[name = string("op_10371")]; + tensor query_states_131 = add(x = var_10346, y = var_10371)[name = string("query_states_131")]; + tensor var_10374 = mul(x = k_33, y = cos_5)[name = string("op_10374")]; + tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_67 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33)[name = string("x1_67")]; + tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_67 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33)[name = string("x2_67")]; + fp16 const_563_promoted = const()[name = string("const_563_promoted"), val = fp16(-0x1p+0)]; + tensor var_10395 = mul(x = x2_67, y = const_563_promoted)[name = string("op_10395")]; + int32 var_10397 = const()[name = string("op_10397"), val = int32(-1)]; + bool var_10398_interleave_0 = const()[name = string("op_10398_interleave_0"), val = bool(false)]; + tensor var_10398 = concat(axis = var_10397, interleave = var_10398_interleave_0, values = (var_10395, x1_67))[name = string("op_10398")]; + tensor var_10399 = mul(x = var_10398, y = sin_5)[name = string("op_10399")]; + tensor key_states_163 = add(x = var_10374, y = var_10399)[name = string("key_states_163")]; + tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([16])]; + tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; + tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; + tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([17])]; + int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; + bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; + tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_290")]; + tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; + tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; + int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; + bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; + tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (expand_dims_196, concat_291_values1_0, var_1781, concat_291_values3_0))[name = string("concat_291")]; + tensor model_model_kv_cache_0_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_290, begin_mask = model_model_kv_cache_0_internal_tensor_assign_33_begin_mask_0, end = concat_291, end_mask = model_model_kv_cache_0_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_33_stride_0, update = key_states_163, x = coreml_update_state_87)[name = string("model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_88")]; + tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([44])]; + tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; + tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; + tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([45])]; + int32 concat_294_axis_0 = const()[name = string("concat_294_axis_0"), val = int32(0)]; + bool concat_294_interleave_0 = const()[name = string("concat_294_interleave_0"), val = bool(false)]; + tensor concat_294 = concat(axis = concat_294_axis_0, interleave = concat_294_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_294")]; + tensor concat_295_values1_0 = const()[name = string("concat_295_values1_0"), val = tensor([0])]; + tensor concat_295_values3_0 = const()[name = string("concat_295_values3_0"), val = tensor([0])]; + int32 concat_295_axis_0 = const()[name = string("concat_295_axis_0"), val = int32(0)]; + bool concat_295_interleave_0 = const()[name = string("concat_295_interleave_0"), val = bool(false)]; + tensor concat_295 = concat(axis = concat_295_axis_0, interleave = concat_295_interleave_0, values = (expand_dims_202, concat_295_values1_0, var_1781, concat_295_values3_0))[name = string("concat_295")]; + tensor model_model_kv_cache_0_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_131 = transpose(perm = var_10282, x = var_10277)[name = string("transpose_104")]; + tensor model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_294, begin_mask = model_model_kv_cache_0_internal_tensor_assign_34_begin_mask_0, end = concat_295, end_mask = model_model_kv_cache_0_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_34_stride_0, update = value_states_131, x = coreml_update_state_88)[name = string("model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_89")]; + tensor var_10470_begin_0 = const()[name = string("op_10470_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor var_10470_end_0 = const()[name = string("op_10470_end_0"), val = tensor([17, 8, 4096, 128])]; + tensor var_10470_end_mask_0 = const()[name = string("op_10470_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10470_cast_fp16 = slice_by_index(begin = var_10470_begin_0, end = var_10470_end_0, end_mask = var_10470_end_mask_0, x = coreml_update_state_89)[name = string("op_10470_cast_fp16")]; + tensor K_layer_cache_33_axes_0 = const()[name = string("K_layer_cache_33_axes_0"), val = tensor([0])]; + tensor K_layer_cache_33_cast_fp16 = squeeze(axes = K_layer_cache_33_axes_0, x = var_10470_cast_fp16)[name = string("K_layer_cache_33_cast_fp16")]; + tensor var_10477_begin_0 = const()[name = string("op_10477_begin_0"), val = tensor([44, 0, 0, 0])]; + tensor var_10477_end_0 = const()[name = string("op_10477_end_0"), val = tensor([45, 8, 4096, 128])]; + tensor var_10477_end_mask_0 = const()[name = string("op_10477_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_10477_cast_fp16 = slice_by_index(begin = var_10477_begin_0, end = var_10477_end_0, end_mask = var_10477_end_mask_0, x = coreml_update_state_89)[name = string("op_10477_cast_fp16")]; + tensor V_layer_cache_33_axes_0 = const()[name = string("V_layer_cache_33_axes_0"), val = tensor([0])]; + tensor V_layer_cache_33_cast_fp16 = squeeze(axes = V_layer_cache_33_axes_0, x = var_10477_cast_fp16)[name = string("V_layer_cache_33_cast_fp16")]; + tensor x_259_axes_0 = const()[name = string("x_259_axes_0"), val = tensor([1])]; + tensor x_259_cast_fp16 = expand_dims(axes = x_259_axes_0, x = K_layer_cache_33_cast_fp16)[name = string("x_259_cast_fp16")]; + tensor var_10506 = const()[name = string("op_10506"), val = tensor([1, 2, 1, 1])]; + tensor x_261_cast_fp16 = tile(reps = var_10506, x = x_259_cast_fp16)[name = string("x_261_cast_fp16")]; + tensor var_10518 = const()[name = string("op_10518"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_167_cast_fp16 = reshape(shape = var_10518, x = x_261_cast_fp16)[name = string("key_states_167_cast_fp16")]; + tensor x_265_axes_0 = const()[name = string("x_265_axes_0"), val = tensor([1])]; + tensor x_265_cast_fp16 = expand_dims(axes = x_265_axes_0, x = V_layer_cache_33_cast_fp16)[name = string("x_265_cast_fp16")]; + tensor var_10526 = const()[name = string("op_10526"), val = tensor([1, 2, 1, 1])]; + tensor x_267_cast_fp16 = tile(reps = var_10526, x = x_265_cast_fp16)[name = string("x_267_cast_fp16")]; + bool var_10553_transpose_x_0 = const()[name = string("op_10553_transpose_x_0"), val = bool(false)]; + bool var_10553_transpose_y_0 = const()[name = string("op_10553_transpose_y_0"), val = bool(true)]; + tensor var_10553 = matmul(transpose_x = var_10553_transpose_x_0, transpose_y = var_10553_transpose_y_0, x = query_states_131, y = key_states_167_cast_fp16)[name = string("op_10553")]; + fp16 var_10554_to_fp16 = const()[name = string("op_10554_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_65_cast_fp16 = mul(x = var_10553, y = var_10554_to_fp16)[name = string("attn_weights_65_cast_fp16")]; + tensor attn_weights_67_cast_fp16 = add(x = attn_weights_65_cast_fp16, y = causal_mask)[name = string("attn_weights_67_cast_fp16")]; + int32 var_10589 = const()[name = string("op_10589"), val = int32(-1)]; + tensor var_10591_cast_fp16 = softmax(axis = var_10589, x = attn_weights_67_cast_fp16)[name = string("op_10591_cast_fp16")]; + tensor concat_300 = const()[name = string("concat_300"), val = tensor([16, 64, 4096])]; + tensor reshape_48_cast_fp16 = reshape(shape = concat_300, x = var_10591_cast_fp16)[name = string("reshape_48_cast_fp16")]; + tensor concat_301 = const()[name = string("concat_301"), val = tensor([16, 4096, 128])]; + tensor reshape_49_cast_fp16 = reshape(shape = concat_301, x = x_267_cast_fp16)[name = string("reshape_49_cast_fp16")]; + bool matmul_16_transpose_x_0 = const()[name = string("matmul_16_transpose_x_0"), val = bool(false)]; + bool matmul_16_transpose_y_0 = const()[name = string("matmul_16_transpose_y_0"), val = bool(false)]; + tensor matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_0, transpose_y = matmul_16_transpose_y_0, x = reshape_48_cast_fp16, y = reshape_49_cast_fp16)[name = string("matmul_16_cast_fp16")]; + tensor concat_305 = const()[name = string("concat_305"), val = tensor([1, 16, 64, 128])]; + tensor reshape_50_cast_fp16 = reshape(shape = concat_305, x = matmul_16_cast_fp16)[name = string("reshape_50_cast_fp16")]; + tensor var_10603_perm_0 = const()[name = string("op_10603_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_10622 = const()[name = string("op_10622"), val = tensor([1, 64, 2048])]; + tensor var_10603_cast_fp16 = transpose(perm = var_10603_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_103")]; + tensor attn_output_165_cast_fp16 = reshape(shape = var_10622, x = var_10603_cast_fp16)[name = string("attn_output_165_cast_fp16")]; + tensor var_10627 = const()[name = string("op_10627"), val = tensor([0, 2, 1])]; + string var_10643_pad_type_0 = const()[name = string("op_10643_pad_type_0"), val = string("valid")]; + int32 var_10643_groups_0 = const()[name = string("op_10643_groups_0"), val = int32(1)]; + tensor var_10643_strides_0 = const()[name = string("op_10643_strides_0"), val = tensor([1])]; + tensor var_10643_pad_0 = const()[name = string("op_10643_pad_0"), val = tensor([0, 0])]; + tensor var_10643_dilations_0 = const()[name = string("op_10643_dilations_0"), val = tensor([1])]; + tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440768384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442865600))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_10628_cast_fp16 = transpose(perm = var_10627, x = attn_output_165_cast_fp16)[name = string("transpose_102")]; + tensor var_10643_cast_fp16 = conv(dilations = var_10643_dilations_0, groups = var_10643_groups_0, pad = var_10643_pad_0, pad_type = var_10643_pad_type_0, strides = var_10643_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_10628_cast_fp16)[name = string("op_10643_cast_fp16")]; + tensor var_10647 = const()[name = string("op_10647"), val = tensor([0, 2, 1])]; + tensor attn_output_169_cast_fp16 = transpose(perm = var_10647, x = var_10643_cast_fp16)[name = string("transpose_101")]; + tensor hidden_states_169_cast_fp16 = add(x = hidden_states_161_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_169_cast_fp16")]; + int32 var_10660 = const()[name = string("op_10660"), val = int32(-1)]; + fp16 const_575_promoted_to_fp16 = const()[name = string("const_575_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10662_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = const_575_promoted_to_fp16)[name = string("op_10662_cast_fp16")]; + bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; + tensor input_299_cast_fp16 = concat(axis = var_10660, interleave = input_299_interleave_0, values = (hidden_states_169_cast_fp16, var_10662_cast_fp16))[name = string("input_299_cast_fp16")]; + tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; + fp16 var_10657_to_fp16 = const()[name = string("op_10657_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_10657_to_fp16, x = input_299_cast_fp16)[name = string("normed_269_cast_fp16")]; + tensor normed_271_begin_0 = const()[name = string("normed_271_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_271_end_0 = const()[name = string("normed_271_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_271_end_mask_0 = const()[name = string("normed_271_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_271_cast_fp16 = slice_by_index(begin = normed_271_begin_0, end = normed_271_end_0, end_mask = normed_271_end_mask_0, x = normed_269_cast_fp16)[name = string("normed_271_cast_fp16")]; + tensor const_578_promoted_to_fp16 = const()[name = string("const_578_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442931200)))]; + tensor x_269_cast_fp16 = mul(x = normed_271_cast_fp16, y = const_578_promoted_to_fp16)[name = string("x_269_cast_fp16")]; + tensor var_10687 = const()[name = string("op_10687"), val = tensor([0, 2, 1])]; + tensor input_301_axes_0 = const()[name = string("input_301_axes_0"), val = tensor([2])]; + tensor var_10688 = transpose(perm = var_10687, x = x_269_cast_fp16)[name = string("transpose_100")]; + tensor input_301 = expand_dims(axes = input_301_axes_0, x = var_10688)[name = string("input_301")]; + string input_303_pad_type_0 = const()[name = string("input_303_pad_type_0"), val = string("valid")]; + tensor input_303_strides_0 = const()[name = string("input_303_strides_0"), val = tensor([1, 1])]; + tensor input_303_pad_0 = const()[name = string("input_303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_303_dilations_0 = const()[name = string("input_303_dilations_0"), val = tensor([1, 1])]; + int32 input_303_groups_0 = const()[name = string("input_303_groups_0"), val = int32(1)]; + tensor input_303 = conv(dilations = input_303_dilations_0, groups = input_303_groups_0, pad = input_303_pad_0, pad_type = input_303_pad_type_0, strides = input_303_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_palettized, x = input_301)[name = string("input_303")]; + string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; + tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; + tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; + int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; + tensor b_33 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_palettized, x = input_301)[name = string("b_33")]; + tensor c_33 = silu(x = input_303)[name = string("c_33")]; + tensor input_305 = mul(x = c_33, y = b_33)[name = string("input_305")]; + string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; + tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; + tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; + int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; + tensor e_33 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_palettized, x = input_305)[name = string("e_33")]; + tensor var_10710_axes_0 = const()[name = string("op_10710_axes_0"), val = tensor([2])]; + tensor var_10710 = squeeze(axes = var_10710_axes_0, x = e_33)[name = string("op_10710")]; + tensor var_10711 = const()[name = string("op_10711"), val = tensor([0, 2, 1])]; + tensor var_10712 = transpose(perm = var_10711, x = var_10710)[name = string("transpose_99")]; + tensor hidden_states_171_cast_fp16 = add(x = hidden_states_169_cast_fp16, y = var_10712)[name = string("hidden_states_171_cast_fp16")]; + int32 var_10724 = const()[name = string("op_10724"), val = int32(-1)]; + fp16 const_579_promoted_to_fp16 = const()[name = string("const_579_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_10726_cast_fp16 = mul(x = hidden_states_171_cast_fp16, y = const_579_promoted_to_fp16)[name = string("op_10726_cast_fp16")]; + bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; + tensor input_307_cast_fp16 = concat(axis = var_10724, interleave = input_307_interleave_0, values = (hidden_states_171_cast_fp16, var_10726_cast_fp16))[name = string("input_307_cast_fp16")]; + tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; + fp16 var_10721_to_fp16 = const()[name = string("op_10721_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_10721_to_fp16, x = input_307_cast_fp16)[name = string("normed_273_cast_fp16")]; + tensor normed_275_begin_0 = const()[name = string("normed_275_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_275_end_0 = const()[name = string("normed_275_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_275_end_mask_0 = const()[name = string("normed_275_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_275_cast_fp16 = slice_by_index(begin = normed_275_begin_0, end = normed_275_end_0, end_mask = normed_275_end_mask_0, x = normed_273_cast_fp16)[name = string("normed_275_cast_fp16")]; + tensor const_582_promoted_to_fp16 = const()[name = string("const_582_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442933312)))]; + tensor hidden_states_173_cast_fp16 = mul(x = normed_275_cast_fp16, y = const_582_promoted_to_fp16)[name = string("hidden_states_173_cast_fp16")]; + tensor var_10749 = const()[name = string("op_10749"), val = tensor([0, 2, 1])]; + tensor var_10752_axes_0 = const()[name = string("op_10752_axes_0"), val = tensor([2])]; + tensor var_10750_cast_fp16 = transpose(perm = var_10749, x = hidden_states_173_cast_fp16)[name = string("transpose_98")]; + tensor var_10752_cast_fp16 = expand_dims(axes = var_10752_axes_0, x = var_10750_cast_fp16)[name = string("op_10752_cast_fp16")]; + string query_states_137_pad_type_0 = const()[name = string("query_states_137_pad_type_0"), val = string("valid")]; + tensor query_states_137_strides_0 = const()[name = string("query_states_137_strides_0"), val = tensor([1, 1])]; + tensor query_states_137_pad_0 = const()[name = string("query_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_137_dilations_0 = const()[name = string("query_states_137_dilations_0"), val = tensor([1, 1])]; + int32 query_states_137_groups_0 = const()[name = string("query_states_137_groups_0"), val = int32(1)]; + tensor query_states_137 = conv(dilations = query_states_137_dilations_0, groups = query_states_137_groups_0, pad = query_states_137_pad_0, pad_type = query_states_137_pad_type_0, strides = query_states_137_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_10752_cast_fp16)[name = string("query_states_137")]; + string key_states_171_pad_type_0 = const()[name = string("key_states_171_pad_type_0"), val = string("valid")]; + tensor key_states_171_strides_0 = const()[name = string("key_states_171_strides_0"), val = tensor([1, 1])]; + tensor key_states_171_pad_0 = const()[name = string("key_states_171_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_171_dilations_0 = const()[name = string("key_states_171_dilations_0"), val = tensor([1, 1])]; + int32 key_states_171_groups_0 = const()[name = string("key_states_171_groups_0"), val = int32(1)]; + tensor key_states_171 = conv(dilations = key_states_171_dilations_0, groups = key_states_171_groups_0, pad = key_states_171_pad_0, pad_type = key_states_171_pad_type_0, strides = key_states_171_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_10752_cast_fp16)[name = string("key_states_171")]; + string value_states_137_pad_type_0 = const()[name = string("value_states_137_pad_type_0"), val = string("valid")]; + tensor value_states_137_strides_0 = const()[name = string("value_states_137_strides_0"), val = tensor([1, 1])]; + tensor value_states_137_pad_0 = const()[name = string("value_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_137_dilations_0 = const()[name = string("value_states_137_dilations_0"), val = tensor([1, 1])]; + int32 value_states_137_groups_0 = const()[name = string("value_states_137_groups_0"), val = int32(1)]; + tensor value_states_137 = conv(dilations = value_states_137_dilations_0, groups = value_states_137_groups_0, pad = value_states_137_pad_0, pad_type = value_states_137_pad_type_0, strides = value_states_137_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_10752_cast_fp16)[name = string("value_states_137")]; + tensor var_10794 = const()[name = string("op_10794"), val = tensor([1, 16, 128, 64])]; + tensor var_10795 = reshape(shape = var_10794, x = query_states_137)[name = string("op_10795")]; + tensor var_10800 = const()[name = string("op_10800"), val = tensor([0, 1, 3, 2])]; + tensor var_10805 = const()[name = string("op_10805"), val = tensor([1, 8, 128, 64])]; + tensor var_10806 = reshape(shape = var_10805, x = key_states_171)[name = string("op_10806")]; + tensor var_10811 = const()[name = string("op_10811"), val = tensor([0, 1, 3, 2])]; + tensor var_10816 = const()[name = string("op_10816"), val = tensor([1, 8, 128, 64])]; + tensor var_10817 = reshape(shape = var_10816, x = value_states_137)[name = string("op_10817")]; + tensor var_10822 = const()[name = string("op_10822"), val = tensor([0, 1, 3, 2])]; + int32 var_10833 = const()[name = string("op_10833"), val = int32(-1)]; + fp16 const_584_promoted = const()[name = string("const_584_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_175 = transpose(perm = var_10800, x = var_10795)[name = string("transpose_97")]; + tensor var_10835 = mul(x = hidden_states_175, y = const_584_promoted)[name = string("op_10835")]; + bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; + tensor input_311 = concat(axis = var_10833, interleave = input_311_interleave_0, values = (hidden_states_175, var_10835))[name = string("input_311")]; + tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; + fp16 var_10830_to_fp16 = const()[name = string("op_10830_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_10830_to_fp16, x = input_311)[name = string("normed_277_cast_fp16")]; + tensor normed_279_begin_0 = const()[name = string("normed_279_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_279_end_0 = const()[name = string("normed_279_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_279_end_mask_0 = const()[name = string("normed_279_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_279 = slice_by_index(begin = normed_279_begin_0, end = normed_279_end_0, end_mask = normed_279_end_mask_0, x = normed_277_cast_fp16)[name = string("normed_279")]; + tensor const_587 = const()[name = string("const_587"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442935424)))]; + tensor q_35 = mul(x = normed_279, y = const_587)[name = string("q_35")]; + int32 var_10858 = const()[name = string("op_10858"), val = int32(-1)]; + fp16 const_588_promoted = const()[name = string("const_588_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_177 = transpose(perm = var_10811, x = var_10806)[name = string("transpose_96")]; + tensor var_10860 = mul(x = hidden_states_177, y = const_588_promoted)[name = string("op_10860")]; + bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; + tensor input_313 = concat(axis = var_10858, interleave = input_313_interleave_0, values = (hidden_states_177, var_10860))[name = string("input_313")]; + tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; + fp16 var_10855_to_fp16 = const()[name = string("op_10855_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_10855_to_fp16, x = input_313)[name = string("normed_281_cast_fp16")]; + tensor normed_283_begin_0 = const()[name = string("normed_283_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_283_end_0 = const()[name = string("normed_283_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_283_end_mask_0 = const()[name = string("normed_283_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_283 = slice_by_index(begin = normed_283_begin_0, end = normed_283_end_0, end_mask = normed_283_end_mask_0, x = normed_281_cast_fp16)[name = string("normed_283")]; + tensor const_591 = const()[name = string("const_591"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442935744)))]; + tensor k_35 = mul(x = normed_283, y = const_591)[name = string("k_35")]; + tensor var_10886 = mul(x = q_35, y = cos_5)[name = string("op_10886")]; + tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_69 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35)[name = string("x1_69")]; + tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_69 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35)[name = string("x2_69")]; + fp16 const_594_promoted = const()[name = string("const_594_promoted"), val = fp16(-0x1p+0)]; + tensor var_10907 = mul(x = x2_69, y = const_594_promoted)[name = string("op_10907")]; + int32 var_10909 = const()[name = string("op_10909"), val = int32(-1)]; + bool var_10910_interleave_0 = const()[name = string("op_10910_interleave_0"), val = bool(false)]; + tensor var_10910 = concat(axis = var_10909, interleave = var_10910_interleave_0, values = (var_10907, x1_69))[name = string("op_10910")]; + tensor var_10911 = mul(x = var_10910, y = sin_5)[name = string("op_10911")]; + tensor query_states_139 = add(x = var_10886, y = var_10911)[name = string("query_states_139")]; + tensor var_10914 = mul(x = k_35, y = cos_5)[name = string("op_10914")]; + tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_71 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35)[name = string("x1_71")]; + tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_71 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35)[name = string("x2_71")]; + fp16 const_597_promoted = const()[name = string("const_597_promoted"), val = fp16(-0x1p+0)]; + tensor var_10935 = mul(x = x2_71, y = const_597_promoted)[name = string("op_10935")]; + int32 var_10937 = const()[name = string("op_10937"), val = int32(-1)]; + bool var_10938_interleave_0 = const()[name = string("op_10938_interleave_0"), val = bool(false)]; + tensor var_10938 = concat(axis = var_10937, interleave = var_10938_interleave_0, values = (var_10935, x1_71))[name = string("op_10938")]; + tensor var_10939 = mul(x = var_10938, y = sin_5)[name = string("op_10939")]; + tensor key_states_173 = add(x = var_10914, y = var_10939)[name = string("key_states_173")]; + tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([17])]; + tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; + tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; + tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([18])]; + int32 concat_308_axis_0 = const()[name = string("concat_308_axis_0"), val = int32(0)]; + bool concat_308_interleave_0 = const()[name = string("concat_308_interleave_0"), val = bool(false)]; + tensor concat_308 = concat(axis = concat_308_axis_0, interleave = concat_308_interleave_0, values = (expand_dims_204, expand_dims_205, current_pos, expand_dims_207))[name = string("concat_308")]; + tensor concat_309_values1_0 = const()[name = string("concat_309_values1_0"), val = tensor([0])]; + tensor concat_309_values3_0 = const()[name = string("concat_309_values3_0"), val = tensor([0])]; + int32 concat_309_axis_0 = const()[name = string("concat_309_axis_0"), val = int32(0)]; + bool concat_309_interleave_0 = const()[name = string("concat_309_interleave_0"), val = bool(false)]; + tensor concat_309 = concat(axis = concat_309_axis_0, interleave = concat_309_interleave_0, values = (expand_dims_208, concat_309_values1_0, var_1781, concat_309_values3_0))[name = string("concat_309")]; + tensor model_model_kv_cache_0_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_308, begin_mask = model_model_kv_cache_0_internal_tensor_assign_35_begin_mask_0, end = concat_309, end_mask = model_model_kv_cache_0_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_35_stride_0, update = key_states_173, x = coreml_update_state_89)[name = string("model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_90")]; + tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([45])]; + tensor expand_dims_211 = const()[name = string("expand_dims_211"), val = tensor([0])]; + tensor expand_dims_213 = const()[name = string("expand_dims_213"), val = tensor([0])]; + tensor expand_dims_214 = const()[name = string("expand_dims_214"), val = tensor([46])]; + int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)]; + bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)]; + tensor concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (expand_dims_210, expand_dims_211, current_pos, expand_dims_213))[name = string("concat_312")]; + tensor concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor([0])]; + tensor concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor([0])]; + int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)]; + bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)]; + tensor concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (expand_dims_214, concat_313_values1_0, var_1781, concat_313_values3_0))[name = string("concat_313")]; + tensor model_model_kv_cache_0_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_139 = transpose(perm = var_10822, x = var_10817)[name = string("transpose_95")]; + tensor model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_312, begin_mask = model_model_kv_cache_0_internal_tensor_assign_36_begin_mask_0, end = concat_313, end_mask = model_model_kv_cache_0_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_36_stride_0, update = value_states_139, x = coreml_update_state_90)[name = string("model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_91")]; + tensor var_11010_begin_0 = const()[name = string("op_11010_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor var_11010_end_0 = const()[name = string("op_11010_end_0"), val = tensor([18, 8, 4096, 128])]; + tensor var_11010_end_mask_0 = const()[name = string("op_11010_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11010_cast_fp16 = slice_by_index(begin = var_11010_begin_0, end = var_11010_end_0, end_mask = var_11010_end_mask_0, x = coreml_update_state_91)[name = string("op_11010_cast_fp16")]; + tensor K_layer_cache_35_axes_0 = const()[name = string("K_layer_cache_35_axes_0"), val = tensor([0])]; + tensor K_layer_cache_35_cast_fp16 = squeeze(axes = K_layer_cache_35_axes_0, x = var_11010_cast_fp16)[name = string("K_layer_cache_35_cast_fp16")]; + tensor var_11017_begin_0 = const()[name = string("op_11017_begin_0"), val = tensor([45, 0, 0, 0])]; + tensor var_11017_end_0 = const()[name = string("op_11017_end_0"), val = tensor([46, 8, 4096, 128])]; + tensor var_11017_end_mask_0 = const()[name = string("op_11017_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11017_cast_fp16 = slice_by_index(begin = var_11017_begin_0, end = var_11017_end_0, end_mask = var_11017_end_mask_0, x = coreml_update_state_91)[name = string("op_11017_cast_fp16")]; + tensor V_layer_cache_35_axes_0 = const()[name = string("V_layer_cache_35_axes_0"), val = tensor([0])]; + tensor V_layer_cache_35_cast_fp16 = squeeze(axes = V_layer_cache_35_axes_0, x = var_11017_cast_fp16)[name = string("V_layer_cache_35_cast_fp16")]; + tensor x_275_axes_0 = const()[name = string("x_275_axes_0"), val = tensor([1])]; + tensor x_275_cast_fp16 = expand_dims(axes = x_275_axes_0, x = K_layer_cache_35_cast_fp16)[name = string("x_275_cast_fp16")]; + tensor var_11046 = const()[name = string("op_11046"), val = tensor([1, 2, 1, 1])]; + tensor x_277_cast_fp16 = tile(reps = var_11046, x = x_275_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_11058 = const()[name = string("op_11058"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_177_cast_fp16 = reshape(shape = var_11058, x = x_277_cast_fp16)[name = string("key_states_177_cast_fp16")]; + tensor x_281_axes_0 = const()[name = string("x_281_axes_0"), val = tensor([1])]; + tensor x_281_cast_fp16 = expand_dims(axes = x_281_axes_0, x = V_layer_cache_35_cast_fp16)[name = string("x_281_cast_fp16")]; + tensor var_11066 = const()[name = string("op_11066"), val = tensor([1, 2, 1, 1])]; + tensor x_283_cast_fp16 = tile(reps = var_11066, x = x_281_cast_fp16)[name = string("x_283_cast_fp16")]; + bool var_11093_transpose_x_0 = const()[name = string("op_11093_transpose_x_0"), val = bool(false)]; + bool var_11093_transpose_y_0 = const()[name = string("op_11093_transpose_y_0"), val = bool(true)]; + tensor var_11093 = matmul(transpose_x = var_11093_transpose_x_0, transpose_y = var_11093_transpose_y_0, x = query_states_139, y = key_states_177_cast_fp16)[name = string("op_11093")]; + fp16 var_11094_to_fp16 = const()[name = string("op_11094_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_69_cast_fp16 = mul(x = var_11093, y = var_11094_to_fp16)[name = string("attn_weights_69_cast_fp16")]; + tensor attn_weights_71_cast_fp16 = add(x = attn_weights_69_cast_fp16, y = causal_mask)[name = string("attn_weights_71_cast_fp16")]; + int32 var_11129 = const()[name = string("op_11129"), val = int32(-1)]; + tensor var_11131_cast_fp16 = softmax(axis = var_11129, x = attn_weights_71_cast_fp16)[name = string("op_11131_cast_fp16")]; + tensor concat_318 = const()[name = string("concat_318"), val = tensor([16, 64, 4096])]; + tensor reshape_51_cast_fp16 = reshape(shape = concat_318, x = var_11131_cast_fp16)[name = string("reshape_51_cast_fp16")]; + tensor concat_319 = const()[name = string("concat_319"), val = tensor([16, 4096, 128])]; + tensor reshape_52_cast_fp16 = reshape(shape = concat_319, x = x_283_cast_fp16)[name = string("reshape_52_cast_fp16")]; + bool matmul_17_transpose_x_0 = const()[name = string("matmul_17_transpose_x_0"), val = bool(false)]; + bool matmul_17_transpose_y_0 = const()[name = string("matmul_17_transpose_y_0"), val = bool(false)]; + tensor matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_0, transpose_y = matmul_17_transpose_y_0, x = reshape_51_cast_fp16, y = reshape_52_cast_fp16)[name = string("matmul_17_cast_fp16")]; + tensor concat_323 = const()[name = string("concat_323"), val = tensor([1, 16, 64, 128])]; + tensor reshape_53_cast_fp16 = reshape(shape = concat_323, x = matmul_17_cast_fp16)[name = string("reshape_53_cast_fp16")]; + tensor var_11143_perm_0 = const()[name = string("op_11143_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11162 = const()[name = string("op_11162"), val = tensor([1, 64, 2048])]; + tensor var_11143_cast_fp16 = transpose(perm = var_11143_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_94")]; + tensor attn_output_175_cast_fp16 = reshape(shape = var_11162, x = var_11143_cast_fp16)[name = string("attn_output_175_cast_fp16")]; + tensor var_11167 = const()[name = string("op_11167"), val = tensor([0, 2, 1])]; + string var_11183_pad_type_0 = const()[name = string("op_11183_pad_type_0"), val = string("valid")]; + int32 var_11183_groups_0 = const()[name = string("op_11183_groups_0"), val = int32(1)]; + tensor var_11183_strides_0 = const()[name = string("op_11183_strides_0"), val = tensor([1])]; + tensor var_11183_pad_0 = const()[name = string("op_11183_pad_0"), val = tensor([0, 0])]; + tensor var_11183_dilations_0 = const()[name = string("op_11183_dilations_0"), val = tensor([1])]; + tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442936064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445033280))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11168_cast_fp16 = transpose(perm = var_11167, x = attn_output_175_cast_fp16)[name = string("transpose_93")]; + tensor var_11183_cast_fp16 = conv(dilations = var_11183_dilations_0, groups = var_11183_groups_0, pad = var_11183_pad_0, pad_type = var_11183_pad_type_0, strides = var_11183_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_11168_cast_fp16)[name = string("op_11183_cast_fp16")]; + tensor var_11187 = const()[name = string("op_11187"), val = tensor([0, 2, 1])]; + tensor attn_output_179_cast_fp16 = transpose(perm = var_11187, x = var_11183_cast_fp16)[name = string("transpose_92")]; + tensor hidden_states_179_cast_fp16 = add(x = hidden_states_171_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_179_cast_fp16")]; + int32 var_11200 = const()[name = string("op_11200"), val = int32(-1)]; + fp16 const_609_promoted_to_fp16 = const()[name = string("const_609_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11202_cast_fp16 = mul(x = hidden_states_179_cast_fp16, y = const_609_promoted_to_fp16)[name = string("op_11202_cast_fp16")]; + bool input_317_interleave_0 = const()[name = string("input_317_interleave_0"), val = bool(false)]; + tensor input_317_cast_fp16 = concat(axis = var_11200, interleave = input_317_interleave_0, values = (hidden_states_179_cast_fp16, var_11202_cast_fp16))[name = string("input_317_cast_fp16")]; + tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; + fp16 var_11197_to_fp16 = const()[name = string("op_11197_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_11197_to_fp16, x = input_317_cast_fp16)[name = string("normed_285_cast_fp16")]; + tensor normed_287_begin_0 = const()[name = string("normed_287_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_287_end_0 = const()[name = string("normed_287_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_287_end_mask_0 = const()[name = string("normed_287_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_287_cast_fp16 = slice_by_index(begin = normed_287_begin_0, end = normed_287_end_0, end_mask = normed_287_end_mask_0, x = normed_285_cast_fp16)[name = string("normed_287_cast_fp16")]; + tensor const_612_promoted_to_fp16 = const()[name = string("const_612_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445098880)))]; + tensor x_285_cast_fp16 = mul(x = normed_287_cast_fp16, y = const_612_promoted_to_fp16)[name = string("x_285_cast_fp16")]; + tensor var_11227 = const()[name = string("op_11227"), val = tensor([0, 2, 1])]; + tensor input_319_axes_0 = const()[name = string("input_319_axes_0"), val = tensor([2])]; + tensor var_11228 = transpose(perm = var_11227, x = x_285_cast_fp16)[name = string("transpose_91")]; + tensor input_319 = expand_dims(axes = input_319_axes_0, x = var_11228)[name = string("input_319")]; + string input_321_pad_type_0 = const()[name = string("input_321_pad_type_0"), val = string("valid")]; + tensor input_321_strides_0 = const()[name = string("input_321_strides_0"), val = tensor([1, 1])]; + tensor input_321_pad_0 = const()[name = string("input_321_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_321_dilations_0 = const()[name = string("input_321_dilations_0"), val = tensor([1, 1])]; + int32 input_321_groups_0 = const()[name = string("input_321_groups_0"), val = int32(1)]; + tensor input_321 = conv(dilations = input_321_dilations_0, groups = input_321_groups_0, pad = input_321_pad_0, pad_type = input_321_pad_type_0, strides = input_321_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_palettized, x = input_319)[name = string("input_321")]; + string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; + tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; + tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; + int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; + tensor b_35 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_palettized, x = input_319)[name = string("b_35")]; + tensor c_35 = silu(x = input_321)[name = string("c_35")]; + tensor input_323 = mul(x = c_35, y = b_35)[name = string("input_323")]; + string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; + tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; + tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; + int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; + tensor e_35 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_palettized, x = input_323)[name = string("e_35")]; + tensor var_11250_axes_0 = const()[name = string("op_11250_axes_0"), val = tensor([2])]; + tensor var_11250 = squeeze(axes = var_11250_axes_0, x = e_35)[name = string("op_11250")]; + tensor var_11251 = const()[name = string("op_11251"), val = tensor([0, 2, 1])]; + tensor var_11252 = transpose(perm = var_11251, x = var_11250)[name = string("transpose_90")]; + tensor hidden_states_181_cast_fp16 = add(x = hidden_states_179_cast_fp16, y = var_11252)[name = string("hidden_states_181_cast_fp16")]; + int32 var_11264 = const()[name = string("op_11264"), val = int32(-1)]; + fp16 const_613_promoted_to_fp16 = const()[name = string("const_613_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11266_cast_fp16 = mul(x = hidden_states_181_cast_fp16, y = const_613_promoted_to_fp16)[name = string("op_11266_cast_fp16")]; + bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; + tensor input_325_cast_fp16 = concat(axis = var_11264, interleave = input_325_interleave_0, values = (hidden_states_181_cast_fp16, var_11266_cast_fp16))[name = string("input_325_cast_fp16")]; + tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; + fp16 var_11261_to_fp16 = const()[name = string("op_11261_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_11261_to_fp16, x = input_325_cast_fp16)[name = string("normed_289_cast_fp16")]; + tensor normed_291_begin_0 = const()[name = string("normed_291_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_291_end_0 = const()[name = string("normed_291_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_291_end_mask_0 = const()[name = string("normed_291_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_291_cast_fp16 = slice_by_index(begin = normed_291_begin_0, end = normed_291_end_0, end_mask = normed_291_end_mask_0, x = normed_289_cast_fp16)[name = string("normed_291_cast_fp16")]; + tensor const_616_promoted_to_fp16 = const()[name = string("const_616_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445100992)))]; + tensor hidden_states_183_cast_fp16 = mul(x = normed_291_cast_fp16, y = const_616_promoted_to_fp16)[name = string("hidden_states_183_cast_fp16")]; + tensor var_11289 = const()[name = string("op_11289"), val = tensor([0, 2, 1])]; + tensor var_11292_axes_0 = const()[name = string("op_11292_axes_0"), val = tensor([2])]; + tensor var_11290_cast_fp16 = transpose(perm = var_11289, x = hidden_states_183_cast_fp16)[name = string("transpose_89")]; + tensor var_11292_cast_fp16 = expand_dims(axes = var_11292_axes_0, x = var_11290_cast_fp16)[name = string("op_11292_cast_fp16")]; + string query_states_145_pad_type_0 = const()[name = string("query_states_145_pad_type_0"), val = string("valid")]; + tensor query_states_145_strides_0 = const()[name = string("query_states_145_strides_0"), val = tensor([1, 1])]; + tensor query_states_145_pad_0 = const()[name = string("query_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_145_dilations_0 = const()[name = string("query_states_145_dilations_0"), val = tensor([1, 1])]; + int32 query_states_145_groups_0 = const()[name = string("query_states_145_groups_0"), val = int32(1)]; + tensor query_states_145 = conv(dilations = query_states_145_dilations_0, groups = query_states_145_groups_0, pad = query_states_145_pad_0, pad_type = query_states_145_pad_type_0, strides = query_states_145_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_11292_cast_fp16)[name = string("query_states_145")]; + string key_states_181_pad_type_0 = const()[name = string("key_states_181_pad_type_0"), val = string("valid")]; + tensor key_states_181_strides_0 = const()[name = string("key_states_181_strides_0"), val = tensor([1, 1])]; + tensor key_states_181_pad_0 = const()[name = string("key_states_181_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_181_dilations_0 = const()[name = string("key_states_181_dilations_0"), val = tensor([1, 1])]; + int32 key_states_181_groups_0 = const()[name = string("key_states_181_groups_0"), val = int32(1)]; + tensor key_states_181 = conv(dilations = key_states_181_dilations_0, groups = key_states_181_groups_0, pad = key_states_181_pad_0, pad_type = key_states_181_pad_type_0, strides = key_states_181_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_11292_cast_fp16)[name = string("key_states_181")]; + string value_states_145_pad_type_0 = const()[name = string("value_states_145_pad_type_0"), val = string("valid")]; + tensor value_states_145_strides_0 = const()[name = string("value_states_145_strides_0"), val = tensor([1, 1])]; + tensor value_states_145_pad_0 = const()[name = string("value_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_145_dilations_0 = const()[name = string("value_states_145_dilations_0"), val = tensor([1, 1])]; + int32 value_states_145_groups_0 = const()[name = string("value_states_145_groups_0"), val = int32(1)]; + tensor value_states_145 = conv(dilations = value_states_145_dilations_0, groups = value_states_145_groups_0, pad = value_states_145_pad_0, pad_type = value_states_145_pad_type_0, strides = value_states_145_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_11292_cast_fp16)[name = string("value_states_145")]; + tensor var_11334 = const()[name = string("op_11334"), val = tensor([1, 16, 128, 64])]; + tensor var_11335 = reshape(shape = var_11334, x = query_states_145)[name = string("op_11335")]; + tensor var_11340 = const()[name = string("op_11340"), val = tensor([0, 1, 3, 2])]; + tensor var_11345 = const()[name = string("op_11345"), val = tensor([1, 8, 128, 64])]; + tensor var_11346 = reshape(shape = var_11345, x = key_states_181)[name = string("op_11346")]; + tensor var_11351 = const()[name = string("op_11351"), val = tensor([0, 1, 3, 2])]; + tensor var_11356 = const()[name = string("op_11356"), val = tensor([1, 8, 128, 64])]; + tensor var_11357 = reshape(shape = var_11356, x = value_states_145)[name = string("op_11357")]; + tensor var_11362 = const()[name = string("op_11362"), val = tensor([0, 1, 3, 2])]; + int32 var_11373 = const()[name = string("op_11373"), val = int32(-1)]; + fp16 const_618_promoted = const()[name = string("const_618_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_185 = transpose(perm = var_11340, x = var_11335)[name = string("transpose_88")]; + tensor var_11375 = mul(x = hidden_states_185, y = const_618_promoted)[name = string("op_11375")]; + bool input_329_interleave_0 = const()[name = string("input_329_interleave_0"), val = bool(false)]; + tensor input_329 = concat(axis = var_11373, interleave = input_329_interleave_0, values = (hidden_states_185, var_11375))[name = string("input_329")]; + tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; + fp16 var_11370_to_fp16 = const()[name = string("op_11370_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_11370_to_fp16, x = input_329)[name = string("normed_293_cast_fp16")]; + tensor normed_295_begin_0 = const()[name = string("normed_295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_295_end_0 = const()[name = string("normed_295_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_295_end_mask_0 = const()[name = string("normed_295_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_295 = slice_by_index(begin = normed_295_begin_0, end = normed_295_end_0, end_mask = normed_295_end_mask_0, x = normed_293_cast_fp16)[name = string("normed_295")]; + tensor const_621 = const()[name = string("const_621"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445103104)))]; + tensor q_37 = mul(x = normed_295, y = const_621)[name = string("q_37")]; + int32 var_11398 = const()[name = string("op_11398"), val = int32(-1)]; + fp16 const_622_promoted = const()[name = string("const_622_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_187 = transpose(perm = var_11351, x = var_11346)[name = string("transpose_87")]; + tensor var_11400 = mul(x = hidden_states_187, y = const_622_promoted)[name = string("op_11400")]; + bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; + tensor input_331 = concat(axis = var_11398, interleave = input_331_interleave_0, values = (hidden_states_187, var_11400))[name = string("input_331")]; + tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; + fp16 var_11395_to_fp16 = const()[name = string("op_11395_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_11395_to_fp16, x = input_331)[name = string("normed_297_cast_fp16")]; + tensor normed_299_begin_0 = const()[name = string("normed_299_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_299_end_0 = const()[name = string("normed_299_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_299_end_mask_0 = const()[name = string("normed_299_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_299 = slice_by_index(begin = normed_299_begin_0, end = normed_299_end_0, end_mask = normed_299_end_mask_0, x = normed_297_cast_fp16)[name = string("normed_299")]; + tensor const_625 = const()[name = string("const_625"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445103424)))]; + tensor k_37 = mul(x = normed_299, y = const_625)[name = string("k_37")]; + tensor var_11426 = mul(x = q_37, y = cos_5)[name = string("op_11426")]; + tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_73 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37)[name = string("x1_73")]; + tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_73 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37)[name = string("x2_73")]; + fp16 const_628_promoted = const()[name = string("const_628_promoted"), val = fp16(-0x1p+0)]; + tensor var_11447 = mul(x = x2_73, y = const_628_promoted)[name = string("op_11447")]; + int32 var_11449 = const()[name = string("op_11449"), val = int32(-1)]; + bool var_11450_interleave_0 = const()[name = string("op_11450_interleave_0"), val = bool(false)]; + tensor var_11450 = concat(axis = var_11449, interleave = var_11450_interleave_0, values = (var_11447, x1_73))[name = string("op_11450")]; + tensor var_11451 = mul(x = var_11450, y = sin_5)[name = string("op_11451")]; + tensor query_states_147 = add(x = var_11426, y = var_11451)[name = string("query_states_147")]; + tensor var_11454 = mul(x = k_37, y = cos_5)[name = string("op_11454")]; + tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_75 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37)[name = string("x1_75")]; + tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_75 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37)[name = string("x2_75")]; + fp16 const_631_promoted = const()[name = string("const_631_promoted"), val = fp16(-0x1p+0)]; + tensor var_11475 = mul(x = x2_75, y = const_631_promoted)[name = string("op_11475")]; + int32 var_11477 = const()[name = string("op_11477"), val = int32(-1)]; + bool var_11478_interleave_0 = const()[name = string("op_11478_interleave_0"), val = bool(false)]; + tensor var_11478 = concat(axis = var_11477, interleave = var_11478_interleave_0, values = (var_11475, x1_75))[name = string("op_11478")]; + tensor var_11479 = mul(x = var_11478, y = sin_5)[name = string("op_11479")]; + tensor key_states_183 = add(x = var_11454, y = var_11479)[name = string("key_states_183")]; + tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([18])]; + tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; + tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; + tensor expand_dims_220 = const()[name = string("expand_dims_220"), val = tensor([19])]; + int32 concat_326_axis_0 = const()[name = string("concat_326_axis_0"), val = int32(0)]; + bool concat_326_interleave_0 = const()[name = string("concat_326_interleave_0"), val = bool(false)]; + tensor concat_326 = concat(axis = concat_326_axis_0, interleave = concat_326_interleave_0, values = (expand_dims_216, expand_dims_217, current_pos, expand_dims_219))[name = string("concat_326")]; + tensor concat_327_values1_0 = const()[name = string("concat_327_values1_0"), val = tensor([0])]; + tensor concat_327_values3_0 = const()[name = string("concat_327_values3_0"), val = tensor([0])]; + int32 concat_327_axis_0 = const()[name = string("concat_327_axis_0"), val = int32(0)]; + bool concat_327_interleave_0 = const()[name = string("concat_327_interleave_0"), val = bool(false)]; + tensor concat_327 = concat(axis = concat_327_axis_0, interleave = concat_327_interleave_0, values = (expand_dims_220, concat_327_values1_0, var_1781, concat_327_values3_0))[name = string("concat_327")]; + tensor model_model_kv_cache_0_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_326, begin_mask = model_model_kv_cache_0_internal_tensor_assign_37_begin_mask_0, end = concat_327, end_mask = model_model_kv_cache_0_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_37_stride_0, update = key_states_183, x = coreml_update_state_91)[name = string("model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_92")]; + tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([46])]; + tensor expand_dims_223 = const()[name = string("expand_dims_223"), val = tensor([0])]; + tensor expand_dims_225 = const()[name = string("expand_dims_225"), val = tensor([0])]; + tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([47])]; + int32 concat_330_axis_0 = const()[name = string("concat_330_axis_0"), val = int32(0)]; + bool concat_330_interleave_0 = const()[name = string("concat_330_interleave_0"), val = bool(false)]; + tensor concat_330 = concat(axis = concat_330_axis_0, interleave = concat_330_interleave_0, values = (expand_dims_222, expand_dims_223, current_pos, expand_dims_225))[name = string("concat_330")]; + tensor concat_331_values1_0 = const()[name = string("concat_331_values1_0"), val = tensor([0])]; + tensor concat_331_values3_0 = const()[name = string("concat_331_values3_0"), val = tensor([0])]; + int32 concat_331_axis_0 = const()[name = string("concat_331_axis_0"), val = int32(0)]; + bool concat_331_interleave_0 = const()[name = string("concat_331_interleave_0"), val = bool(false)]; + tensor concat_331 = concat(axis = concat_331_axis_0, interleave = concat_331_interleave_0, values = (expand_dims_226, concat_331_values1_0, var_1781, concat_331_values3_0))[name = string("concat_331")]; + tensor model_model_kv_cache_0_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_147 = transpose(perm = var_11362, x = var_11357)[name = string("transpose_86")]; + tensor model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_330, begin_mask = model_model_kv_cache_0_internal_tensor_assign_38_begin_mask_0, end = concat_331, end_mask = model_model_kv_cache_0_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_38_stride_0, update = value_states_147, x = coreml_update_state_92)[name = string("model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_93")]; + tensor var_11550_begin_0 = const()[name = string("op_11550_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor var_11550_end_0 = const()[name = string("op_11550_end_0"), val = tensor([19, 8, 4096, 128])]; + tensor var_11550_end_mask_0 = const()[name = string("op_11550_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11550_cast_fp16 = slice_by_index(begin = var_11550_begin_0, end = var_11550_end_0, end_mask = var_11550_end_mask_0, x = coreml_update_state_93)[name = string("op_11550_cast_fp16")]; + tensor K_layer_cache_37_axes_0 = const()[name = string("K_layer_cache_37_axes_0"), val = tensor([0])]; + tensor K_layer_cache_37_cast_fp16 = squeeze(axes = K_layer_cache_37_axes_0, x = var_11550_cast_fp16)[name = string("K_layer_cache_37_cast_fp16")]; + tensor var_11557_begin_0 = const()[name = string("op_11557_begin_0"), val = tensor([46, 0, 0, 0])]; + tensor var_11557_end_0 = const()[name = string("op_11557_end_0"), val = tensor([47, 8, 4096, 128])]; + tensor var_11557_end_mask_0 = const()[name = string("op_11557_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_11557_cast_fp16 = slice_by_index(begin = var_11557_begin_0, end = var_11557_end_0, end_mask = var_11557_end_mask_0, x = coreml_update_state_93)[name = string("op_11557_cast_fp16")]; + tensor V_layer_cache_37_axes_0 = const()[name = string("V_layer_cache_37_axes_0"), val = tensor([0])]; + tensor V_layer_cache_37_cast_fp16 = squeeze(axes = V_layer_cache_37_axes_0, x = var_11557_cast_fp16)[name = string("V_layer_cache_37_cast_fp16")]; + tensor x_291_axes_0 = const()[name = string("x_291_axes_0"), val = tensor([1])]; + tensor x_291_cast_fp16 = expand_dims(axes = x_291_axes_0, x = K_layer_cache_37_cast_fp16)[name = string("x_291_cast_fp16")]; + tensor var_11586 = const()[name = string("op_11586"), val = tensor([1, 2, 1, 1])]; + tensor x_293_cast_fp16 = tile(reps = var_11586, x = x_291_cast_fp16)[name = string("x_293_cast_fp16")]; + tensor var_11598 = const()[name = string("op_11598"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_187_cast_fp16 = reshape(shape = var_11598, x = x_293_cast_fp16)[name = string("key_states_187_cast_fp16")]; + tensor x_297_axes_0 = const()[name = string("x_297_axes_0"), val = tensor([1])]; + tensor x_297_cast_fp16 = expand_dims(axes = x_297_axes_0, x = V_layer_cache_37_cast_fp16)[name = string("x_297_cast_fp16")]; + tensor var_11606 = const()[name = string("op_11606"), val = tensor([1, 2, 1, 1])]; + tensor x_299_cast_fp16 = tile(reps = var_11606, x = x_297_cast_fp16)[name = string("x_299_cast_fp16")]; + bool var_11633_transpose_x_0 = const()[name = string("op_11633_transpose_x_0"), val = bool(false)]; + bool var_11633_transpose_y_0 = const()[name = string("op_11633_transpose_y_0"), val = bool(true)]; + tensor var_11633 = matmul(transpose_x = var_11633_transpose_x_0, transpose_y = var_11633_transpose_y_0, x = query_states_147, y = key_states_187_cast_fp16)[name = string("op_11633")]; + fp16 var_11634_to_fp16 = const()[name = string("op_11634_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_73_cast_fp16 = mul(x = var_11633, y = var_11634_to_fp16)[name = string("attn_weights_73_cast_fp16")]; + tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = causal_mask)[name = string("attn_weights_75_cast_fp16")]; + int32 var_11669 = const()[name = string("op_11669"), val = int32(-1)]; + tensor var_11671_cast_fp16 = softmax(axis = var_11669, x = attn_weights_75_cast_fp16)[name = string("op_11671_cast_fp16")]; + tensor concat_336 = const()[name = string("concat_336"), val = tensor([16, 64, 4096])]; + tensor reshape_54_cast_fp16 = reshape(shape = concat_336, x = var_11671_cast_fp16)[name = string("reshape_54_cast_fp16")]; + tensor concat_337 = const()[name = string("concat_337"), val = tensor([16, 4096, 128])]; + tensor reshape_55_cast_fp16 = reshape(shape = concat_337, x = x_299_cast_fp16)[name = string("reshape_55_cast_fp16")]; + bool matmul_18_transpose_x_0 = const()[name = string("matmul_18_transpose_x_0"), val = bool(false)]; + bool matmul_18_transpose_y_0 = const()[name = string("matmul_18_transpose_y_0"), val = bool(false)]; + tensor matmul_18_cast_fp16 = matmul(transpose_x = matmul_18_transpose_x_0, transpose_y = matmul_18_transpose_y_0, x = reshape_54_cast_fp16, y = reshape_55_cast_fp16)[name = string("matmul_18_cast_fp16")]; + tensor concat_341 = const()[name = string("concat_341"), val = tensor([1, 16, 64, 128])]; + tensor reshape_56_cast_fp16 = reshape(shape = concat_341, x = matmul_18_cast_fp16)[name = string("reshape_56_cast_fp16")]; + tensor var_11683_perm_0 = const()[name = string("op_11683_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_11702 = const()[name = string("op_11702"), val = tensor([1, 64, 2048])]; + tensor var_11683_cast_fp16 = transpose(perm = var_11683_perm_0, x = reshape_56_cast_fp16)[name = string("transpose_85")]; + tensor attn_output_185_cast_fp16 = reshape(shape = var_11702, x = var_11683_cast_fp16)[name = string("attn_output_185_cast_fp16")]; + tensor var_11707 = const()[name = string("op_11707"), val = tensor([0, 2, 1])]; + string var_11723_pad_type_0 = const()[name = string("op_11723_pad_type_0"), val = string("valid")]; + int32 var_11723_groups_0 = const()[name = string("op_11723_groups_0"), val = int32(1)]; + tensor var_11723_strides_0 = const()[name = string("op_11723_strides_0"), val = tensor([1])]; + tensor var_11723_pad_0 = const()[name = string("op_11723_pad_0"), val = tensor([0, 0])]; + tensor var_11723_dilations_0 = const()[name = string("op_11723_dilations_0"), val = tensor([1])]; + tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445103744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447200960))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_11708_cast_fp16 = transpose(perm = var_11707, x = attn_output_185_cast_fp16)[name = string("transpose_84")]; + tensor var_11723_cast_fp16 = conv(dilations = var_11723_dilations_0, groups = var_11723_groups_0, pad = var_11723_pad_0, pad_type = var_11723_pad_type_0, strides = var_11723_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_11708_cast_fp16)[name = string("op_11723_cast_fp16")]; + tensor var_11727 = const()[name = string("op_11727"), val = tensor([0, 2, 1])]; + tensor attn_output_189_cast_fp16 = transpose(perm = var_11727, x = var_11723_cast_fp16)[name = string("transpose_83")]; + tensor hidden_states_189_cast_fp16 = add(x = hidden_states_181_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_189_cast_fp16")]; + int32 var_11740 = const()[name = string("op_11740"), val = int32(-1)]; + fp16 const_643_promoted_to_fp16 = const()[name = string("const_643_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11742_cast_fp16 = mul(x = hidden_states_189_cast_fp16, y = const_643_promoted_to_fp16)[name = string("op_11742_cast_fp16")]; + bool input_335_interleave_0 = const()[name = string("input_335_interleave_0"), val = bool(false)]; + tensor input_335_cast_fp16 = concat(axis = var_11740, interleave = input_335_interleave_0, values = (hidden_states_189_cast_fp16, var_11742_cast_fp16))[name = string("input_335_cast_fp16")]; + tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; + fp16 var_11737_to_fp16 = const()[name = string("op_11737_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_11737_to_fp16, x = input_335_cast_fp16)[name = string("normed_301_cast_fp16")]; + tensor normed_303_begin_0 = const()[name = string("normed_303_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_303_end_0 = const()[name = string("normed_303_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_303_end_mask_0 = const()[name = string("normed_303_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_303_cast_fp16 = slice_by_index(begin = normed_303_begin_0, end = normed_303_end_0, end_mask = normed_303_end_mask_0, x = normed_301_cast_fp16)[name = string("normed_303_cast_fp16")]; + tensor const_646_promoted_to_fp16 = const()[name = string("const_646_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447266560)))]; + tensor x_301_cast_fp16 = mul(x = normed_303_cast_fp16, y = const_646_promoted_to_fp16)[name = string("x_301_cast_fp16")]; + tensor var_11767 = const()[name = string("op_11767"), val = tensor([0, 2, 1])]; + tensor input_337_axes_0 = const()[name = string("input_337_axes_0"), val = tensor([2])]; + tensor var_11768 = transpose(perm = var_11767, x = x_301_cast_fp16)[name = string("transpose_82")]; + tensor input_337 = expand_dims(axes = input_337_axes_0, x = var_11768)[name = string("input_337")]; + string input_339_pad_type_0 = const()[name = string("input_339_pad_type_0"), val = string("valid")]; + tensor input_339_strides_0 = const()[name = string("input_339_strides_0"), val = tensor([1, 1])]; + tensor input_339_pad_0 = const()[name = string("input_339_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_339_dilations_0 = const()[name = string("input_339_dilations_0"), val = tensor([1, 1])]; + int32 input_339_groups_0 = const()[name = string("input_339_groups_0"), val = int32(1)]; + tensor input_339 = conv(dilations = input_339_dilations_0, groups = input_339_groups_0, pad = input_339_pad_0, pad_type = input_339_pad_type_0, strides = input_339_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_palettized, x = input_337)[name = string("input_339")]; + string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; + tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; + tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; + int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; + tensor b_37 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_palettized, x = input_337)[name = string("b_37")]; + tensor c_37 = silu(x = input_339)[name = string("c_37")]; + tensor input_341 = mul(x = c_37, y = b_37)[name = string("input_341")]; + string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; + tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; + tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; + int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; + tensor e_37 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_palettized, x = input_341)[name = string("e_37")]; + tensor var_11790_axes_0 = const()[name = string("op_11790_axes_0"), val = tensor([2])]; + tensor var_11790 = squeeze(axes = var_11790_axes_0, x = e_37)[name = string("op_11790")]; + tensor var_11791 = const()[name = string("op_11791"), val = tensor([0, 2, 1])]; + tensor var_11792 = transpose(perm = var_11791, x = var_11790)[name = string("transpose_81")]; + tensor hidden_states_191_cast_fp16 = add(x = hidden_states_189_cast_fp16, y = var_11792)[name = string("hidden_states_191_cast_fp16")]; + int32 var_11804 = const()[name = string("op_11804"), val = int32(-1)]; + fp16 const_647_promoted_to_fp16 = const()[name = string("const_647_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_11806_cast_fp16 = mul(x = hidden_states_191_cast_fp16, y = const_647_promoted_to_fp16)[name = string("op_11806_cast_fp16")]; + bool input_343_interleave_0 = const()[name = string("input_343_interleave_0"), val = bool(false)]; + tensor input_343_cast_fp16 = concat(axis = var_11804, interleave = input_343_interleave_0, values = (hidden_states_191_cast_fp16, var_11806_cast_fp16))[name = string("input_343_cast_fp16")]; + tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; + fp16 var_11801_to_fp16 = const()[name = string("op_11801_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_11801_to_fp16, x = input_343_cast_fp16)[name = string("normed_305_cast_fp16")]; + tensor normed_307_begin_0 = const()[name = string("normed_307_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_307_end_0 = const()[name = string("normed_307_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_307_end_mask_0 = const()[name = string("normed_307_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_307_cast_fp16 = slice_by_index(begin = normed_307_begin_0, end = normed_307_end_0, end_mask = normed_307_end_mask_0, x = normed_305_cast_fp16)[name = string("normed_307_cast_fp16")]; + tensor const_650_promoted_to_fp16 = const()[name = string("const_650_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447268672)))]; + tensor hidden_states_193_cast_fp16 = mul(x = normed_307_cast_fp16, y = const_650_promoted_to_fp16)[name = string("hidden_states_193_cast_fp16")]; + tensor var_11829 = const()[name = string("op_11829"), val = tensor([0, 2, 1])]; + tensor var_11832_axes_0 = const()[name = string("op_11832_axes_0"), val = tensor([2])]; + tensor var_11830_cast_fp16 = transpose(perm = var_11829, x = hidden_states_193_cast_fp16)[name = string("transpose_80")]; + tensor var_11832_cast_fp16 = expand_dims(axes = var_11832_axes_0, x = var_11830_cast_fp16)[name = string("op_11832_cast_fp16")]; + string query_states_153_pad_type_0 = const()[name = string("query_states_153_pad_type_0"), val = string("valid")]; + tensor query_states_153_strides_0 = const()[name = string("query_states_153_strides_0"), val = tensor([1, 1])]; + tensor query_states_153_pad_0 = const()[name = string("query_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_153_dilations_0 = const()[name = string("query_states_153_dilations_0"), val = tensor([1, 1])]; + int32 query_states_153_groups_0 = const()[name = string("query_states_153_groups_0"), val = int32(1)]; + tensor query_states_153 = conv(dilations = query_states_153_dilations_0, groups = query_states_153_groups_0, pad = query_states_153_pad_0, pad_type = query_states_153_pad_type_0, strides = query_states_153_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_11832_cast_fp16)[name = string("query_states_153")]; + string key_states_191_pad_type_0 = const()[name = string("key_states_191_pad_type_0"), val = string("valid")]; + tensor key_states_191_strides_0 = const()[name = string("key_states_191_strides_0"), val = tensor([1, 1])]; + tensor key_states_191_pad_0 = const()[name = string("key_states_191_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_191_dilations_0 = const()[name = string("key_states_191_dilations_0"), val = tensor([1, 1])]; + int32 key_states_191_groups_0 = const()[name = string("key_states_191_groups_0"), val = int32(1)]; + tensor key_states_191 = conv(dilations = key_states_191_dilations_0, groups = key_states_191_groups_0, pad = key_states_191_pad_0, pad_type = key_states_191_pad_type_0, strides = key_states_191_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_11832_cast_fp16)[name = string("key_states_191")]; + string value_states_153_pad_type_0 = const()[name = string("value_states_153_pad_type_0"), val = string("valid")]; + tensor value_states_153_strides_0 = const()[name = string("value_states_153_strides_0"), val = tensor([1, 1])]; + tensor value_states_153_pad_0 = const()[name = string("value_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_153_dilations_0 = const()[name = string("value_states_153_dilations_0"), val = tensor([1, 1])]; + int32 value_states_153_groups_0 = const()[name = string("value_states_153_groups_0"), val = int32(1)]; + tensor value_states_153 = conv(dilations = value_states_153_dilations_0, groups = value_states_153_groups_0, pad = value_states_153_pad_0, pad_type = value_states_153_pad_type_0, strides = value_states_153_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_11832_cast_fp16)[name = string("value_states_153")]; + tensor var_11874 = const()[name = string("op_11874"), val = tensor([1, 16, 128, 64])]; + tensor var_11875 = reshape(shape = var_11874, x = query_states_153)[name = string("op_11875")]; + tensor var_11880 = const()[name = string("op_11880"), val = tensor([0, 1, 3, 2])]; + tensor var_11885 = const()[name = string("op_11885"), val = tensor([1, 8, 128, 64])]; + tensor var_11886 = reshape(shape = var_11885, x = key_states_191)[name = string("op_11886")]; + tensor var_11891 = const()[name = string("op_11891"), val = tensor([0, 1, 3, 2])]; + tensor var_11896 = const()[name = string("op_11896"), val = tensor([1, 8, 128, 64])]; + tensor var_11897 = reshape(shape = var_11896, x = value_states_153)[name = string("op_11897")]; + tensor var_11902 = const()[name = string("op_11902"), val = tensor([0, 1, 3, 2])]; + int32 var_11913 = const()[name = string("op_11913"), val = int32(-1)]; + fp16 const_652_promoted = const()[name = string("const_652_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_195 = transpose(perm = var_11880, x = var_11875)[name = string("transpose_79")]; + tensor var_11915 = mul(x = hidden_states_195, y = const_652_promoted)[name = string("op_11915")]; + bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; + tensor input_347 = concat(axis = var_11913, interleave = input_347_interleave_0, values = (hidden_states_195, var_11915))[name = string("input_347")]; + tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; + fp16 var_11910_to_fp16 = const()[name = string("op_11910_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_11910_to_fp16, x = input_347)[name = string("normed_309_cast_fp16")]; + tensor normed_311_begin_0 = const()[name = string("normed_311_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_311_end_0 = const()[name = string("normed_311_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_311_end_mask_0 = const()[name = string("normed_311_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_311 = slice_by_index(begin = normed_311_begin_0, end = normed_311_end_0, end_mask = normed_311_end_mask_0, x = normed_309_cast_fp16)[name = string("normed_311")]; + tensor const_655 = const()[name = string("const_655"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447270784)))]; + tensor q_39 = mul(x = normed_311, y = const_655)[name = string("q_39")]; + int32 var_11938 = const()[name = string("op_11938"), val = int32(-1)]; + fp16 const_656_promoted = const()[name = string("const_656_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_197 = transpose(perm = var_11891, x = var_11886)[name = string("transpose_78")]; + tensor var_11940 = mul(x = hidden_states_197, y = const_656_promoted)[name = string("op_11940")]; + bool input_349_interleave_0 = const()[name = string("input_349_interleave_0"), val = bool(false)]; + tensor input_349 = concat(axis = var_11938, interleave = input_349_interleave_0, values = (hidden_states_197, var_11940))[name = string("input_349")]; + tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; + fp16 var_11935_to_fp16 = const()[name = string("op_11935_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_11935_to_fp16, x = input_349)[name = string("normed_313_cast_fp16")]; + tensor normed_315_begin_0 = const()[name = string("normed_315_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_315_end_0 = const()[name = string("normed_315_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_315_end_mask_0 = const()[name = string("normed_315_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_315 = slice_by_index(begin = normed_315_begin_0, end = normed_315_end_0, end_mask = normed_315_end_mask_0, x = normed_313_cast_fp16)[name = string("normed_315")]; + tensor const_659 = const()[name = string("const_659"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447271104)))]; + tensor k_39 = mul(x = normed_315, y = const_659)[name = string("k_39")]; + tensor var_11966 = mul(x = q_39, y = cos_5)[name = string("op_11966")]; + tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_77 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39)[name = string("x1_77")]; + tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_77 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39)[name = string("x2_77")]; + fp16 const_662_promoted = const()[name = string("const_662_promoted"), val = fp16(-0x1p+0)]; + tensor var_11987 = mul(x = x2_77, y = const_662_promoted)[name = string("op_11987")]; + int32 var_11989 = const()[name = string("op_11989"), val = int32(-1)]; + bool var_11990_interleave_0 = const()[name = string("op_11990_interleave_0"), val = bool(false)]; + tensor var_11990 = concat(axis = var_11989, interleave = var_11990_interleave_0, values = (var_11987, x1_77))[name = string("op_11990")]; + tensor var_11991 = mul(x = var_11990, y = sin_5)[name = string("op_11991")]; + tensor query_states_155 = add(x = var_11966, y = var_11991)[name = string("query_states_155")]; + tensor var_11994 = mul(x = k_39, y = cos_5)[name = string("op_11994")]; + tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_79 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39)[name = string("x1_79")]; + tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_79 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39)[name = string("x2_79")]; + fp16 const_665_promoted = const()[name = string("const_665_promoted"), val = fp16(-0x1p+0)]; + tensor var_12015 = mul(x = x2_79, y = const_665_promoted)[name = string("op_12015")]; + int32 var_12017 = const()[name = string("op_12017"), val = int32(-1)]; + bool var_12018_interleave_0 = const()[name = string("op_12018_interleave_0"), val = bool(false)]; + tensor var_12018 = concat(axis = var_12017, interleave = var_12018_interleave_0, values = (var_12015, x1_79))[name = string("op_12018")]; + tensor var_12019 = mul(x = var_12018, y = sin_5)[name = string("op_12019")]; + tensor key_states_193 = add(x = var_11994, y = var_12019)[name = string("key_states_193")]; + tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([19])]; + tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; + tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; + tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([20])]; + int32 concat_344_axis_0 = const()[name = string("concat_344_axis_0"), val = int32(0)]; + bool concat_344_interleave_0 = const()[name = string("concat_344_interleave_0"), val = bool(false)]; + tensor concat_344 = concat(axis = concat_344_axis_0, interleave = concat_344_interleave_0, values = (expand_dims_228, expand_dims_229, current_pos, expand_dims_231))[name = string("concat_344")]; + tensor concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = tensor([0])]; + tensor concat_345_values3_0 = const()[name = string("concat_345_values3_0"), val = tensor([0])]; + int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)]; + bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)]; + tensor concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (expand_dims_232, concat_345_values1_0, var_1781, concat_345_values3_0))[name = string("concat_345")]; + tensor model_model_kv_cache_0_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_344, begin_mask = model_model_kv_cache_0_internal_tensor_assign_39_begin_mask_0, end = concat_345, end_mask = model_model_kv_cache_0_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_39_stride_0, update = key_states_193, x = coreml_update_state_93)[name = string("model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_94")]; + tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([47])]; + tensor expand_dims_235 = const()[name = string("expand_dims_235"), val = tensor([0])]; + tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; + tensor expand_dims_238 = const()[name = string("expand_dims_238"), val = tensor([48])]; + int32 concat_348_axis_0 = const()[name = string("concat_348_axis_0"), val = int32(0)]; + bool concat_348_interleave_0 = const()[name = string("concat_348_interleave_0"), val = bool(false)]; + tensor concat_348 = concat(axis = concat_348_axis_0, interleave = concat_348_interleave_0, values = (expand_dims_234, expand_dims_235, current_pos, expand_dims_237))[name = string("concat_348")]; + tensor concat_349_values1_0 = const()[name = string("concat_349_values1_0"), val = tensor([0])]; + tensor concat_349_values3_0 = const()[name = string("concat_349_values3_0"), val = tensor([0])]; + int32 concat_349_axis_0 = const()[name = string("concat_349_axis_0"), val = int32(0)]; + bool concat_349_interleave_0 = const()[name = string("concat_349_interleave_0"), val = bool(false)]; + tensor concat_349 = concat(axis = concat_349_axis_0, interleave = concat_349_interleave_0, values = (expand_dims_238, concat_349_values1_0, var_1781, concat_349_values3_0))[name = string("concat_349")]; + tensor model_model_kv_cache_0_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_155 = transpose(perm = var_11902, x = var_11897)[name = string("transpose_77")]; + tensor model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_348, begin_mask = model_model_kv_cache_0_internal_tensor_assign_40_begin_mask_0, end = concat_349, end_mask = model_model_kv_cache_0_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_40_stride_0, update = value_states_155, x = coreml_update_state_94)[name = string("model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_95")]; + tensor var_12090_begin_0 = const()[name = string("op_12090_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor var_12090_end_0 = const()[name = string("op_12090_end_0"), val = tensor([20, 8, 4096, 128])]; + tensor var_12090_end_mask_0 = const()[name = string("op_12090_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12090_cast_fp16 = slice_by_index(begin = var_12090_begin_0, end = var_12090_end_0, end_mask = var_12090_end_mask_0, x = coreml_update_state_95)[name = string("op_12090_cast_fp16")]; + tensor K_layer_cache_39_axes_0 = const()[name = string("K_layer_cache_39_axes_0"), val = tensor([0])]; + tensor K_layer_cache_39_cast_fp16 = squeeze(axes = K_layer_cache_39_axes_0, x = var_12090_cast_fp16)[name = string("K_layer_cache_39_cast_fp16")]; + tensor var_12097_begin_0 = const()[name = string("op_12097_begin_0"), val = tensor([47, 0, 0, 0])]; + tensor var_12097_end_0 = const()[name = string("op_12097_end_0"), val = tensor([48, 8, 4096, 128])]; + tensor var_12097_end_mask_0 = const()[name = string("op_12097_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12097_cast_fp16 = slice_by_index(begin = var_12097_begin_0, end = var_12097_end_0, end_mask = var_12097_end_mask_0, x = coreml_update_state_95)[name = string("op_12097_cast_fp16")]; + tensor V_layer_cache_39_axes_0 = const()[name = string("V_layer_cache_39_axes_0"), val = tensor([0])]; + tensor V_layer_cache_39_cast_fp16 = squeeze(axes = V_layer_cache_39_axes_0, x = var_12097_cast_fp16)[name = string("V_layer_cache_39_cast_fp16")]; + tensor x_307_axes_0 = const()[name = string("x_307_axes_0"), val = tensor([1])]; + tensor x_307_cast_fp16 = expand_dims(axes = x_307_axes_0, x = K_layer_cache_39_cast_fp16)[name = string("x_307_cast_fp16")]; + tensor var_12126 = const()[name = string("op_12126"), val = tensor([1, 2, 1, 1])]; + tensor x_309_cast_fp16 = tile(reps = var_12126, x = x_307_cast_fp16)[name = string("x_309_cast_fp16")]; + tensor var_12138 = const()[name = string("op_12138"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_197_cast_fp16 = reshape(shape = var_12138, x = x_309_cast_fp16)[name = string("key_states_197_cast_fp16")]; + tensor x_313_axes_0 = const()[name = string("x_313_axes_0"), val = tensor([1])]; + tensor x_313_cast_fp16 = expand_dims(axes = x_313_axes_0, x = V_layer_cache_39_cast_fp16)[name = string("x_313_cast_fp16")]; + tensor var_12146 = const()[name = string("op_12146"), val = tensor([1, 2, 1, 1])]; + tensor x_315_cast_fp16 = tile(reps = var_12146, x = x_313_cast_fp16)[name = string("x_315_cast_fp16")]; + bool var_12173_transpose_x_0 = const()[name = string("op_12173_transpose_x_0"), val = bool(false)]; + bool var_12173_transpose_y_0 = const()[name = string("op_12173_transpose_y_0"), val = bool(true)]; + tensor var_12173 = matmul(transpose_x = var_12173_transpose_x_0, transpose_y = var_12173_transpose_y_0, x = query_states_155, y = key_states_197_cast_fp16)[name = string("op_12173")]; + fp16 var_12174_to_fp16 = const()[name = string("op_12174_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_77_cast_fp16 = mul(x = var_12173, y = var_12174_to_fp16)[name = string("attn_weights_77_cast_fp16")]; + tensor attn_weights_79_cast_fp16 = add(x = attn_weights_77_cast_fp16, y = causal_mask)[name = string("attn_weights_79_cast_fp16")]; + int32 var_12209 = const()[name = string("op_12209"), val = int32(-1)]; + tensor var_12211_cast_fp16 = softmax(axis = var_12209, x = attn_weights_79_cast_fp16)[name = string("op_12211_cast_fp16")]; + tensor concat_354 = const()[name = string("concat_354"), val = tensor([16, 64, 4096])]; + tensor reshape_57_cast_fp16 = reshape(shape = concat_354, x = var_12211_cast_fp16)[name = string("reshape_57_cast_fp16")]; + tensor concat_355 = const()[name = string("concat_355"), val = tensor([16, 4096, 128])]; + tensor reshape_58_cast_fp16 = reshape(shape = concat_355, x = x_315_cast_fp16)[name = string("reshape_58_cast_fp16")]; + bool matmul_19_transpose_x_0 = const()[name = string("matmul_19_transpose_x_0"), val = bool(false)]; + bool matmul_19_transpose_y_0 = const()[name = string("matmul_19_transpose_y_0"), val = bool(false)]; + tensor matmul_19_cast_fp16 = matmul(transpose_x = matmul_19_transpose_x_0, transpose_y = matmul_19_transpose_y_0, x = reshape_57_cast_fp16, y = reshape_58_cast_fp16)[name = string("matmul_19_cast_fp16")]; + tensor concat_359 = const()[name = string("concat_359"), val = tensor([1, 16, 64, 128])]; + tensor reshape_59_cast_fp16 = reshape(shape = concat_359, x = matmul_19_cast_fp16)[name = string("reshape_59_cast_fp16")]; + tensor var_12223_perm_0 = const()[name = string("op_12223_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_12242 = const()[name = string("op_12242"), val = tensor([1, 64, 2048])]; + tensor var_12223_cast_fp16 = transpose(perm = var_12223_perm_0, x = reshape_59_cast_fp16)[name = string("transpose_76")]; + tensor attn_output_195_cast_fp16 = reshape(shape = var_12242, x = var_12223_cast_fp16)[name = string("attn_output_195_cast_fp16")]; + tensor var_12247 = const()[name = string("op_12247"), val = tensor([0, 2, 1])]; + string var_12263_pad_type_0 = const()[name = string("op_12263_pad_type_0"), val = string("valid")]; + int32 var_12263_groups_0 = const()[name = string("op_12263_groups_0"), val = int32(1)]; + tensor var_12263_strides_0 = const()[name = string("op_12263_strides_0"), val = tensor([1])]; + tensor var_12263_pad_0 = const()[name = string("op_12263_pad_0"), val = tensor([0, 0])]; + tensor var_12263_dilations_0 = const()[name = string("op_12263_dilations_0"), val = tensor([1])]; + tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447271424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449368640))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_12248_cast_fp16 = transpose(perm = var_12247, x = attn_output_195_cast_fp16)[name = string("transpose_75")]; + tensor var_12263_cast_fp16 = conv(dilations = var_12263_dilations_0, groups = var_12263_groups_0, pad = var_12263_pad_0, pad_type = var_12263_pad_type_0, strides = var_12263_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_12248_cast_fp16)[name = string("op_12263_cast_fp16")]; + tensor var_12267 = const()[name = string("op_12267"), val = tensor([0, 2, 1])]; + tensor attn_output_199_cast_fp16 = transpose(perm = var_12267, x = var_12263_cast_fp16)[name = string("transpose_74")]; + tensor hidden_states_199_cast_fp16 = add(x = hidden_states_191_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_199_cast_fp16")]; + int32 var_12280 = const()[name = string("op_12280"), val = int32(-1)]; + fp16 const_677_promoted_to_fp16 = const()[name = string("const_677_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12282_cast_fp16 = mul(x = hidden_states_199_cast_fp16, y = const_677_promoted_to_fp16)[name = string("op_12282_cast_fp16")]; + bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; + tensor input_353_cast_fp16 = concat(axis = var_12280, interleave = input_353_interleave_0, values = (hidden_states_199_cast_fp16, var_12282_cast_fp16))[name = string("input_353_cast_fp16")]; + tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; + fp16 var_12277_to_fp16 = const()[name = string("op_12277_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_12277_to_fp16, x = input_353_cast_fp16)[name = string("normed_317_cast_fp16")]; + tensor normed_319_begin_0 = const()[name = string("normed_319_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_319_end_0 = const()[name = string("normed_319_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_319_end_mask_0 = const()[name = string("normed_319_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_319_cast_fp16 = slice_by_index(begin = normed_319_begin_0, end = normed_319_end_0, end_mask = normed_319_end_mask_0, x = normed_317_cast_fp16)[name = string("normed_319_cast_fp16")]; + tensor const_680_promoted_to_fp16 = const()[name = string("const_680_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449434240)))]; + tensor x_317_cast_fp16 = mul(x = normed_319_cast_fp16, y = const_680_promoted_to_fp16)[name = string("x_317_cast_fp16")]; + tensor var_12307 = const()[name = string("op_12307"), val = tensor([0, 2, 1])]; + tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; + tensor var_12308 = transpose(perm = var_12307, x = x_317_cast_fp16)[name = string("transpose_73")]; + tensor input_355 = expand_dims(axes = input_355_axes_0, x = var_12308)[name = string("input_355")]; + string input_357_pad_type_0 = const()[name = string("input_357_pad_type_0"), val = string("valid")]; + tensor input_357_strides_0 = const()[name = string("input_357_strides_0"), val = tensor([1, 1])]; + tensor input_357_pad_0 = const()[name = string("input_357_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_357_dilations_0 = const()[name = string("input_357_dilations_0"), val = tensor([1, 1])]; + int32 input_357_groups_0 = const()[name = string("input_357_groups_0"), val = int32(1)]; + tensor input_357 = conv(dilations = input_357_dilations_0, groups = input_357_groups_0, pad = input_357_pad_0, pad_type = input_357_pad_type_0, strides = input_357_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_palettized, x = input_355)[name = string("input_357")]; + string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; + tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; + tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; + int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; + tensor b_39 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_palettized, x = input_355)[name = string("b_39")]; + tensor c_39 = silu(x = input_357)[name = string("c_39")]; + tensor input_359 = mul(x = c_39, y = b_39)[name = string("input_359")]; + string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; + tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; + tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; + int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; + tensor e_39 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_palettized, x = input_359)[name = string("e_39")]; + tensor var_12330_axes_0 = const()[name = string("op_12330_axes_0"), val = tensor([2])]; + tensor var_12330 = squeeze(axes = var_12330_axes_0, x = e_39)[name = string("op_12330")]; + tensor var_12331 = const()[name = string("op_12331"), val = tensor([0, 2, 1])]; + tensor var_12332 = transpose(perm = var_12331, x = var_12330)[name = string("transpose_72")]; + tensor hidden_states_201_cast_fp16 = add(x = hidden_states_199_cast_fp16, y = var_12332)[name = string("hidden_states_201_cast_fp16")]; + int32 var_12344 = const()[name = string("op_12344"), val = int32(-1)]; + fp16 const_681_promoted_to_fp16 = const()[name = string("const_681_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12346_cast_fp16 = mul(x = hidden_states_201_cast_fp16, y = const_681_promoted_to_fp16)[name = string("op_12346_cast_fp16")]; + bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; + tensor input_361_cast_fp16 = concat(axis = var_12344, interleave = input_361_interleave_0, values = (hidden_states_201_cast_fp16, var_12346_cast_fp16))[name = string("input_361_cast_fp16")]; + tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; + fp16 var_12341_to_fp16 = const()[name = string("op_12341_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_12341_to_fp16, x = input_361_cast_fp16)[name = string("normed_321_cast_fp16")]; + tensor normed_323_begin_0 = const()[name = string("normed_323_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_323_end_0 = const()[name = string("normed_323_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_323_end_mask_0 = const()[name = string("normed_323_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_323_cast_fp16 = slice_by_index(begin = normed_323_begin_0, end = normed_323_end_0, end_mask = normed_323_end_mask_0, x = normed_321_cast_fp16)[name = string("normed_323_cast_fp16")]; + tensor const_684_promoted_to_fp16 = const()[name = string("const_684_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449436352)))]; + tensor hidden_states_203_cast_fp16 = mul(x = normed_323_cast_fp16, y = const_684_promoted_to_fp16)[name = string("hidden_states_203_cast_fp16")]; + tensor var_12369 = const()[name = string("op_12369"), val = tensor([0, 2, 1])]; + tensor var_12372_axes_0 = const()[name = string("op_12372_axes_0"), val = tensor([2])]; + tensor var_12370_cast_fp16 = transpose(perm = var_12369, x = hidden_states_203_cast_fp16)[name = string("transpose_71")]; + tensor var_12372_cast_fp16 = expand_dims(axes = var_12372_axes_0, x = var_12370_cast_fp16)[name = string("op_12372_cast_fp16")]; + string query_states_161_pad_type_0 = const()[name = string("query_states_161_pad_type_0"), val = string("valid")]; + tensor query_states_161_strides_0 = const()[name = string("query_states_161_strides_0"), val = tensor([1, 1])]; + tensor query_states_161_pad_0 = const()[name = string("query_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_161_dilations_0 = const()[name = string("query_states_161_dilations_0"), val = tensor([1, 1])]; + int32 query_states_161_groups_0 = const()[name = string("query_states_161_groups_0"), val = int32(1)]; + tensor query_states_161 = conv(dilations = query_states_161_dilations_0, groups = query_states_161_groups_0, pad = query_states_161_pad_0, pad_type = query_states_161_pad_type_0, strides = query_states_161_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_12372_cast_fp16)[name = string("query_states_161")]; + string key_states_201_pad_type_0 = const()[name = string("key_states_201_pad_type_0"), val = string("valid")]; + tensor key_states_201_strides_0 = const()[name = string("key_states_201_strides_0"), val = tensor([1, 1])]; + tensor key_states_201_pad_0 = const()[name = string("key_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_201_dilations_0 = const()[name = string("key_states_201_dilations_0"), val = tensor([1, 1])]; + int32 key_states_201_groups_0 = const()[name = string("key_states_201_groups_0"), val = int32(1)]; + tensor key_states_201 = conv(dilations = key_states_201_dilations_0, groups = key_states_201_groups_0, pad = key_states_201_pad_0, pad_type = key_states_201_pad_type_0, strides = key_states_201_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_12372_cast_fp16)[name = string("key_states_201")]; + string value_states_161_pad_type_0 = const()[name = string("value_states_161_pad_type_0"), val = string("valid")]; + tensor value_states_161_strides_0 = const()[name = string("value_states_161_strides_0"), val = tensor([1, 1])]; + tensor value_states_161_pad_0 = const()[name = string("value_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_161_dilations_0 = const()[name = string("value_states_161_dilations_0"), val = tensor([1, 1])]; + int32 value_states_161_groups_0 = const()[name = string("value_states_161_groups_0"), val = int32(1)]; + tensor value_states_161 = conv(dilations = value_states_161_dilations_0, groups = value_states_161_groups_0, pad = value_states_161_pad_0, pad_type = value_states_161_pad_type_0, strides = value_states_161_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_12372_cast_fp16)[name = string("value_states_161")]; + tensor var_12414 = const()[name = string("op_12414"), val = tensor([1, 16, 128, 64])]; + tensor var_12415 = reshape(shape = var_12414, x = query_states_161)[name = string("op_12415")]; + tensor var_12420 = const()[name = string("op_12420"), val = tensor([0, 1, 3, 2])]; + tensor var_12425 = const()[name = string("op_12425"), val = tensor([1, 8, 128, 64])]; + tensor var_12426 = reshape(shape = var_12425, x = key_states_201)[name = string("op_12426")]; + tensor var_12431 = const()[name = string("op_12431"), val = tensor([0, 1, 3, 2])]; + tensor var_12436 = const()[name = string("op_12436"), val = tensor([1, 8, 128, 64])]; + tensor var_12437 = reshape(shape = var_12436, x = value_states_161)[name = string("op_12437")]; + tensor var_12442 = const()[name = string("op_12442"), val = tensor([0, 1, 3, 2])]; + int32 var_12453 = const()[name = string("op_12453"), val = int32(-1)]; + fp16 const_686_promoted = const()[name = string("const_686_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_205 = transpose(perm = var_12420, x = var_12415)[name = string("transpose_70")]; + tensor var_12455 = mul(x = hidden_states_205, y = const_686_promoted)[name = string("op_12455")]; + bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; + tensor input_365 = concat(axis = var_12453, interleave = input_365_interleave_0, values = (hidden_states_205, var_12455))[name = string("input_365")]; + tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; + fp16 var_12450_to_fp16 = const()[name = string("op_12450_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_12450_to_fp16, x = input_365)[name = string("normed_325_cast_fp16")]; + tensor normed_327_begin_0 = const()[name = string("normed_327_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_327_end_0 = const()[name = string("normed_327_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_327_end_mask_0 = const()[name = string("normed_327_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_327 = slice_by_index(begin = normed_327_begin_0, end = normed_327_end_0, end_mask = normed_327_end_mask_0, x = normed_325_cast_fp16)[name = string("normed_327")]; + tensor const_689 = const()[name = string("const_689"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449438464)))]; + tensor q_41 = mul(x = normed_327, y = const_689)[name = string("q_41")]; + int32 var_12478 = const()[name = string("op_12478"), val = int32(-1)]; + fp16 const_690_promoted = const()[name = string("const_690_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_207 = transpose(perm = var_12431, x = var_12426)[name = string("transpose_69")]; + tensor var_12480 = mul(x = hidden_states_207, y = const_690_promoted)[name = string("op_12480")]; + bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; + tensor input_367 = concat(axis = var_12478, interleave = input_367_interleave_0, values = (hidden_states_207, var_12480))[name = string("input_367")]; + tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; + fp16 var_12475_to_fp16 = const()[name = string("op_12475_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_12475_to_fp16, x = input_367)[name = string("normed_329_cast_fp16")]; + tensor normed_331_begin_0 = const()[name = string("normed_331_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_331_end_0 = const()[name = string("normed_331_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_331_end_mask_0 = const()[name = string("normed_331_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_331 = slice_by_index(begin = normed_331_begin_0, end = normed_331_end_0, end_mask = normed_331_end_mask_0, x = normed_329_cast_fp16)[name = string("normed_331")]; + tensor const_693 = const()[name = string("const_693"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449438784)))]; + tensor k_41 = mul(x = normed_331, y = const_693)[name = string("k_41")]; + tensor var_12506 = mul(x = q_41, y = cos_5)[name = string("op_12506")]; + tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_81 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41)[name = string("x1_81")]; + tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_81 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41)[name = string("x2_81")]; + fp16 const_696_promoted = const()[name = string("const_696_promoted"), val = fp16(-0x1p+0)]; + tensor var_12527 = mul(x = x2_81, y = const_696_promoted)[name = string("op_12527")]; + int32 var_12529 = const()[name = string("op_12529"), val = int32(-1)]; + bool var_12530_interleave_0 = const()[name = string("op_12530_interleave_0"), val = bool(false)]; + tensor var_12530 = concat(axis = var_12529, interleave = var_12530_interleave_0, values = (var_12527, x1_81))[name = string("op_12530")]; + tensor var_12531 = mul(x = var_12530, y = sin_5)[name = string("op_12531")]; + tensor query_states_163 = add(x = var_12506, y = var_12531)[name = string("query_states_163")]; + tensor var_12534 = mul(x = k_41, y = cos_5)[name = string("op_12534")]; + tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_83 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41)[name = string("x1_83")]; + tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_83 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41)[name = string("x2_83")]; + fp16 const_699_promoted = const()[name = string("const_699_promoted"), val = fp16(-0x1p+0)]; + tensor var_12555 = mul(x = x2_83, y = const_699_promoted)[name = string("op_12555")]; + int32 var_12557 = const()[name = string("op_12557"), val = int32(-1)]; + bool var_12558_interleave_0 = const()[name = string("op_12558_interleave_0"), val = bool(false)]; + tensor var_12558 = concat(axis = var_12557, interleave = var_12558_interleave_0, values = (var_12555, x1_83))[name = string("op_12558")]; + tensor var_12559 = mul(x = var_12558, y = sin_5)[name = string("op_12559")]; + tensor key_states_203 = add(x = var_12534, y = var_12559)[name = string("key_states_203")]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([20])]; + tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; + tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; + tensor expand_dims_244 = const()[name = string("expand_dims_244"), val = tensor([21])]; + int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)]; + bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)]; + tensor concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (expand_dims_240, expand_dims_241, current_pos, expand_dims_243))[name = string("concat_362")]; + tensor concat_363_values1_0 = const()[name = string("concat_363_values1_0"), val = tensor([0])]; + tensor concat_363_values3_0 = const()[name = string("concat_363_values3_0"), val = tensor([0])]; + int32 concat_363_axis_0 = const()[name = string("concat_363_axis_0"), val = int32(0)]; + bool concat_363_interleave_0 = const()[name = string("concat_363_interleave_0"), val = bool(false)]; + tensor concat_363 = concat(axis = concat_363_axis_0, interleave = concat_363_interleave_0, values = (expand_dims_244, concat_363_values1_0, var_1781, concat_363_values3_0))[name = string("concat_363")]; + tensor model_model_kv_cache_0_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_362, begin_mask = model_model_kv_cache_0_internal_tensor_assign_41_begin_mask_0, end = concat_363, end_mask = model_model_kv_cache_0_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_41_stride_0, update = key_states_203, x = coreml_update_state_95)[name = string("model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_96_write_state")]; + tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_96")]; + tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([48])]; + tensor expand_dims_247 = const()[name = string("expand_dims_247"), val = tensor([0])]; + tensor expand_dims_249 = const()[name = string("expand_dims_249"), val = tensor([0])]; + tensor expand_dims_250 = const()[name = string("expand_dims_250"), val = tensor([49])]; + int32 concat_366_axis_0 = const()[name = string("concat_366_axis_0"), val = int32(0)]; + bool concat_366_interleave_0 = const()[name = string("concat_366_interleave_0"), val = bool(false)]; + tensor concat_366 = concat(axis = concat_366_axis_0, interleave = concat_366_interleave_0, values = (expand_dims_246, expand_dims_247, current_pos, expand_dims_249))[name = string("concat_366")]; + tensor concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = tensor([0])]; + tensor concat_367_values3_0 = const()[name = string("concat_367_values3_0"), val = tensor([0])]; + int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)]; + bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)]; + tensor concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (expand_dims_250, concat_367_values1_0, var_1781, concat_367_values3_0))[name = string("concat_367")]; + tensor model_model_kv_cache_0_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_163 = transpose(perm = var_12442, x = var_12437)[name = string("transpose_68")]; + tensor model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_366, begin_mask = model_model_kv_cache_0_internal_tensor_assign_42_begin_mask_0, end = concat_367, end_mask = model_model_kv_cache_0_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_42_stride_0, update = value_states_163, x = coreml_update_state_96)[name = string("model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_97_write_state")]; + tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_97")]; + tensor var_12630_begin_0 = const()[name = string("op_12630_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor var_12630_end_0 = const()[name = string("op_12630_end_0"), val = tensor([21, 8, 4096, 128])]; + tensor var_12630_end_mask_0 = const()[name = string("op_12630_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12630_cast_fp16 = slice_by_index(begin = var_12630_begin_0, end = var_12630_end_0, end_mask = var_12630_end_mask_0, x = coreml_update_state_97)[name = string("op_12630_cast_fp16")]; + tensor K_layer_cache_41_axes_0 = const()[name = string("K_layer_cache_41_axes_0"), val = tensor([0])]; + tensor K_layer_cache_41_cast_fp16 = squeeze(axes = K_layer_cache_41_axes_0, x = var_12630_cast_fp16)[name = string("K_layer_cache_41_cast_fp16")]; + tensor var_12637_begin_0 = const()[name = string("op_12637_begin_0"), val = tensor([48, 0, 0, 0])]; + tensor var_12637_end_0 = const()[name = string("op_12637_end_0"), val = tensor([49, 8, 4096, 128])]; + tensor var_12637_end_mask_0 = const()[name = string("op_12637_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_12637_cast_fp16 = slice_by_index(begin = var_12637_begin_0, end = var_12637_end_0, end_mask = var_12637_end_mask_0, x = coreml_update_state_97)[name = string("op_12637_cast_fp16")]; + tensor V_layer_cache_41_axes_0 = const()[name = string("V_layer_cache_41_axes_0"), val = tensor([0])]; + tensor V_layer_cache_41_cast_fp16 = squeeze(axes = V_layer_cache_41_axes_0, x = var_12637_cast_fp16)[name = string("V_layer_cache_41_cast_fp16")]; + tensor x_323_axes_0 = const()[name = string("x_323_axes_0"), val = tensor([1])]; + tensor x_323_cast_fp16 = expand_dims(axes = x_323_axes_0, x = K_layer_cache_41_cast_fp16)[name = string("x_323_cast_fp16")]; + tensor var_12666 = const()[name = string("op_12666"), val = tensor([1, 2, 1, 1])]; + tensor x_325_cast_fp16 = tile(reps = var_12666, x = x_323_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_12678 = const()[name = string("op_12678"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_207_cast_fp16 = reshape(shape = var_12678, x = x_325_cast_fp16)[name = string("key_states_207_cast_fp16")]; + tensor x_329_axes_0 = const()[name = string("x_329_axes_0"), val = tensor([1])]; + tensor x_329_cast_fp16 = expand_dims(axes = x_329_axes_0, x = V_layer_cache_41_cast_fp16)[name = string("x_329_cast_fp16")]; + tensor var_12686 = const()[name = string("op_12686"), val = tensor([1, 2, 1, 1])]; + tensor x_331_cast_fp16 = tile(reps = var_12686, x = x_329_cast_fp16)[name = string("x_331_cast_fp16")]; + bool var_12713_transpose_x_0 = const()[name = string("op_12713_transpose_x_0"), val = bool(false)]; + bool var_12713_transpose_y_0 = const()[name = string("op_12713_transpose_y_0"), val = bool(true)]; + tensor var_12713 = matmul(transpose_x = var_12713_transpose_x_0, transpose_y = var_12713_transpose_y_0, x = query_states_163, y = key_states_207_cast_fp16)[name = string("op_12713")]; + fp16 var_12714_to_fp16 = const()[name = string("op_12714_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_81_cast_fp16 = mul(x = var_12713, y = var_12714_to_fp16)[name = string("attn_weights_81_cast_fp16")]; + tensor attn_weights_83_cast_fp16 = add(x = attn_weights_81_cast_fp16, y = causal_mask)[name = string("attn_weights_83_cast_fp16")]; + int32 var_12749 = const()[name = string("op_12749"), val = int32(-1)]; + tensor var_12751_cast_fp16 = softmax(axis = var_12749, x = attn_weights_83_cast_fp16)[name = string("op_12751_cast_fp16")]; + tensor concat_372 = const()[name = string("concat_372"), val = tensor([16, 64, 4096])]; + tensor reshape_60_cast_fp16 = reshape(shape = concat_372, x = var_12751_cast_fp16)[name = string("reshape_60_cast_fp16")]; + tensor concat_373 = const()[name = string("concat_373"), val = tensor([16, 4096, 128])]; + tensor reshape_61_cast_fp16 = reshape(shape = concat_373, x = x_331_cast_fp16)[name = string("reshape_61_cast_fp16")]; + bool matmul_20_transpose_x_0 = const()[name = string("matmul_20_transpose_x_0"), val = bool(false)]; + bool matmul_20_transpose_y_0 = const()[name = string("matmul_20_transpose_y_0"), val = bool(false)]; + tensor matmul_20_cast_fp16 = matmul(transpose_x = matmul_20_transpose_x_0, transpose_y = matmul_20_transpose_y_0, x = reshape_60_cast_fp16, y = reshape_61_cast_fp16)[name = string("matmul_20_cast_fp16")]; + tensor concat_377 = const()[name = string("concat_377"), val = tensor([1, 16, 64, 128])]; + tensor reshape_62_cast_fp16 = reshape(shape = concat_377, x = matmul_20_cast_fp16)[name = string("reshape_62_cast_fp16")]; + tensor var_12763_perm_0 = const()[name = string("op_12763_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_12782 = const()[name = string("op_12782"), val = tensor([1, 64, 2048])]; + tensor var_12763_cast_fp16 = transpose(perm = var_12763_perm_0, x = reshape_62_cast_fp16)[name = string("transpose_67")]; + tensor attn_output_205_cast_fp16 = reshape(shape = var_12782, x = var_12763_cast_fp16)[name = string("attn_output_205_cast_fp16")]; + tensor var_12787 = const()[name = string("op_12787"), val = tensor([0, 2, 1])]; + string var_12803_pad_type_0 = const()[name = string("op_12803_pad_type_0"), val = string("valid")]; + int32 var_12803_groups_0 = const()[name = string("op_12803_groups_0"), val = int32(1)]; + tensor var_12803_strides_0 = const()[name = string("op_12803_strides_0"), val = tensor([1])]; + tensor var_12803_pad_0 = const()[name = string("op_12803_pad_0"), val = tensor([0, 0])]; + tensor var_12803_dilations_0 = const()[name = string("op_12803_dilations_0"), val = tensor([1])]; + tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449439104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451536320))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_12788_cast_fp16 = transpose(perm = var_12787, x = attn_output_205_cast_fp16)[name = string("transpose_66")]; + tensor var_12803_cast_fp16 = conv(dilations = var_12803_dilations_0, groups = var_12803_groups_0, pad = var_12803_pad_0, pad_type = var_12803_pad_type_0, strides = var_12803_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_12788_cast_fp16)[name = string("op_12803_cast_fp16")]; + tensor var_12807 = const()[name = string("op_12807"), val = tensor([0, 2, 1])]; + tensor attn_output_209_cast_fp16 = transpose(perm = var_12807, x = var_12803_cast_fp16)[name = string("transpose_65")]; + tensor hidden_states_209_cast_fp16 = add(x = hidden_states_201_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; + int32 var_12820 = const()[name = string("op_12820"), val = int32(-1)]; + fp16 const_711_promoted_to_fp16 = const()[name = string("const_711_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12822_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = const_711_promoted_to_fp16)[name = string("op_12822_cast_fp16")]; + bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; + tensor input_371_cast_fp16 = concat(axis = var_12820, interleave = input_371_interleave_0, values = (hidden_states_209_cast_fp16, var_12822_cast_fp16))[name = string("input_371_cast_fp16")]; + tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; + fp16 var_12817_to_fp16 = const()[name = string("op_12817_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_12817_to_fp16, x = input_371_cast_fp16)[name = string("normed_333_cast_fp16")]; + tensor normed_335_begin_0 = const()[name = string("normed_335_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_335_end_0 = const()[name = string("normed_335_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_335_end_mask_0 = const()[name = string("normed_335_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_335_cast_fp16 = slice_by_index(begin = normed_335_begin_0, end = normed_335_end_0, end_mask = normed_335_end_mask_0, x = normed_333_cast_fp16)[name = string("normed_335_cast_fp16")]; + tensor const_714_promoted_to_fp16 = const()[name = string("const_714_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451601920)))]; + tensor x_333_cast_fp16 = mul(x = normed_335_cast_fp16, y = const_714_promoted_to_fp16)[name = string("x_333_cast_fp16")]; + tensor var_12847 = const()[name = string("op_12847"), val = tensor([0, 2, 1])]; + tensor input_373_axes_0 = const()[name = string("input_373_axes_0"), val = tensor([2])]; + tensor var_12848 = transpose(perm = var_12847, x = x_333_cast_fp16)[name = string("transpose_64")]; + tensor input_373 = expand_dims(axes = input_373_axes_0, x = var_12848)[name = string("input_373")]; + string input_375_pad_type_0 = const()[name = string("input_375_pad_type_0"), val = string("valid")]; + tensor input_375_strides_0 = const()[name = string("input_375_strides_0"), val = tensor([1, 1])]; + tensor input_375_pad_0 = const()[name = string("input_375_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_375_dilations_0 = const()[name = string("input_375_dilations_0"), val = tensor([1, 1])]; + int32 input_375_groups_0 = const()[name = string("input_375_groups_0"), val = int32(1)]; + tensor input_375 = conv(dilations = input_375_dilations_0, groups = input_375_groups_0, pad = input_375_pad_0, pad_type = input_375_pad_type_0, strides = input_375_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_palettized, x = input_373)[name = string("input_375")]; + string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; + tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; + tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; + int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; + tensor b_41 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_palettized, x = input_373)[name = string("b_41")]; + tensor c_41 = silu(x = input_375)[name = string("c_41")]; + tensor input_377 = mul(x = c_41, y = b_41)[name = string("input_377")]; + string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; + tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; + tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; + int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; + tensor e_41 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_palettized, x = input_377)[name = string("e_41")]; + tensor var_12870_axes_0 = const()[name = string("op_12870_axes_0"), val = tensor([2])]; + tensor var_12870 = squeeze(axes = var_12870_axes_0, x = e_41)[name = string("op_12870")]; + tensor var_12871 = const()[name = string("op_12871"), val = tensor([0, 2, 1])]; + tensor var_12872 = transpose(perm = var_12871, x = var_12870)[name = string("transpose_63")]; + tensor hidden_states_211_cast_fp16 = add(x = hidden_states_209_cast_fp16, y = var_12872)[name = string("hidden_states_211_cast_fp16")]; + int32 var_12884 = const()[name = string("op_12884"), val = int32(-1)]; + fp16 const_715_promoted_to_fp16 = const()[name = string("const_715_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_12886_cast_fp16 = mul(x = hidden_states_211_cast_fp16, y = const_715_promoted_to_fp16)[name = string("op_12886_cast_fp16")]; + bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; + tensor input_379_cast_fp16 = concat(axis = var_12884, interleave = input_379_interleave_0, values = (hidden_states_211_cast_fp16, var_12886_cast_fp16))[name = string("input_379_cast_fp16")]; + tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; + fp16 var_12881_to_fp16 = const()[name = string("op_12881_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_12881_to_fp16, x = input_379_cast_fp16)[name = string("normed_337_cast_fp16")]; + tensor normed_339_begin_0 = const()[name = string("normed_339_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_339_end_0 = const()[name = string("normed_339_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_339_end_mask_0 = const()[name = string("normed_339_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_339_cast_fp16 = slice_by_index(begin = normed_339_begin_0, end = normed_339_end_0, end_mask = normed_339_end_mask_0, x = normed_337_cast_fp16)[name = string("normed_339_cast_fp16")]; + tensor const_718_promoted_to_fp16 = const()[name = string("const_718_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451604032)))]; + tensor hidden_states_213_cast_fp16 = mul(x = normed_339_cast_fp16, y = const_718_promoted_to_fp16)[name = string("hidden_states_213_cast_fp16")]; + tensor var_12909 = const()[name = string("op_12909"), val = tensor([0, 2, 1])]; + tensor var_12912_axes_0 = const()[name = string("op_12912_axes_0"), val = tensor([2])]; + tensor var_12910_cast_fp16 = transpose(perm = var_12909, x = hidden_states_213_cast_fp16)[name = string("transpose_62")]; + tensor var_12912_cast_fp16 = expand_dims(axes = var_12912_axes_0, x = var_12910_cast_fp16)[name = string("op_12912_cast_fp16")]; + string query_states_169_pad_type_0 = const()[name = string("query_states_169_pad_type_0"), val = string("valid")]; + tensor query_states_169_strides_0 = const()[name = string("query_states_169_strides_0"), val = tensor([1, 1])]; + tensor query_states_169_pad_0 = const()[name = string("query_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_169_dilations_0 = const()[name = string("query_states_169_dilations_0"), val = tensor([1, 1])]; + int32 query_states_169_groups_0 = const()[name = string("query_states_169_groups_0"), val = int32(1)]; + tensor query_states_169 = conv(dilations = query_states_169_dilations_0, groups = query_states_169_groups_0, pad = query_states_169_pad_0, pad_type = query_states_169_pad_type_0, strides = query_states_169_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_12912_cast_fp16)[name = string("query_states_169")]; + string key_states_211_pad_type_0 = const()[name = string("key_states_211_pad_type_0"), val = string("valid")]; + tensor key_states_211_strides_0 = const()[name = string("key_states_211_strides_0"), val = tensor([1, 1])]; + tensor key_states_211_pad_0 = const()[name = string("key_states_211_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_211_dilations_0 = const()[name = string("key_states_211_dilations_0"), val = tensor([1, 1])]; + int32 key_states_211_groups_0 = const()[name = string("key_states_211_groups_0"), val = int32(1)]; + tensor key_states_211 = conv(dilations = key_states_211_dilations_0, groups = key_states_211_groups_0, pad = key_states_211_pad_0, pad_type = key_states_211_pad_type_0, strides = key_states_211_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_12912_cast_fp16)[name = string("key_states_211")]; + string value_states_169_pad_type_0 = const()[name = string("value_states_169_pad_type_0"), val = string("valid")]; + tensor value_states_169_strides_0 = const()[name = string("value_states_169_strides_0"), val = tensor([1, 1])]; + tensor value_states_169_pad_0 = const()[name = string("value_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_169_dilations_0 = const()[name = string("value_states_169_dilations_0"), val = tensor([1, 1])]; + int32 value_states_169_groups_0 = const()[name = string("value_states_169_groups_0"), val = int32(1)]; + tensor value_states_169 = conv(dilations = value_states_169_dilations_0, groups = value_states_169_groups_0, pad = value_states_169_pad_0, pad_type = value_states_169_pad_type_0, strides = value_states_169_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_12912_cast_fp16)[name = string("value_states_169")]; + tensor var_12954 = const()[name = string("op_12954"), val = tensor([1, 16, 128, 64])]; + tensor var_12955 = reshape(shape = var_12954, x = query_states_169)[name = string("op_12955")]; + tensor var_12960 = const()[name = string("op_12960"), val = tensor([0, 1, 3, 2])]; + tensor var_12965 = const()[name = string("op_12965"), val = tensor([1, 8, 128, 64])]; + tensor var_12966 = reshape(shape = var_12965, x = key_states_211)[name = string("op_12966")]; + tensor var_12971 = const()[name = string("op_12971"), val = tensor([0, 1, 3, 2])]; + tensor var_12976 = const()[name = string("op_12976"), val = tensor([1, 8, 128, 64])]; + tensor var_12977 = reshape(shape = var_12976, x = value_states_169)[name = string("op_12977")]; + tensor var_12982 = const()[name = string("op_12982"), val = tensor([0, 1, 3, 2])]; + int32 var_12993 = const()[name = string("op_12993"), val = int32(-1)]; + fp16 const_720_promoted = const()[name = string("const_720_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_215 = transpose(perm = var_12960, x = var_12955)[name = string("transpose_61")]; + tensor var_12995 = mul(x = hidden_states_215, y = const_720_promoted)[name = string("op_12995")]; + bool input_383_interleave_0 = const()[name = string("input_383_interleave_0"), val = bool(false)]; + tensor input_383 = concat(axis = var_12993, interleave = input_383_interleave_0, values = (hidden_states_215, var_12995))[name = string("input_383")]; + tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; + fp16 var_12990_to_fp16 = const()[name = string("op_12990_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_12990_to_fp16, x = input_383)[name = string("normed_341_cast_fp16")]; + tensor normed_343_begin_0 = const()[name = string("normed_343_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_343_end_0 = const()[name = string("normed_343_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_343_end_mask_0 = const()[name = string("normed_343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_343 = slice_by_index(begin = normed_343_begin_0, end = normed_343_end_0, end_mask = normed_343_end_mask_0, x = normed_341_cast_fp16)[name = string("normed_343")]; + tensor const_723 = const()[name = string("const_723"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451606144)))]; + tensor q_43 = mul(x = normed_343, y = const_723)[name = string("q_43")]; + int32 var_13018 = const()[name = string("op_13018"), val = int32(-1)]; + fp16 const_724_promoted = const()[name = string("const_724_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_217 = transpose(perm = var_12971, x = var_12966)[name = string("transpose_60")]; + tensor var_13020 = mul(x = hidden_states_217, y = const_724_promoted)[name = string("op_13020")]; + bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; + tensor input_385 = concat(axis = var_13018, interleave = input_385_interleave_0, values = (hidden_states_217, var_13020))[name = string("input_385")]; + tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; + fp16 var_13015_to_fp16 = const()[name = string("op_13015_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_13015_to_fp16, x = input_385)[name = string("normed_345_cast_fp16")]; + tensor normed_347_begin_0 = const()[name = string("normed_347_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_347_end_0 = const()[name = string("normed_347_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_347_end_mask_0 = const()[name = string("normed_347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_347 = slice_by_index(begin = normed_347_begin_0, end = normed_347_end_0, end_mask = normed_347_end_mask_0, x = normed_345_cast_fp16)[name = string("normed_347")]; + tensor const_727 = const()[name = string("const_727"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451606464)))]; + tensor k_43 = mul(x = normed_347, y = const_727)[name = string("k_43")]; + tensor var_13046 = mul(x = q_43, y = cos_5)[name = string("op_13046")]; + tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_85 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43)[name = string("x1_85")]; + tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_85 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43)[name = string("x2_85")]; + fp16 const_730_promoted = const()[name = string("const_730_promoted"), val = fp16(-0x1p+0)]; + tensor var_13067 = mul(x = x2_85, y = const_730_promoted)[name = string("op_13067")]; + int32 var_13069 = const()[name = string("op_13069"), val = int32(-1)]; + bool var_13070_interleave_0 = const()[name = string("op_13070_interleave_0"), val = bool(false)]; + tensor var_13070 = concat(axis = var_13069, interleave = var_13070_interleave_0, values = (var_13067, x1_85))[name = string("op_13070")]; + tensor var_13071 = mul(x = var_13070, y = sin_5)[name = string("op_13071")]; + tensor query_states_171 = add(x = var_13046, y = var_13071)[name = string("query_states_171")]; + tensor var_13074 = mul(x = k_43, y = cos_5)[name = string("op_13074")]; + tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_87 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43)[name = string("x1_87")]; + tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_87 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43)[name = string("x2_87")]; + fp16 const_733_promoted = const()[name = string("const_733_promoted"), val = fp16(-0x1p+0)]; + tensor var_13095 = mul(x = x2_87, y = const_733_promoted)[name = string("op_13095")]; + int32 var_13097 = const()[name = string("op_13097"), val = int32(-1)]; + bool var_13098_interleave_0 = const()[name = string("op_13098_interleave_0"), val = bool(false)]; + tensor var_13098 = concat(axis = var_13097, interleave = var_13098_interleave_0, values = (var_13095, x1_87))[name = string("op_13098")]; + tensor var_13099 = mul(x = var_13098, y = sin_5)[name = string("op_13099")]; + tensor key_states_213 = add(x = var_13074, y = var_13099)[name = string("key_states_213")]; + tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([21])]; + tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; + tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; + tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([22])]; + int32 concat_380_axis_0 = const()[name = string("concat_380_axis_0"), val = int32(0)]; + bool concat_380_interleave_0 = const()[name = string("concat_380_interleave_0"), val = bool(false)]; + tensor concat_380 = concat(axis = concat_380_axis_0, interleave = concat_380_interleave_0, values = (expand_dims_252, expand_dims_253, current_pos, expand_dims_255))[name = string("concat_380")]; + tensor concat_381_values1_0 = const()[name = string("concat_381_values1_0"), val = tensor([0])]; + tensor concat_381_values3_0 = const()[name = string("concat_381_values3_0"), val = tensor([0])]; + int32 concat_381_axis_0 = const()[name = string("concat_381_axis_0"), val = int32(0)]; + bool concat_381_interleave_0 = const()[name = string("concat_381_interleave_0"), val = bool(false)]; + tensor concat_381 = concat(axis = concat_381_axis_0, interleave = concat_381_interleave_0, values = (expand_dims_256, concat_381_values1_0, var_1781, concat_381_values3_0))[name = string("concat_381")]; + tensor model_model_kv_cache_0_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_380, begin_mask = model_model_kv_cache_0_internal_tensor_assign_43_begin_mask_0, end = concat_381, end_mask = model_model_kv_cache_0_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_43_stride_0, update = key_states_213, x = coreml_update_state_97)[name = string("model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_98_write_state")]; + tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_98")]; + tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([49])]; + tensor expand_dims_259 = const()[name = string("expand_dims_259"), val = tensor([0])]; + tensor expand_dims_261 = const()[name = string("expand_dims_261"), val = tensor([0])]; + tensor expand_dims_262 = const()[name = string("expand_dims_262"), val = tensor([50])]; + int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)]; + bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)]; + tensor concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (expand_dims_258, expand_dims_259, current_pos, expand_dims_261))[name = string("concat_384")]; + tensor concat_385_values1_0 = const()[name = string("concat_385_values1_0"), val = tensor([0])]; + tensor concat_385_values3_0 = const()[name = string("concat_385_values3_0"), val = tensor([0])]; + int32 concat_385_axis_0 = const()[name = string("concat_385_axis_0"), val = int32(0)]; + bool concat_385_interleave_0 = const()[name = string("concat_385_interleave_0"), val = bool(false)]; + tensor concat_385 = concat(axis = concat_385_axis_0, interleave = concat_385_interleave_0, values = (expand_dims_262, concat_385_values1_0, var_1781, concat_385_values3_0))[name = string("concat_385")]; + tensor model_model_kv_cache_0_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_171 = transpose(perm = var_12982, x = var_12977)[name = string("transpose_59")]; + tensor model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_384, begin_mask = model_model_kv_cache_0_internal_tensor_assign_44_begin_mask_0, end = concat_385, end_mask = model_model_kv_cache_0_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_44_stride_0, update = value_states_171, x = coreml_update_state_98)[name = string("model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_99_write_state")]; + tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_99")]; + tensor var_13170_begin_0 = const()[name = string("op_13170_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor var_13170_end_0 = const()[name = string("op_13170_end_0"), val = tensor([22, 8, 4096, 128])]; + tensor var_13170_end_mask_0 = const()[name = string("op_13170_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13170_cast_fp16 = slice_by_index(begin = var_13170_begin_0, end = var_13170_end_0, end_mask = var_13170_end_mask_0, x = coreml_update_state_99)[name = string("op_13170_cast_fp16")]; + tensor K_layer_cache_43_axes_0 = const()[name = string("K_layer_cache_43_axes_0"), val = tensor([0])]; + tensor K_layer_cache_43_cast_fp16 = squeeze(axes = K_layer_cache_43_axes_0, x = var_13170_cast_fp16)[name = string("K_layer_cache_43_cast_fp16")]; + tensor var_13177_begin_0 = const()[name = string("op_13177_begin_0"), val = tensor([49, 0, 0, 0])]; + tensor var_13177_end_0 = const()[name = string("op_13177_end_0"), val = tensor([50, 8, 4096, 128])]; + tensor var_13177_end_mask_0 = const()[name = string("op_13177_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13177_cast_fp16 = slice_by_index(begin = var_13177_begin_0, end = var_13177_end_0, end_mask = var_13177_end_mask_0, x = coreml_update_state_99)[name = string("op_13177_cast_fp16")]; + tensor V_layer_cache_43_axes_0 = const()[name = string("V_layer_cache_43_axes_0"), val = tensor([0])]; + tensor V_layer_cache_43_cast_fp16 = squeeze(axes = V_layer_cache_43_axes_0, x = var_13177_cast_fp16)[name = string("V_layer_cache_43_cast_fp16")]; + tensor x_339_axes_0 = const()[name = string("x_339_axes_0"), val = tensor([1])]; + tensor x_339_cast_fp16 = expand_dims(axes = x_339_axes_0, x = K_layer_cache_43_cast_fp16)[name = string("x_339_cast_fp16")]; + tensor var_13206 = const()[name = string("op_13206"), val = tensor([1, 2, 1, 1])]; + tensor x_341_cast_fp16 = tile(reps = var_13206, x = x_339_cast_fp16)[name = string("x_341_cast_fp16")]; + tensor var_13218 = const()[name = string("op_13218"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_217_cast_fp16 = reshape(shape = var_13218, x = x_341_cast_fp16)[name = string("key_states_217_cast_fp16")]; + tensor x_345_axes_0 = const()[name = string("x_345_axes_0"), val = tensor([1])]; + tensor x_345_cast_fp16 = expand_dims(axes = x_345_axes_0, x = V_layer_cache_43_cast_fp16)[name = string("x_345_cast_fp16")]; + tensor var_13226 = const()[name = string("op_13226"), val = tensor([1, 2, 1, 1])]; + tensor x_347_cast_fp16 = tile(reps = var_13226, x = x_345_cast_fp16)[name = string("x_347_cast_fp16")]; + bool var_13253_transpose_x_0 = const()[name = string("op_13253_transpose_x_0"), val = bool(false)]; + bool var_13253_transpose_y_0 = const()[name = string("op_13253_transpose_y_0"), val = bool(true)]; + tensor var_13253 = matmul(transpose_x = var_13253_transpose_x_0, transpose_y = var_13253_transpose_y_0, x = query_states_171, y = key_states_217_cast_fp16)[name = string("op_13253")]; + fp16 var_13254_to_fp16 = const()[name = string("op_13254_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_85_cast_fp16 = mul(x = var_13253, y = var_13254_to_fp16)[name = string("attn_weights_85_cast_fp16")]; + tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = causal_mask)[name = string("attn_weights_87_cast_fp16")]; + int32 var_13289 = const()[name = string("op_13289"), val = int32(-1)]; + tensor var_13291_cast_fp16 = softmax(axis = var_13289, x = attn_weights_87_cast_fp16)[name = string("op_13291_cast_fp16")]; + tensor concat_390 = const()[name = string("concat_390"), val = tensor([16, 64, 4096])]; + tensor reshape_63_cast_fp16 = reshape(shape = concat_390, x = var_13291_cast_fp16)[name = string("reshape_63_cast_fp16")]; + tensor concat_391 = const()[name = string("concat_391"), val = tensor([16, 4096, 128])]; + tensor reshape_64_cast_fp16 = reshape(shape = concat_391, x = x_347_cast_fp16)[name = string("reshape_64_cast_fp16")]; + bool matmul_21_transpose_x_0 = const()[name = string("matmul_21_transpose_x_0"), val = bool(false)]; + bool matmul_21_transpose_y_0 = const()[name = string("matmul_21_transpose_y_0"), val = bool(false)]; + tensor matmul_21_cast_fp16 = matmul(transpose_x = matmul_21_transpose_x_0, transpose_y = matmul_21_transpose_y_0, x = reshape_63_cast_fp16, y = reshape_64_cast_fp16)[name = string("matmul_21_cast_fp16")]; + tensor concat_395 = const()[name = string("concat_395"), val = tensor([1, 16, 64, 128])]; + tensor reshape_65_cast_fp16 = reshape(shape = concat_395, x = matmul_21_cast_fp16)[name = string("reshape_65_cast_fp16")]; + tensor var_13303_perm_0 = const()[name = string("op_13303_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13322 = const()[name = string("op_13322"), val = tensor([1, 64, 2048])]; + tensor var_13303_cast_fp16 = transpose(perm = var_13303_perm_0, x = reshape_65_cast_fp16)[name = string("transpose_58")]; + tensor attn_output_215_cast_fp16 = reshape(shape = var_13322, x = var_13303_cast_fp16)[name = string("attn_output_215_cast_fp16")]; + tensor var_13327 = const()[name = string("op_13327"), val = tensor([0, 2, 1])]; + string var_13343_pad_type_0 = const()[name = string("op_13343_pad_type_0"), val = string("valid")]; + int32 var_13343_groups_0 = const()[name = string("op_13343_groups_0"), val = int32(1)]; + tensor var_13343_strides_0 = const()[name = string("op_13343_strides_0"), val = tensor([1])]; + tensor var_13343_pad_0 = const()[name = string("op_13343_pad_0"), val = tensor([0, 0])]; + tensor var_13343_dilations_0 = const()[name = string("op_13343_dilations_0"), val = tensor([1])]; + tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(451606784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453704000))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13328_cast_fp16 = transpose(perm = var_13327, x = attn_output_215_cast_fp16)[name = string("transpose_57")]; + tensor var_13343_cast_fp16 = conv(dilations = var_13343_dilations_0, groups = var_13343_groups_0, pad = var_13343_pad_0, pad_type = var_13343_pad_type_0, strides = var_13343_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_13328_cast_fp16)[name = string("op_13343_cast_fp16")]; + tensor var_13347 = const()[name = string("op_13347"), val = tensor([0, 2, 1])]; + tensor attn_output_219_cast_fp16 = transpose(perm = var_13347, x = var_13343_cast_fp16)[name = string("transpose_56")]; + tensor hidden_states_219_cast_fp16 = add(x = hidden_states_211_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_219_cast_fp16")]; + int32 var_13360 = const()[name = string("op_13360"), val = int32(-1)]; + fp16 const_745_promoted_to_fp16 = const()[name = string("const_745_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13362_cast_fp16 = mul(x = hidden_states_219_cast_fp16, y = const_745_promoted_to_fp16)[name = string("op_13362_cast_fp16")]; + bool input_389_interleave_0 = const()[name = string("input_389_interleave_0"), val = bool(false)]; + tensor input_389_cast_fp16 = concat(axis = var_13360, interleave = input_389_interleave_0, values = (hidden_states_219_cast_fp16, var_13362_cast_fp16))[name = string("input_389_cast_fp16")]; + tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; + fp16 var_13357_to_fp16 = const()[name = string("op_13357_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_13357_to_fp16, x = input_389_cast_fp16)[name = string("normed_349_cast_fp16")]; + tensor normed_351_begin_0 = const()[name = string("normed_351_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_351_end_0 = const()[name = string("normed_351_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_351_end_mask_0 = const()[name = string("normed_351_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_351_cast_fp16 = slice_by_index(begin = normed_351_begin_0, end = normed_351_end_0, end_mask = normed_351_end_mask_0, x = normed_349_cast_fp16)[name = string("normed_351_cast_fp16")]; + tensor const_748_promoted_to_fp16 = const()[name = string("const_748_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453769600)))]; + tensor x_349_cast_fp16 = mul(x = normed_351_cast_fp16, y = const_748_promoted_to_fp16)[name = string("x_349_cast_fp16")]; + tensor var_13387 = const()[name = string("op_13387"), val = tensor([0, 2, 1])]; + tensor input_391_axes_0 = const()[name = string("input_391_axes_0"), val = tensor([2])]; + tensor var_13388 = transpose(perm = var_13387, x = x_349_cast_fp16)[name = string("transpose_55")]; + tensor input_391 = expand_dims(axes = input_391_axes_0, x = var_13388)[name = string("input_391")]; + string input_393_pad_type_0 = const()[name = string("input_393_pad_type_0"), val = string("valid")]; + tensor input_393_strides_0 = const()[name = string("input_393_strides_0"), val = tensor([1, 1])]; + tensor input_393_pad_0 = const()[name = string("input_393_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_393_dilations_0 = const()[name = string("input_393_dilations_0"), val = tensor([1, 1])]; + int32 input_393_groups_0 = const()[name = string("input_393_groups_0"), val = int32(1)]; + tensor input_393 = conv(dilations = input_393_dilations_0, groups = input_393_groups_0, pad = input_393_pad_0, pad_type = input_393_pad_type_0, strides = input_393_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_palettized, x = input_391)[name = string("input_393")]; + string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; + tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; + tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; + int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; + tensor b_43 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_palettized, x = input_391)[name = string("b_43")]; + tensor c_43 = silu(x = input_393)[name = string("c_43")]; + tensor input_395 = mul(x = c_43, y = b_43)[name = string("input_395")]; + string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; + tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; + tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; + int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; + tensor e_43 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_palettized, x = input_395)[name = string("e_43")]; + tensor var_13410_axes_0 = const()[name = string("op_13410_axes_0"), val = tensor([2])]; + tensor var_13410 = squeeze(axes = var_13410_axes_0, x = e_43)[name = string("op_13410")]; + tensor var_13411 = const()[name = string("op_13411"), val = tensor([0, 2, 1])]; + tensor var_13412 = transpose(perm = var_13411, x = var_13410)[name = string("transpose_54")]; + tensor hidden_states_221_cast_fp16 = add(x = hidden_states_219_cast_fp16, y = var_13412)[name = string("hidden_states_221_cast_fp16")]; + int32 var_13424 = const()[name = string("op_13424"), val = int32(-1)]; + fp16 const_749_promoted_to_fp16 = const()[name = string("const_749_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13426_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = const_749_promoted_to_fp16)[name = string("op_13426_cast_fp16")]; + bool input_397_interleave_0 = const()[name = string("input_397_interleave_0"), val = bool(false)]; + tensor input_397_cast_fp16 = concat(axis = var_13424, interleave = input_397_interleave_0, values = (hidden_states_221_cast_fp16, var_13426_cast_fp16))[name = string("input_397_cast_fp16")]; + tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; + fp16 var_13421_to_fp16 = const()[name = string("op_13421_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_13421_to_fp16, x = input_397_cast_fp16)[name = string("normed_353_cast_fp16")]; + tensor normed_355_begin_0 = const()[name = string("normed_355_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_355_end_0 = const()[name = string("normed_355_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_355_end_mask_0 = const()[name = string("normed_355_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_355_cast_fp16 = slice_by_index(begin = normed_355_begin_0, end = normed_355_end_0, end_mask = normed_355_end_mask_0, x = normed_353_cast_fp16)[name = string("normed_355_cast_fp16")]; + tensor const_752_promoted_to_fp16 = const()[name = string("const_752_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453771712)))]; + tensor hidden_states_223_cast_fp16 = mul(x = normed_355_cast_fp16, y = const_752_promoted_to_fp16)[name = string("hidden_states_223_cast_fp16")]; + tensor var_13449 = const()[name = string("op_13449"), val = tensor([0, 2, 1])]; + tensor var_13452_axes_0 = const()[name = string("op_13452_axes_0"), val = tensor([2])]; + tensor var_13450_cast_fp16 = transpose(perm = var_13449, x = hidden_states_223_cast_fp16)[name = string("transpose_53")]; + tensor var_13452_cast_fp16 = expand_dims(axes = var_13452_axes_0, x = var_13450_cast_fp16)[name = string("op_13452_cast_fp16")]; + string query_states_177_pad_type_0 = const()[name = string("query_states_177_pad_type_0"), val = string("valid")]; + tensor query_states_177_strides_0 = const()[name = string("query_states_177_strides_0"), val = tensor([1, 1])]; + tensor query_states_177_pad_0 = const()[name = string("query_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_177_dilations_0 = const()[name = string("query_states_177_dilations_0"), val = tensor([1, 1])]; + int32 query_states_177_groups_0 = const()[name = string("query_states_177_groups_0"), val = int32(1)]; + tensor query_states_177 = conv(dilations = query_states_177_dilations_0, groups = query_states_177_groups_0, pad = query_states_177_pad_0, pad_type = query_states_177_pad_type_0, strides = query_states_177_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_13452_cast_fp16)[name = string("query_states_177")]; + string key_states_221_pad_type_0 = const()[name = string("key_states_221_pad_type_0"), val = string("valid")]; + tensor key_states_221_strides_0 = const()[name = string("key_states_221_strides_0"), val = tensor([1, 1])]; + tensor key_states_221_pad_0 = const()[name = string("key_states_221_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_221_dilations_0 = const()[name = string("key_states_221_dilations_0"), val = tensor([1, 1])]; + int32 key_states_221_groups_0 = const()[name = string("key_states_221_groups_0"), val = int32(1)]; + tensor key_states_221 = conv(dilations = key_states_221_dilations_0, groups = key_states_221_groups_0, pad = key_states_221_pad_0, pad_type = key_states_221_pad_type_0, strides = key_states_221_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_13452_cast_fp16)[name = string("key_states_221")]; + string value_states_177_pad_type_0 = const()[name = string("value_states_177_pad_type_0"), val = string("valid")]; + tensor value_states_177_strides_0 = const()[name = string("value_states_177_strides_0"), val = tensor([1, 1])]; + tensor value_states_177_pad_0 = const()[name = string("value_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_177_dilations_0 = const()[name = string("value_states_177_dilations_0"), val = tensor([1, 1])]; + int32 value_states_177_groups_0 = const()[name = string("value_states_177_groups_0"), val = int32(1)]; + tensor value_states_177 = conv(dilations = value_states_177_dilations_0, groups = value_states_177_groups_0, pad = value_states_177_pad_0, pad_type = value_states_177_pad_type_0, strides = value_states_177_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_13452_cast_fp16)[name = string("value_states_177")]; + tensor var_13494 = const()[name = string("op_13494"), val = tensor([1, 16, 128, 64])]; + tensor var_13495 = reshape(shape = var_13494, x = query_states_177)[name = string("op_13495")]; + tensor var_13500 = const()[name = string("op_13500"), val = tensor([0, 1, 3, 2])]; + tensor var_13505 = const()[name = string("op_13505"), val = tensor([1, 8, 128, 64])]; + tensor var_13506 = reshape(shape = var_13505, x = key_states_221)[name = string("op_13506")]; + tensor var_13511 = const()[name = string("op_13511"), val = tensor([0, 1, 3, 2])]; + tensor var_13516 = const()[name = string("op_13516"), val = tensor([1, 8, 128, 64])]; + tensor var_13517 = reshape(shape = var_13516, x = value_states_177)[name = string("op_13517")]; + tensor var_13522 = const()[name = string("op_13522"), val = tensor([0, 1, 3, 2])]; + int32 var_13533 = const()[name = string("op_13533"), val = int32(-1)]; + fp16 const_754_promoted = const()[name = string("const_754_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_225 = transpose(perm = var_13500, x = var_13495)[name = string("transpose_52")]; + tensor var_13535 = mul(x = hidden_states_225, y = const_754_promoted)[name = string("op_13535")]; + bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; + tensor input_401 = concat(axis = var_13533, interleave = input_401_interleave_0, values = (hidden_states_225, var_13535))[name = string("input_401")]; + tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; + fp16 var_13530_to_fp16 = const()[name = string("op_13530_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_13530_to_fp16, x = input_401)[name = string("normed_357_cast_fp16")]; + tensor normed_359_begin_0 = const()[name = string("normed_359_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_359_end_0 = const()[name = string("normed_359_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_359_end_mask_0 = const()[name = string("normed_359_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_359 = slice_by_index(begin = normed_359_begin_0, end = normed_359_end_0, end_mask = normed_359_end_mask_0, x = normed_357_cast_fp16)[name = string("normed_359")]; + tensor const_757 = const()[name = string("const_757"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453773824)))]; + tensor q_45 = mul(x = normed_359, y = const_757)[name = string("q_45")]; + int32 var_13558 = const()[name = string("op_13558"), val = int32(-1)]; + fp16 const_758_promoted = const()[name = string("const_758_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_227 = transpose(perm = var_13511, x = var_13506)[name = string("transpose_51")]; + tensor var_13560 = mul(x = hidden_states_227, y = const_758_promoted)[name = string("op_13560")]; + bool input_403_interleave_0 = const()[name = string("input_403_interleave_0"), val = bool(false)]; + tensor input_403 = concat(axis = var_13558, interleave = input_403_interleave_0, values = (hidden_states_227, var_13560))[name = string("input_403")]; + tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; + fp16 var_13555_to_fp16 = const()[name = string("op_13555_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_13555_to_fp16, x = input_403)[name = string("normed_361_cast_fp16")]; + tensor normed_363_begin_0 = const()[name = string("normed_363_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_363_end_0 = const()[name = string("normed_363_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_363_end_mask_0 = const()[name = string("normed_363_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_363 = slice_by_index(begin = normed_363_begin_0, end = normed_363_end_0, end_mask = normed_363_end_mask_0, x = normed_361_cast_fp16)[name = string("normed_363")]; + tensor const_761 = const()[name = string("const_761"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453774144)))]; + tensor k_45 = mul(x = normed_363, y = const_761)[name = string("k_45")]; + tensor var_13586 = mul(x = q_45, y = cos_5)[name = string("op_13586")]; + tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_89 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45)[name = string("x1_89")]; + tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_89 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45)[name = string("x2_89")]; + fp16 const_764_promoted = const()[name = string("const_764_promoted"), val = fp16(-0x1p+0)]; + tensor var_13607 = mul(x = x2_89, y = const_764_promoted)[name = string("op_13607")]; + int32 var_13609 = const()[name = string("op_13609"), val = int32(-1)]; + bool var_13610_interleave_0 = const()[name = string("op_13610_interleave_0"), val = bool(false)]; + tensor var_13610 = concat(axis = var_13609, interleave = var_13610_interleave_0, values = (var_13607, x1_89))[name = string("op_13610")]; + tensor var_13611 = mul(x = var_13610, y = sin_5)[name = string("op_13611")]; + tensor query_states_179 = add(x = var_13586, y = var_13611)[name = string("query_states_179")]; + tensor var_13614 = mul(x = k_45, y = cos_5)[name = string("op_13614")]; + tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_91 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45)[name = string("x1_91")]; + tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_91 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45)[name = string("x2_91")]; + fp16 const_767_promoted = const()[name = string("const_767_promoted"), val = fp16(-0x1p+0)]; + tensor var_13635 = mul(x = x2_91, y = const_767_promoted)[name = string("op_13635")]; + int32 var_13637 = const()[name = string("op_13637"), val = int32(-1)]; + bool var_13638_interleave_0 = const()[name = string("op_13638_interleave_0"), val = bool(false)]; + tensor var_13638 = concat(axis = var_13637, interleave = var_13638_interleave_0, values = (var_13635, x1_91))[name = string("op_13638")]; + tensor var_13639 = mul(x = var_13638, y = sin_5)[name = string("op_13639")]; + tensor key_states_223 = add(x = var_13614, y = var_13639)[name = string("key_states_223")]; + tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([22])]; + tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; + tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; + tensor expand_dims_268 = const()[name = string("expand_dims_268"), val = tensor([23])]; + int32 concat_398_axis_0 = const()[name = string("concat_398_axis_0"), val = int32(0)]; + bool concat_398_interleave_0 = const()[name = string("concat_398_interleave_0"), val = bool(false)]; + tensor concat_398 = concat(axis = concat_398_axis_0, interleave = concat_398_interleave_0, values = (expand_dims_264, expand_dims_265, current_pos, expand_dims_267))[name = string("concat_398")]; + tensor concat_399_values1_0 = const()[name = string("concat_399_values1_0"), val = tensor([0])]; + tensor concat_399_values3_0 = const()[name = string("concat_399_values3_0"), val = tensor([0])]; + int32 concat_399_axis_0 = const()[name = string("concat_399_axis_0"), val = int32(0)]; + bool concat_399_interleave_0 = const()[name = string("concat_399_interleave_0"), val = bool(false)]; + tensor concat_399 = concat(axis = concat_399_axis_0, interleave = concat_399_interleave_0, values = (expand_dims_268, concat_399_values1_0, var_1781, concat_399_values3_0))[name = string("concat_399")]; + tensor model_model_kv_cache_0_internal_tensor_assign_45_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16 = slice_update(begin = concat_398, begin_mask = model_model_kv_cache_0_internal_tensor_assign_45_begin_mask_0, end = concat_399, end_mask = model_model_kv_cache_0_internal_tensor_assign_45_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_45_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_45_stride_0, update = key_states_223, x = coreml_update_state_99)[name = string("model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_45_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_100_write_state")]; + tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_100")]; + tensor expand_dims_270 = const()[name = string("expand_dims_270"), val = tensor([50])]; + tensor expand_dims_271 = const()[name = string("expand_dims_271"), val = tensor([0])]; + tensor expand_dims_273 = const()[name = string("expand_dims_273"), val = tensor([0])]; + tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([51])]; + int32 concat_402_axis_0 = const()[name = string("concat_402_axis_0"), val = int32(0)]; + bool concat_402_interleave_0 = const()[name = string("concat_402_interleave_0"), val = bool(false)]; + tensor concat_402 = concat(axis = concat_402_axis_0, interleave = concat_402_interleave_0, values = (expand_dims_270, expand_dims_271, current_pos, expand_dims_273))[name = string("concat_402")]; + tensor concat_403_values1_0 = const()[name = string("concat_403_values1_0"), val = tensor([0])]; + tensor concat_403_values3_0 = const()[name = string("concat_403_values3_0"), val = tensor([0])]; + int32 concat_403_axis_0 = const()[name = string("concat_403_axis_0"), val = int32(0)]; + bool concat_403_interleave_0 = const()[name = string("concat_403_interleave_0"), val = bool(false)]; + tensor concat_403 = concat(axis = concat_403_axis_0, interleave = concat_403_interleave_0, values = (expand_dims_274, concat_403_values1_0, var_1781, concat_403_values3_0))[name = string("concat_403")]; + tensor model_model_kv_cache_0_internal_tensor_assign_46_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_179 = transpose(perm = var_13522, x = var_13517)[name = string("transpose_50")]; + tensor model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16 = slice_update(begin = concat_402, begin_mask = model_model_kv_cache_0_internal_tensor_assign_46_begin_mask_0, end = concat_403, end_mask = model_model_kv_cache_0_internal_tensor_assign_46_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_46_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_46_stride_0, update = value_states_179, x = coreml_update_state_100)[name = string("model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_46_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_101_write_state")]; + tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_101")]; + tensor var_13710_begin_0 = const()[name = string("op_13710_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor var_13710_end_0 = const()[name = string("op_13710_end_0"), val = tensor([23, 8, 4096, 128])]; + tensor var_13710_end_mask_0 = const()[name = string("op_13710_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13710_cast_fp16 = slice_by_index(begin = var_13710_begin_0, end = var_13710_end_0, end_mask = var_13710_end_mask_0, x = coreml_update_state_101)[name = string("op_13710_cast_fp16")]; + tensor K_layer_cache_45_axes_0 = const()[name = string("K_layer_cache_45_axes_0"), val = tensor([0])]; + tensor K_layer_cache_45_cast_fp16 = squeeze(axes = K_layer_cache_45_axes_0, x = var_13710_cast_fp16)[name = string("K_layer_cache_45_cast_fp16")]; + tensor var_13717_begin_0 = const()[name = string("op_13717_begin_0"), val = tensor([50, 0, 0, 0])]; + tensor var_13717_end_0 = const()[name = string("op_13717_end_0"), val = tensor([51, 8, 4096, 128])]; + tensor var_13717_end_mask_0 = const()[name = string("op_13717_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_13717_cast_fp16 = slice_by_index(begin = var_13717_begin_0, end = var_13717_end_0, end_mask = var_13717_end_mask_0, x = coreml_update_state_101)[name = string("op_13717_cast_fp16")]; + tensor V_layer_cache_45_axes_0 = const()[name = string("V_layer_cache_45_axes_0"), val = tensor([0])]; + tensor V_layer_cache_45_cast_fp16 = squeeze(axes = V_layer_cache_45_axes_0, x = var_13717_cast_fp16)[name = string("V_layer_cache_45_cast_fp16")]; + tensor x_355_axes_0 = const()[name = string("x_355_axes_0"), val = tensor([1])]; + tensor x_355_cast_fp16 = expand_dims(axes = x_355_axes_0, x = K_layer_cache_45_cast_fp16)[name = string("x_355_cast_fp16")]; + tensor var_13746 = const()[name = string("op_13746"), val = tensor([1, 2, 1, 1])]; + tensor x_357_cast_fp16 = tile(reps = var_13746, x = x_355_cast_fp16)[name = string("x_357_cast_fp16")]; + tensor var_13758 = const()[name = string("op_13758"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_227_cast_fp16 = reshape(shape = var_13758, x = x_357_cast_fp16)[name = string("key_states_227_cast_fp16")]; + tensor x_361_axes_0 = const()[name = string("x_361_axes_0"), val = tensor([1])]; + tensor x_361_cast_fp16 = expand_dims(axes = x_361_axes_0, x = V_layer_cache_45_cast_fp16)[name = string("x_361_cast_fp16")]; + tensor var_13766 = const()[name = string("op_13766"), val = tensor([1, 2, 1, 1])]; + tensor x_363_cast_fp16 = tile(reps = var_13766, x = x_361_cast_fp16)[name = string("x_363_cast_fp16")]; + bool var_13793_transpose_x_0 = const()[name = string("op_13793_transpose_x_0"), val = bool(false)]; + bool var_13793_transpose_y_0 = const()[name = string("op_13793_transpose_y_0"), val = bool(true)]; + tensor var_13793 = matmul(transpose_x = var_13793_transpose_x_0, transpose_y = var_13793_transpose_y_0, x = query_states_179, y = key_states_227_cast_fp16)[name = string("op_13793")]; + fp16 var_13794_to_fp16 = const()[name = string("op_13794_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_89_cast_fp16 = mul(x = var_13793, y = var_13794_to_fp16)[name = string("attn_weights_89_cast_fp16")]; + tensor attn_weights_91_cast_fp16 = add(x = attn_weights_89_cast_fp16, y = causal_mask)[name = string("attn_weights_91_cast_fp16")]; + int32 var_13829 = const()[name = string("op_13829"), val = int32(-1)]; + tensor var_13831_cast_fp16 = softmax(axis = var_13829, x = attn_weights_91_cast_fp16)[name = string("op_13831_cast_fp16")]; + tensor concat_408 = const()[name = string("concat_408"), val = tensor([16, 64, 4096])]; + tensor reshape_66_cast_fp16 = reshape(shape = concat_408, x = var_13831_cast_fp16)[name = string("reshape_66_cast_fp16")]; + tensor concat_409 = const()[name = string("concat_409"), val = tensor([16, 4096, 128])]; + tensor reshape_67_cast_fp16 = reshape(shape = concat_409, x = x_363_cast_fp16)[name = string("reshape_67_cast_fp16")]; + bool matmul_22_transpose_x_0 = const()[name = string("matmul_22_transpose_x_0"), val = bool(false)]; + bool matmul_22_transpose_y_0 = const()[name = string("matmul_22_transpose_y_0"), val = bool(false)]; + tensor matmul_22_cast_fp16 = matmul(transpose_x = matmul_22_transpose_x_0, transpose_y = matmul_22_transpose_y_0, x = reshape_66_cast_fp16, y = reshape_67_cast_fp16)[name = string("matmul_22_cast_fp16")]; + tensor concat_413 = const()[name = string("concat_413"), val = tensor([1, 16, 64, 128])]; + tensor reshape_68_cast_fp16 = reshape(shape = concat_413, x = matmul_22_cast_fp16)[name = string("reshape_68_cast_fp16")]; + tensor var_13843_perm_0 = const()[name = string("op_13843_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_13862 = const()[name = string("op_13862"), val = tensor([1, 64, 2048])]; + tensor var_13843_cast_fp16 = transpose(perm = var_13843_perm_0, x = reshape_68_cast_fp16)[name = string("transpose_49")]; + tensor attn_output_225_cast_fp16 = reshape(shape = var_13862, x = var_13843_cast_fp16)[name = string("attn_output_225_cast_fp16")]; + tensor var_13867 = const()[name = string("op_13867"), val = tensor([0, 2, 1])]; + string var_13883_pad_type_0 = const()[name = string("op_13883_pad_type_0"), val = string("valid")]; + int32 var_13883_groups_0 = const()[name = string("op_13883_groups_0"), val = int32(1)]; + tensor var_13883_strides_0 = const()[name = string("op_13883_strides_0"), val = tensor([1])]; + tensor var_13883_pad_0 = const()[name = string("op_13883_pad_0"), val = tensor([0, 0])]; + tensor var_13883_dilations_0 = const()[name = string("op_13883_dilations_0"), val = tensor([1])]; + tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453774464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455871680))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_13868_cast_fp16 = transpose(perm = var_13867, x = attn_output_225_cast_fp16)[name = string("transpose_48")]; + tensor var_13883_cast_fp16 = conv(dilations = var_13883_dilations_0, groups = var_13883_groups_0, pad = var_13883_pad_0, pad_type = var_13883_pad_type_0, strides = var_13883_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_13868_cast_fp16)[name = string("op_13883_cast_fp16")]; + tensor var_13887 = const()[name = string("op_13887"), val = tensor([0, 2, 1])]; + tensor attn_output_229_cast_fp16 = transpose(perm = var_13887, x = var_13883_cast_fp16)[name = string("transpose_47")]; + tensor hidden_states_229_cast_fp16 = add(x = hidden_states_221_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; + int32 var_13900 = const()[name = string("op_13900"), val = int32(-1)]; + fp16 const_779_promoted_to_fp16 = const()[name = string("const_779_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13902_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = const_779_promoted_to_fp16)[name = string("op_13902_cast_fp16")]; + bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; + tensor input_407_cast_fp16 = concat(axis = var_13900, interleave = input_407_interleave_0, values = (hidden_states_229_cast_fp16, var_13902_cast_fp16))[name = string("input_407_cast_fp16")]; + tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; + fp16 var_13897_to_fp16 = const()[name = string("op_13897_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_13897_to_fp16, x = input_407_cast_fp16)[name = string("normed_365_cast_fp16")]; + tensor normed_367_begin_0 = const()[name = string("normed_367_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_367_end_0 = const()[name = string("normed_367_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_367_end_mask_0 = const()[name = string("normed_367_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_367_cast_fp16 = slice_by_index(begin = normed_367_begin_0, end = normed_367_end_0, end_mask = normed_367_end_mask_0, x = normed_365_cast_fp16)[name = string("normed_367_cast_fp16")]; + tensor const_782_promoted_to_fp16 = const()[name = string("const_782_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455937280)))]; + tensor x_365_cast_fp16 = mul(x = normed_367_cast_fp16, y = const_782_promoted_to_fp16)[name = string("x_365_cast_fp16")]; + tensor var_13927 = const()[name = string("op_13927"), val = tensor([0, 2, 1])]; + tensor input_409_axes_0 = const()[name = string("input_409_axes_0"), val = tensor([2])]; + tensor var_13928 = transpose(perm = var_13927, x = x_365_cast_fp16)[name = string("transpose_46")]; + tensor input_409 = expand_dims(axes = input_409_axes_0, x = var_13928)[name = string("input_409")]; + string input_411_pad_type_0 = const()[name = string("input_411_pad_type_0"), val = string("valid")]; + tensor input_411_strides_0 = const()[name = string("input_411_strides_0"), val = tensor([1, 1])]; + tensor input_411_pad_0 = const()[name = string("input_411_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_411_dilations_0 = const()[name = string("input_411_dilations_0"), val = tensor([1, 1])]; + int32 input_411_groups_0 = const()[name = string("input_411_groups_0"), val = int32(1)]; + tensor input_411 = conv(dilations = input_411_dilations_0, groups = input_411_groups_0, pad = input_411_pad_0, pad_type = input_411_pad_type_0, strides = input_411_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_palettized, x = input_409)[name = string("input_411")]; + string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; + tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; + tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; + int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; + tensor b_45 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_palettized, x = input_409)[name = string("b_45")]; + tensor c_45 = silu(x = input_411)[name = string("c_45")]; + tensor input_413 = mul(x = c_45, y = b_45)[name = string("input_413")]; + string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; + tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; + tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; + int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; + tensor e_45 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_palettized, x = input_413)[name = string("e_45")]; + tensor var_13950_axes_0 = const()[name = string("op_13950_axes_0"), val = tensor([2])]; + tensor var_13950 = squeeze(axes = var_13950_axes_0, x = e_45)[name = string("op_13950")]; + tensor var_13951 = const()[name = string("op_13951"), val = tensor([0, 2, 1])]; + tensor var_13952 = transpose(perm = var_13951, x = var_13950)[name = string("transpose_45")]; + tensor hidden_states_231_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = var_13952)[name = string("hidden_states_231_cast_fp16")]; + int32 var_13964 = const()[name = string("op_13964"), val = int32(-1)]; + fp16 const_783_promoted_to_fp16 = const()[name = string("const_783_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_13966_cast_fp16 = mul(x = hidden_states_231_cast_fp16, y = const_783_promoted_to_fp16)[name = string("op_13966_cast_fp16")]; + bool input_415_interleave_0 = const()[name = string("input_415_interleave_0"), val = bool(false)]; + tensor input_415_cast_fp16 = concat(axis = var_13964, interleave = input_415_interleave_0, values = (hidden_states_231_cast_fp16, var_13966_cast_fp16))[name = string("input_415_cast_fp16")]; + tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; + fp16 var_13961_to_fp16 = const()[name = string("op_13961_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_13961_to_fp16, x = input_415_cast_fp16)[name = string("normed_369_cast_fp16")]; + tensor normed_371_begin_0 = const()[name = string("normed_371_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_371_end_0 = const()[name = string("normed_371_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_371_end_mask_0 = const()[name = string("normed_371_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_371_cast_fp16 = slice_by_index(begin = normed_371_begin_0, end = normed_371_end_0, end_mask = normed_371_end_mask_0, x = normed_369_cast_fp16)[name = string("normed_371_cast_fp16")]; + tensor const_786_promoted_to_fp16 = const()[name = string("const_786_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455939392)))]; + tensor hidden_states_233_cast_fp16 = mul(x = normed_371_cast_fp16, y = const_786_promoted_to_fp16)[name = string("hidden_states_233_cast_fp16")]; + tensor var_13989 = const()[name = string("op_13989"), val = tensor([0, 2, 1])]; + tensor var_13992_axes_0 = const()[name = string("op_13992_axes_0"), val = tensor([2])]; + tensor var_13990_cast_fp16 = transpose(perm = var_13989, x = hidden_states_233_cast_fp16)[name = string("transpose_44")]; + tensor var_13992_cast_fp16 = expand_dims(axes = var_13992_axes_0, x = var_13990_cast_fp16)[name = string("op_13992_cast_fp16")]; + string query_states_185_pad_type_0 = const()[name = string("query_states_185_pad_type_0"), val = string("valid")]; + tensor query_states_185_strides_0 = const()[name = string("query_states_185_strides_0"), val = tensor([1, 1])]; + tensor query_states_185_pad_0 = const()[name = string("query_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_185_dilations_0 = const()[name = string("query_states_185_dilations_0"), val = tensor([1, 1])]; + int32 query_states_185_groups_0 = const()[name = string("query_states_185_groups_0"), val = int32(1)]; + tensor query_states_185 = conv(dilations = query_states_185_dilations_0, groups = query_states_185_groups_0, pad = query_states_185_pad_0, pad_type = query_states_185_pad_type_0, strides = query_states_185_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_13992_cast_fp16)[name = string("query_states_185")]; + string key_states_231_pad_type_0 = const()[name = string("key_states_231_pad_type_0"), val = string("valid")]; + tensor key_states_231_strides_0 = const()[name = string("key_states_231_strides_0"), val = tensor([1, 1])]; + tensor key_states_231_pad_0 = const()[name = string("key_states_231_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_231_dilations_0 = const()[name = string("key_states_231_dilations_0"), val = tensor([1, 1])]; + int32 key_states_231_groups_0 = const()[name = string("key_states_231_groups_0"), val = int32(1)]; + tensor key_states_231 = conv(dilations = key_states_231_dilations_0, groups = key_states_231_groups_0, pad = key_states_231_pad_0, pad_type = key_states_231_pad_type_0, strides = key_states_231_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_13992_cast_fp16)[name = string("key_states_231")]; + string value_states_185_pad_type_0 = const()[name = string("value_states_185_pad_type_0"), val = string("valid")]; + tensor value_states_185_strides_0 = const()[name = string("value_states_185_strides_0"), val = tensor([1, 1])]; + tensor value_states_185_pad_0 = const()[name = string("value_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_185_dilations_0 = const()[name = string("value_states_185_dilations_0"), val = tensor([1, 1])]; + int32 value_states_185_groups_0 = const()[name = string("value_states_185_groups_0"), val = int32(1)]; + tensor value_states_185 = conv(dilations = value_states_185_dilations_0, groups = value_states_185_groups_0, pad = value_states_185_pad_0, pad_type = value_states_185_pad_type_0, strides = value_states_185_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_13992_cast_fp16)[name = string("value_states_185")]; + tensor var_14034 = const()[name = string("op_14034"), val = tensor([1, 16, 128, 64])]; + tensor var_14035 = reshape(shape = var_14034, x = query_states_185)[name = string("op_14035")]; + tensor var_14040 = const()[name = string("op_14040"), val = tensor([0, 1, 3, 2])]; + tensor var_14045 = const()[name = string("op_14045"), val = tensor([1, 8, 128, 64])]; + tensor var_14046 = reshape(shape = var_14045, x = key_states_231)[name = string("op_14046")]; + tensor var_14051 = const()[name = string("op_14051"), val = tensor([0, 1, 3, 2])]; + tensor var_14056 = const()[name = string("op_14056"), val = tensor([1, 8, 128, 64])]; + tensor var_14057 = reshape(shape = var_14056, x = value_states_185)[name = string("op_14057")]; + tensor var_14062 = const()[name = string("op_14062"), val = tensor([0, 1, 3, 2])]; + int32 var_14073 = const()[name = string("op_14073"), val = int32(-1)]; + fp16 const_788_promoted = const()[name = string("const_788_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_235 = transpose(perm = var_14040, x = var_14035)[name = string("transpose_43")]; + tensor var_14075 = mul(x = hidden_states_235, y = const_788_promoted)[name = string("op_14075")]; + bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; + tensor input_419 = concat(axis = var_14073, interleave = input_419_interleave_0, values = (hidden_states_235, var_14075))[name = string("input_419")]; + tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; + fp16 var_14070_to_fp16 = const()[name = string("op_14070_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_14070_to_fp16, x = input_419)[name = string("normed_373_cast_fp16")]; + tensor normed_375_begin_0 = const()[name = string("normed_375_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_375_end_0 = const()[name = string("normed_375_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_375_end_mask_0 = const()[name = string("normed_375_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_375 = slice_by_index(begin = normed_375_begin_0, end = normed_375_end_0, end_mask = normed_375_end_mask_0, x = normed_373_cast_fp16)[name = string("normed_375")]; + tensor const_791 = const()[name = string("const_791"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455941504)))]; + tensor q_47 = mul(x = normed_375, y = const_791)[name = string("q_47")]; + int32 var_14098 = const()[name = string("op_14098"), val = int32(-1)]; + fp16 const_792_promoted = const()[name = string("const_792_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_237 = transpose(perm = var_14051, x = var_14046)[name = string("transpose_42")]; + tensor var_14100 = mul(x = hidden_states_237, y = const_792_promoted)[name = string("op_14100")]; + bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; + tensor input_421 = concat(axis = var_14098, interleave = input_421_interleave_0, values = (hidden_states_237, var_14100))[name = string("input_421")]; + tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; + fp16 var_14095_to_fp16 = const()[name = string("op_14095_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_14095_to_fp16, x = input_421)[name = string("normed_377_cast_fp16")]; + tensor normed_379_begin_0 = const()[name = string("normed_379_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_379_end_0 = const()[name = string("normed_379_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_379_end_mask_0 = const()[name = string("normed_379_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_379 = slice_by_index(begin = normed_379_begin_0, end = normed_379_end_0, end_mask = normed_379_end_mask_0, x = normed_377_cast_fp16)[name = string("normed_379")]; + tensor const_795 = const()[name = string("const_795"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455941824)))]; + tensor k_47 = mul(x = normed_379, y = const_795)[name = string("k_47")]; + tensor var_14126 = mul(x = q_47, y = cos_5)[name = string("op_14126")]; + tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_93 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47)[name = string("x1_93")]; + tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_93 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47)[name = string("x2_93")]; + fp16 const_798_promoted = const()[name = string("const_798_promoted"), val = fp16(-0x1p+0)]; + tensor var_14147 = mul(x = x2_93, y = const_798_promoted)[name = string("op_14147")]; + int32 var_14149 = const()[name = string("op_14149"), val = int32(-1)]; + bool var_14150_interleave_0 = const()[name = string("op_14150_interleave_0"), val = bool(false)]; + tensor var_14150 = concat(axis = var_14149, interleave = var_14150_interleave_0, values = (var_14147, x1_93))[name = string("op_14150")]; + tensor var_14151 = mul(x = var_14150, y = sin_5)[name = string("op_14151")]; + tensor query_states_187 = add(x = var_14126, y = var_14151)[name = string("query_states_187")]; + tensor var_14154 = mul(x = k_47, y = cos_5)[name = string("op_14154")]; + tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_95 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47)[name = string("x1_95")]; + tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_95 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47)[name = string("x2_95")]; + fp16 const_801_promoted = const()[name = string("const_801_promoted"), val = fp16(-0x1p+0)]; + tensor var_14175 = mul(x = x2_95, y = const_801_promoted)[name = string("op_14175")]; + int32 var_14177 = const()[name = string("op_14177"), val = int32(-1)]; + bool var_14178_interleave_0 = const()[name = string("op_14178_interleave_0"), val = bool(false)]; + tensor var_14178 = concat(axis = var_14177, interleave = var_14178_interleave_0, values = (var_14175, x1_95))[name = string("op_14178")]; + tensor var_14179 = mul(x = var_14178, y = sin_5)[name = string("op_14179")]; + tensor key_states_233 = add(x = var_14154, y = var_14179)[name = string("key_states_233")]; + tensor expand_dims_276 = const()[name = string("expand_dims_276"), val = tensor([23])]; + tensor expand_dims_277 = const()[name = string("expand_dims_277"), val = tensor([0])]; + tensor expand_dims_279 = const()[name = string("expand_dims_279"), val = tensor([0])]; + tensor expand_dims_280 = const()[name = string("expand_dims_280"), val = tensor([24])]; + int32 concat_416_axis_0 = const()[name = string("concat_416_axis_0"), val = int32(0)]; + bool concat_416_interleave_0 = const()[name = string("concat_416_interleave_0"), val = bool(false)]; + tensor concat_416 = concat(axis = concat_416_axis_0, interleave = concat_416_interleave_0, values = (expand_dims_276, expand_dims_277, current_pos, expand_dims_279))[name = string("concat_416")]; + tensor concat_417_values1_0 = const()[name = string("concat_417_values1_0"), val = tensor([0])]; + tensor concat_417_values3_0 = const()[name = string("concat_417_values3_0"), val = tensor([0])]; + int32 concat_417_axis_0 = const()[name = string("concat_417_axis_0"), val = int32(0)]; + bool concat_417_interleave_0 = const()[name = string("concat_417_interleave_0"), val = bool(false)]; + tensor concat_417 = concat(axis = concat_417_axis_0, interleave = concat_417_interleave_0, values = (expand_dims_280, concat_417_values1_0, var_1781, concat_417_values3_0))[name = string("concat_417")]; + tensor model_model_kv_cache_0_internal_tensor_assign_47_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16 = slice_update(begin = concat_416, begin_mask = model_model_kv_cache_0_internal_tensor_assign_47_begin_mask_0, end = concat_417, end_mask = model_model_kv_cache_0_internal_tensor_assign_47_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_47_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_47_stride_0, update = key_states_233, x = coreml_update_state_101)[name = string("model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_47_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_102_write_state")]; + tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_102")]; + tensor expand_dims_282 = const()[name = string("expand_dims_282"), val = tensor([51])]; + tensor expand_dims_283 = const()[name = string("expand_dims_283"), val = tensor([0])]; + tensor expand_dims_285 = const()[name = string("expand_dims_285"), val = tensor([0])]; + tensor expand_dims_286 = const()[name = string("expand_dims_286"), val = tensor([52])]; + int32 concat_420_axis_0 = const()[name = string("concat_420_axis_0"), val = int32(0)]; + bool concat_420_interleave_0 = const()[name = string("concat_420_interleave_0"), val = bool(false)]; + tensor concat_420 = concat(axis = concat_420_axis_0, interleave = concat_420_interleave_0, values = (expand_dims_282, expand_dims_283, current_pos, expand_dims_285))[name = string("concat_420")]; + tensor concat_421_values1_0 = const()[name = string("concat_421_values1_0"), val = tensor([0])]; + tensor concat_421_values3_0 = const()[name = string("concat_421_values3_0"), val = tensor([0])]; + int32 concat_421_axis_0 = const()[name = string("concat_421_axis_0"), val = int32(0)]; + bool concat_421_interleave_0 = const()[name = string("concat_421_interleave_0"), val = bool(false)]; + tensor concat_421 = concat(axis = concat_421_axis_0, interleave = concat_421_interleave_0, values = (expand_dims_286, concat_421_values1_0, var_1781, concat_421_values3_0))[name = string("concat_421")]; + tensor model_model_kv_cache_0_internal_tensor_assign_48_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_187 = transpose(perm = var_14062, x = var_14057)[name = string("transpose_41")]; + tensor model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16 = slice_update(begin = concat_420, begin_mask = model_model_kv_cache_0_internal_tensor_assign_48_begin_mask_0, end = concat_421, end_mask = model_model_kv_cache_0_internal_tensor_assign_48_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_48_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_48_stride_0, update = value_states_187, x = coreml_update_state_102)[name = string("model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_48_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_103_write_state")]; + tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_103")]; + tensor var_14250_begin_0 = const()[name = string("op_14250_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor var_14250_end_0 = const()[name = string("op_14250_end_0"), val = tensor([24, 8, 4096, 128])]; + tensor var_14250_end_mask_0 = const()[name = string("op_14250_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14250_cast_fp16 = slice_by_index(begin = var_14250_begin_0, end = var_14250_end_0, end_mask = var_14250_end_mask_0, x = coreml_update_state_103)[name = string("op_14250_cast_fp16")]; + tensor K_layer_cache_47_axes_0 = const()[name = string("K_layer_cache_47_axes_0"), val = tensor([0])]; + tensor K_layer_cache_47_cast_fp16 = squeeze(axes = K_layer_cache_47_axes_0, x = var_14250_cast_fp16)[name = string("K_layer_cache_47_cast_fp16")]; + tensor var_14257_begin_0 = const()[name = string("op_14257_begin_0"), val = tensor([51, 0, 0, 0])]; + tensor var_14257_end_0 = const()[name = string("op_14257_end_0"), val = tensor([52, 8, 4096, 128])]; + tensor var_14257_end_mask_0 = const()[name = string("op_14257_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14257_cast_fp16 = slice_by_index(begin = var_14257_begin_0, end = var_14257_end_0, end_mask = var_14257_end_mask_0, x = coreml_update_state_103)[name = string("op_14257_cast_fp16")]; + tensor V_layer_cache_47_axes_0 = const()[name = string("V_layer_cache_47_axes_0"), val = tensor([0])]; + tensor V_layer_cache_47_cast_fp16 = squeeze(axes = V_layer_cache_47_axes_0, x = var_14257_cast_fp16)[name = string("V_layer_cache_47_cast_fp16")]; + tensor x_371_axes_0 = const()[name = string("x_371_axes_0"), val = tensor([1])]; + tensor x_371_cast_fp16 = expand_dims(axes = x_371_axes_0, x = K_layer_cache_47_cast_fp16)[name = string("x_371_cast_fp16")]; + tensor var_14286 = const()[name = string("op_14286"), val = tensor([1, 2, 1, 1])]; + tensor x_373_cast_fp16 = tile(reps = var_14286, x = x_371_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_14298 = const()[name = string("op_14298"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_237_cast_fp16 = reshape(shape = var_14298, x = x_373_cast_fp16)[name = string("key_states_237_cast_fp16")]; + tensor x_377_axes_0 = const()[name = string("x_377_axes_0"), val = tensor([1])]; + tensor x_377_cast_fp16 = expand_dims(axes = x_377_axes_0, x = V_layer_cache_47_cast_fp16)[name = string("x_377_cast_fp16")]; + tensor var_14306 = const()[name = string("op_14306"), val = tensor([1, 2, 1, 1])]; + tensor x_379_cast_fp16 = tile(reps = var_14306, x = x_377_cast_fp16)[name = string("x_379_cast_fp16")]; + bool var_14333_transpose_x_0 = const()[name = string("op_14333_transpose_x_0"), val = bool(false)]; + bool var_14333_transpose_y_0 = const()[name = string("op_14333_transpose_y_0"), val = bool(true)]; + tensor var_14333 = matmul(transpose_x = var_14333_transpose_x_0, transpose_y = var_14333_transpose_y_0, x = query_states_187, y = key_states_237_cast_fp16)[name = string("op_14333")]; + fp16 var_14334_to_fp16 = const()[name = string("op_14334_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_93_cast_fp16 = mul(x = var_14333, y = var_14334_to_fp16)[name = string("attn_weights_93_cast_fp16")]; + tensor attn_weights_95_cast_fp16 = add(x = attn_weights_93_cast_fp16, y = causal_mask)[name = string("attn_weights_95_cast_fp16")]; + int32 var_14369 = const()[name = string("op_14369"), val = int32(-1)]; + tensor var_14371_cast_fp16 = softmax(axis = var_14369, x = attn_weights_95_cast_fp16)[name = string("op_14371_cast_fp16")]; + tensor concat_426 = const()[name = string("concat_426"), val = tensor([16, 64, 4096])]; + tensor reshape_69_cast_fp16 = reshape(shape = concat_426, x = var_14371_cast_fp16)[name = string("reshape_69_cast_fp16")]; + tensor concat_427 = const()[name = string("concat_427"), val = tensor([16, 4096, 128])]; + tensor reshape_70_cast_fp16 = reshape(shape = concat_427, x = x_379_cast_fp16)[name = string("reshape_70_cast_fp16")]; + bool matmul_23_transpose_x_0 = const()[name = string("matmul_23_transpose_x_0"), val = bool(false)]; + bool matmul_23_transpose_y_0 = const()[name = string("matmul_23_transpose_y_0"), val = bool(false)]; + tensor matmul_23_cast_fp16 = matmul(transpose_x = matmul_23_transpose_x_0, transpose_y = matmul_23_transpose_y_0, x = reshape_69_cast_fp16, y = reshape_70_cast_fp16)[name = string("matmul_23_cast_fp16")]; + tensor concat_431 = const()[name = string("concat_431"), val = tensor([1, 16, 64, 128])]; + tensor reshape_71_cast_fp16 = reshape(shape = concat_431, x = matmul_23_cast_fp16)[name = string("reshape_71_cast_fp16")]; + tensor var_14383_perm_0 = const()[name = string("op_14383_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_14402 = const()[name = string("op_14402"), val = tensor([1, 64, 2048])]; + tensor var_14383_cast_fp16 = transpose(perm = var_14383_perm_0, x = reshape_71_cast_fp16)[name = string("transpose_40")]; + tensor attn_output_235_cast_fp16 = reshape(shape = var_14402, x = var_14383_cast_fp16)[name = string("attn_output_235_cast_fp16")]; + tensor var_14407 = const()[name = string("op_14407"), val = tensor([0, 2, 1])]; + string var_14423_pad_type_0 = const()[name = string("op_14423_pad_type_0"), val = string("valid")]; + int32 var_14423_groups_0 = const()[name = string("op_14423_groups_0"), val = int32(1)]; + tensor var_14423_strides_0 = const()[name = string("op_14423_strides_0"), val = tensor([1])]; + tensor var_14423_pad_0 = const()[name = string("op_14423_pad_0"), val = tensor([0, 0])]; + tensor var_14423_dilations_0 = const()[name = string("op_14423_dilations_0"), val = tensor([1])]; + tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455942144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458039360))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_14408_cast_fp16 = transpose(perm = var_14407, x = attn_output_235_cast_fp16)[name = string("transpose_39")]; + tensor var_14423_cast_fp16 = conv(dilations = var_14423_dilations_0, groups = var_14423_groups_0, pad = var_14423_pad_0, pad_type = var_14423_pad_type_0, strides = var_14423_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_14408_cast_fp16)[name = string("op_14423_cast_fp16")]; + tensor var_14427 = const()[name = string("op_14427"), val = tensor([0, 2, 1])]; + tensor attn_output_239_cast_fp16 = transpose(perm = var_14427, x = var_14423_cast_fp16)[name = string("transpose_38")]; + tensor hidden_states_239_cast_fp16 = add(x = hidden_states_231_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + int32 var_14440 = const()[name = string("op_14440"), val = int32(-1)]; + fp16 const_813_promoted_to_fp16 = const()[name = string("const_813_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14442_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = const_813_promoted_to_fp16)[name = string("op_14442_cast_fp16")]; + bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; + tensor input_425_cast_fp16 = concat(axis = var_14440, interleave = input_425_interleave_0, values = (hidden_states_239_cast_fp16, var_14442_cast_fp16))[name = string("input_425_cast_fp16")]; + tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; + fp16 var_14437_to_fp16 = const()[name = string("op_14437_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_14437_to_fp16, x = input_425_cast_fp16)[name = string("normed_381_cast_fp16")]; + tensor normed_383_begin_0 = const()[name = string("normed_383_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_383_end_0 = const()[name = string("normed_383_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_383_end_mask_0 = const()[name = string("normed_383_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_383_cast_fp16 = slice_by_index(begin = normed_383_begin_0, end = normed_383_end_0, end_mask = normed_383_end_mask_0, x = normed_381_cast_fp16)[name = string("normed_383_cast_fp16")]; + tensor const_816_promoted_to_fp16 = const()[name = string("const_816_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458104960)))]; + tensor x_381_cast_fp16 = mul(x = normed_383_cast_fp16, y = const_816_promoted_to_fp16)[name = string("x_381_cast_fp16")]; + tensor var_14467 = const()[name = string("op_14467"), val = tensor([0, 2, 1])]; + tensor input_427_axes_0 = const()[name = string("input_427_axes_0"), val = tensor([2])]; + tensor var_14468 = transpose(perm = var_14467, x = x_381_cast_fp16)[name = string("transpose_37")]; + tensor input_427 = expand_dims(axes = input_427_axes_0, x = var_14468)[name = string("input_427")]; + string input_429_pad_type_0 = const()[name = string("input_429_pad_type_0"), val = string("valid")]; + tensor input_429_strides_0 = const()[name = string("input_429_strides_0"), val = tensor([1, 1])]; + tensor input_429_pad_0 = const()[name = string("input_429_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_429_dilations_0 = const()[name = string("input_429_dilations_0"), val = tensor([1, 1])]; + int32 input_429_groups_0 = const()[name = string("input_429_groups_0"), val = int32(1)]; + tensor input_429 = conv(dilations = input_429_dilations_0, groups = input_429_groups_0, pad = input_429_pad_0, pad_type = input_429_pad_type_0, strides = input_429_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_palettized, x = input_427)[name = string("input_429")]; + string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; + tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; + tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; + int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; + tensor b_47 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_palettized, x = input_427)[name = string("b_47")]; + tensor c_47 = silu(x = input_429)[name = string("c_47")]; + tensor input_431 = mul(x = c_47, y = b_47)[name = string("input_431")]; + string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; + tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; + tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; + int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; + tensor e_47 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_palettized, x = input_431)[name = string("e_47")]; + tensor var_14490_axes_0 = const()[name = string("op_14490_axes_0"), val = tensor([2])]; + tensor var_14490 = squeeze(axes = var_14490_axes_0, x = e_47)[name = string("op_14490")]; + tensor var_14491 = const()[name = string("op_14491"), val = tensor([0, 2, 1])]; + tensor var_14492 = transpose(perm = var_14491, x = var_14490)[name = string("transpose_36")]; + tensor hidden_states_241_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = var_14492)[name = string("hidden_states_241_cast_fp16")]; + int32 var_14504 = const()[name = string("op_14504"), val = int32(-1)]; + fp16 const_817_promoted_to_fp16 = const()[name = string("const_817_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14506_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = const_817_promoted_to_fp16)[name = string("op_14506_cast_fp16")]; + bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; + tensor input_433_cast_fp16 = concat(axis = var_14504, interleave = input_433_interleave_0, values = (hidden_states_241_cast_fp16, var_14506_cast_fp16))[name = string("input_433_cast_fp16")]; + tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; + fp16 var_14501_to_fp16 = const()[name = string("op_14501_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_14501_to_fp16, x = input_433_cast_fp16)[name = string("normed_385_cast_fp16")]; + tensor normed_387_begin_0 = const()[name = string("normed_387_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_387_end_0 = const()[name = string("normed_387_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_387_end_mask_0 = const()[name = string("normed_387_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_387_cast_fp16 = slice_by_index(begin = normed_387_begin_0, end = normed_387_end_0, end_mask = normed_387_end_mask_0, x = normed_385_cast_fp16)[name = string("normed_387_cast_fp16")]; + tensor const_820_promoted_to_fp16 = const()[name = string("const_820_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458107072)))]; + tensor hidden_states_243_cast_fp16 = mul(x = normed_387_cast_fp16, y = const_820_promoted_to_fp16)[name = string("hidden_states_243_cast_fp16")]; + tensor var_14529 = const()[name = string("op_14529"), val = tensor([0, 2, 1])]; + tensor var_14532_axes_0 = const()[name = string("op_14532_axes_0"), val = tensor([2])]; + tensor var_14530_cast_fp16 = transpose(perm = var_14529, x = hidden_states_243_cast_fp16)[name = string("transpose_35")]; + tensor var_14532_cast_fp16 = expand_dims(axes = var_14532_axes_0, x = var_14530_cast_fp16)[name = string("op_14532_cast_fp16")]; + string query_states_193_pad_type_0 = const()[name = string("query_states_193_pad_type_0"), val = string("valid")]; + tensor query_states_193_strides_0 = const()[name = string("query_states_193_strides_0"), val = tensor([1, 1])]; + tensor query_states_193_pad_0 = const()[name = string("query_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_193_dilations_0 = const()[name = string("query_states_193_dilations_0"), val = tensor([1, 1])]; + int32 query_states_193_groups_0 = const()[name = string("query_states_193_groups_0"), val = int32(1)]; + tensor query_states_193 = conv(dilations = query_states_193_dilations_0, groups = query_states_193_groups_0, pad = query_states_193_pad_0, pad_type = query_states_193_pad_type_0, strides = query_states_193_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_14532_cast_fp16)[name = string("query_states_193")]; + string key_states_241_pad_type_0 = const()[name = string("key_states_241_pad_type_0"), val = string("valid")]; + tensor key_states_241_strides_0 = const()[name = string("key_states_241_strides_0"), val = tensor([1, 1])]; + tensor key_states_241_pad_0 = const()[name = string("key_states_241_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_241_dilations_0 = const()[name = string("key_states_241_dilations_0"), val = tensor([1, 1])]; + int32 key_states_241_groups_0 = const()[name = string("key_states_241_groups_0"), val = int32(1)]; + tensor key_states_241 = conv(dilations = key_states_241_dilations_0, groups = key_states_241_groups_0, pad = key_states_241_pad_0, pad_type = key_states_241_pad_type_0, strides = key_states_241_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_14532_cast_fp16)[name = string("key_states_241")]; + string value_states_193_pad_type_0 = const()[name = string("value_states_193_pad_type_0"), val = string("valid")]; + tensor value_states_193_strides_0 = const()[name = string("value_states_193_strides_0"), val = tensor([1, 1])]; + tensor value_states_193_pad_0 = const()[name = string("value_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_193_dilations_0 = const()[name = string("value_states_193_dilations_0"), val = tensor([1, 1])]; + int32 value_states_193_groups_0 = const()[name = string("value_states_193_groups_0"), val = int32(1)]; + tensor value_states_193 = conv(dilations = value_states_193_dilations_0, groups = value_states_193_groups_0, pad = value_states_193_pad_0, pad_type = value_states_193_pad_type_0, strides = value_states_193_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_14532_cast_fp16)[name = string("value_states_193")]; + tensor var_14574 = const()[name = string("op_14574"), val = tensor([1, 16, 128, 64])]; + tensor var_14575 = reshape(shape = var_14574, x = query_states_193)[name = string("op_14575")]; + tensor var_14580 = const()[name = string("op_14580"), val = tensor([0, 1, 3, 2])]; + tensor var_14585 = const()[name = string("op_14585"), val = tensor([1, 8, 128, 64])]; + tensor var_14586 = reshape(shape = var_14585, x = key_states_241)[name = string("op_14586")]; + tensor var_14591 = const()[name = string("op_14591"), val = tensor([0, 1, 3, 2])]; + tensor var_14596 = const()[name = string("op_14596"), val = tensor([1, 8, 128, 64])]; + tensor var_14597 = reshape(shape = var_14596, x = value_states_193)[name = string("op_14597")]; + tensor var_14602 = const()[name = string("op_14602"), val = tensor([0, 1, 3, 2])]; + int32 var_14613 = const()[name = string("op_14613"), val = int32(-1)]; + fp16 const_822_promoted = const()[name = string("const_822_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_245 = transpose(perm = var_14580, x = var_14575)[name = string("transpose_34")]; + tensor var_14615 = mul(x = hidden_states_245, y = const_822_promoted)[name = string("op_14615")]; + bool input_437_interleave_0 = const()[name = string("input_437_interleave_0"), val = bool(false)]; + tensor input_437 = concat(axis = var_14613, interleave = input_437_interleave_0, values = (hidden_states_245, var_14615))[name = string("input_437")]; + tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; + fp16 var_14610_to_fp16 = const()[name = string("op_14610_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_14610_to_fp16, x = input_437)[name = string("normed_389_cast_fp16")]; + tensor normed_391_begin_0 = const()[name = string("normed_391_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_391_end_0 = const()[name = string("normed_391_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_391_end_mask_0 = const()[name = string("normed_391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_391 = slice_by_index(begin = normed_391_begin_0, end = normed_391_end_0, end_mask = normed_391_end_mask_0, x = normed_389_cast_fp16)[name = string("normed_391")]; + tensor const_825 = const()[name = string("const_825"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458109184)))]; + tensor q_49 = mul(x = normed_391, y = const_825)[name = string("q_49")]; + int32 var_14638 = const()[name = string("op_14638"), val = int32(-1)]; + fp16 const_826_promoted = const()[name = string("const_826_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_247 = transpose(perm = var_14591, x = var_14586)[name = string("transpose_33")]; + tensor var_14640 = mul(x = hidden_states_247, y = const_826_promoted)[name = string("op_14640")]; + bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; + tensor input_439 = concat(axis = var_14638, interleave = input_439_interleave_0, values = (hidden_states_247, var_14640))[name = string("input_439")]; + tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; + fp16 var_14635_to_fp16 = const()[name = string("op_14635_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_14635_to_fp16, x = input_439)[name = string("normed_393_cast_fp16")]; + tensor normed_395_begin_0 = const()[name = string("normed_395_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_395_end_0 = const()[name = string("normed_395_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_395_end_mask_0 = const()[name = string("normed_395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_395 = slice_by_index(begin = normed_395_begin_0, end = normed_395_end_0, end_mask = normed_395_end_mask_0, x = normed_393_cast_fp16)[name = string("normed_395")]; + tensor const_829 = const()[name = string("const_829"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458109504)))]; + tensor k_49 = mul(x = normed_395, y = const_829)[name = string("k_49")]; + tensor var_14666 = mul(x = q_49, y = cos_5)[name = string("op_14666")]; + tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_97 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49)[name = string("x1_97")]; + tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_97 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49)[name = string("x2_97")]; + fp16 const_832_promoted = const()[name = string("const_832_promoted"), val = fp16(-0x1p+0)]; + tensor var_14687 = mul(x = x2_97, y = const_832_promoted)[name = string("op_14687")]; + int32 var_14689 = const()[name = string("op_14689"), val = int32(-1)]; + bool var_14690_interleave_0 = const()[name = string("op_14690_interleave_0"), val = bool(false)]; + tensor var_14690 = concat(axis = var_14689, interleave = var_14690_interleave_0, values = (var_14687, x1_97))[name = string("op_14690")]; + tensor var_14691 = mul(x = var_14690, y = sin_5)[name = string("op_14691")]; + tensor query_states_195 = add(x = var_14666, y = var_14691)[name = string("query_states_195")]; + tensor var_14694 = mul(x = k_49, y = cos_5)[name = string("op_14694")]; + tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_99 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49)[name = string("x1_99")]; + tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_99 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49)[name = string("x2_99")]; + fp16 const_835_promoted = const()[name = string("const_835_promoted"), val = fp16(-0x1p+0)]; + tensor var_14715 = mul(x = x2_99, y = const_835_promoted)[name = string("op_14715")]; + int32 var_14717 = const()[name = string("op_14717"), val = int32(-1)]; + bool var_14718_interleave_0 = const()[name = string("op_14718_interleave_0"), val = bool(false)]; + tensor var_14718 = concat(axis = var_14717, interleave = var_14718_interleave_0, values = (var_14715, x1_99))[name = string("op_14718")]; + tensor var_14719 = mul(x = var_14718, y = sin_5)[name = string("op_14719")]; + tensor key_states_243 = add(x = var_14694, y = var_14719)[name = string("key_states_243")]; + tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([24])]; + tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; + tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; + tensor expand_dims_292 = const()[name = string("expand_dims_292"), val = tensor([25])]; + int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)]; + bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)]; + tensor concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (expand_dims_288, expand_dims_289, current_pos, expand_dims_291))[name = string("concat_434")]; + tensor concat_435_values1_0 = const()[name = string("concat_435_values1_0"), val = tensor([0])]; + tensor concat_435_values3_0 = const()[name = string("concat_435_values3_0"), val = tensor([0])]; + int32 concat_435_axis_0 = const()[name = string("concat_435_axis_0"), val = int32(0)]; + bool concat_435_interleave_0 = const()[name = string("concat_435_interleave_0"), val = bool(false)]; + tensor concat_435 = concat(axis = concat_435_axis_0, interleave = concat_435_interleave_0, values = (expand_dims_292, concat_435_values1_0, var_1781, concat_435_values3_0))[name = string("concat_435")]; + tensor model_model_kv_cache_0_internal_tensor_assign_49_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16 = slice_update(begin = concat_434, begin_mask = model_model_kv_cache_0_internal_tensor_assign_49_begin_mask_0, end = concat_435, end_mask = model_model_kv_cache_0_internal_tensor_assign_49_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_49_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_49_stride_0, update = key_states_243, x = coreml_update_state_103)[name = string("model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_49_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_104_write_state")]; + tensor coreml_update_state_104 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_104")]; + tensor expand_dims_294 = const()[name = string("expand_dims_294"), val = tensor([52])]; + tensor expand_dims_295 = const()[name = string("expand_dims_295"), val = tensor([0])]; + tensor expand_dims_297 = const()[name = string("expand_dims_297"), val = tensor([0])]; + tensor expand_dims_298 = const()[name = string("expand_dims_298"), val = tensor([53])]; + int32 concat_438_axis_0 = const()[name = string("concat_438_axis_0"), val = int32(0)]; + bool concat_438_interleave_0 = const()[name = string("concat_438_interleave_0"), val = bool(false)]; + tensor concat_438 = concat(axis = concat_438_axis_0, interleave = concat_438_interleave_0, values = (expand_dims_294, expand_dims_295, current_pos, expand_dims_297))[name = string("concat_438")]; + tensor concat_439_values1_0 = const()[name = string("concat_439_values1_0"), val = tensor([0])]; + tensor concat_439_values3_0 = const()[name = string("concat_439_values3_0"), val = tensor([0])]; + int32 concat_439_axis_0 = const()[name = string("concat_439_axis_0"), val = int32(0)]; + bool concat_439_interleave_0 = const()[name = string("concat_439_interleave_0"), val = bool(false)]; + tensor concat_439 = concat(axis = concat_439_axis_0, interleave = concat_439_interleave_0, values = (expand_dims_298, concat_439_values1_0, var_1781, concat_439_values3_0))[name = string("concat_439")]; + tensor model_model_kv_cache_0_internal_tensor_assign_50_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_195 = transpose(perm = var_14602, x = var_14597)[name = string("transpose_32")]; + tensor model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16 = slice_update(begin = concat_438, begin_mask = model_model_kv_cache_0_internal_tensor_assign_50_begin_mask_0, end = concat_439, end_mask = model_model_kv_cache_0_internal_tensor_assign_50_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_50_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_50_stride_0, update = value_states_195, x = coreml_update_state_104)[name = string("model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_50_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_105_write_state")]; + tensor coreml_update_state_105 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_105")]; + tensor var_14790_begin_0 = const()[name = string("op_14790_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor var_14790_end_0 = const()[name = string("op_14790_end_0"), val = tensor([25, 8, 4096, 128])]; + tensor var_14790_end_mask_0 = const()[name = string("op_14790_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14790_cast_fp16 = slice_by_index(begin = var_14790_begin_0, end = var_14790_end_0, end_mask = var_14790_end_mask_0, x = coreml_update_state_105)[name = string("op_14790_cast_fp16")]; + tensor K_layer_cache_49_axes_0 = const()[name = string("K_layer_cache_49_axes_0"), val = tensor([0])]; + tensor K_layer_cache_49_cast_fp16 = squeeze(axes = K_layer_cache_49_axes_0, x = var_14790_cast_fp16)[name = string("K_layer_cache_49_cast_fp16")]; + tensor var_14797_begin_0 = const()[name = string("op_14797_begin_0"), val = tensor([52, 0, 0, 0])]; + tensor var_14797_end_0 = const()[name = string("op_14797_end_0"), val = tensor([53, 8, 4096, 128])]; + tensor var_14797_end_mask_0 = const()[name = string("op_14797_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_14797_cast_fp16 = slice_by_index(begin = var_14797_begin_0, end = var_14797_end_0, end_mask = var_14797_end_mask_0, x = coreml_update_state_105)[name = string("op_14797_cast_fp16")]; + tensor V_layer_cache_49_axes_0 = const()[name = string("V_layer_cache_49_axes_0"), val = tensor([0])]; + tensor V_layer_cache_49_cast_fp16 = squeeze(axes = V_layer_cache_49_axes_0, x = var_14797_cast_fp16)[name = string("V_layer_cache_49_cast_fp16")]; + tensor x_387_axes_0 = const()[name = string("x_387_axes_0"), val = tensor([1])]; + tensor x_387_cast_fp16 = expand_dims(axes = x_387_axes_0, x = K_layer_cache_49_cast_fp16)[name = string("x_387_cast_fp16")]; + tensor var_14826 = const()[name = string("op_14826"), val = tensor([1, 2, 1, 1])]; + tensor x_389_cast_fp16 = tile(reps = var_14826, x = x_387_cast_fp16)[name = string("x_389_cast_fp16")]; + tensor var_14838 = const()[name = string("op_14838"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_247_cast_fp16 = reshape(shape = var_14838, x = x_389_cast_fp16)[name = string("key_states_247_cast_fp16")]; + tensor x_393_axes_0 = const()[name = string("x_393_axes_0"), val = tensor([1])]; + tensor x_393_cast_fp16 = expand_dims(axes = x_393_axes_0, x = V_layer_cache_49_cast_fp16)[name = string("x_393_cast_fp16")]; + tensor var_14846 = const()[name = string("op_14846"), val = tensor([1, 2, 1, 1])]; + tensor x_395_cast_fp16 = tile(reps = var_14846, x = x_393_cast_fp16)[name = string("x_395_cast_fp16")]; + bool var_14873_transpose_x_0 = const()[name = string("op_14873_transpose_x_0"), val = bool(false)]; + bool var_14873_transpose_y_0 = const()[name = string("op_14873_transpose_y_0"), val = bool(true)]; + tensor var_14873 = matmul(transpose_x = var_14873_transpose_x_0, transpose_y = var_14873_transpose_y_0, x = query_states_195, y = key_states_247_cast_fp16)[name = string("op_14873")]; + fp16 var_14874_to_fp16 = const()[name = string("op_14874_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_97_cast_fp16 = mul(x = var_14873, y = var_14874_to_fp16)[name = string("attn_weights_97_cast_fp16")]; + tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = causal_mask)[name = string("attn_weights_99_cast_fp16")]; + int32 var_14909 = const()[name = string("op_14909"), val = int32(-1)]; + tensor var_14911_cast_fp16 = softmax(axis = var_14909, x = attn_weights_99_cast_fp16)[name = string("op_14911_cast_fp16")]; + tensor concat_444 = const()[name = string("concat_444"), val = tensor([16, 64, 4096])]; + tensor reshape_72_cast_fp16 = reshape(shape = concat_444, x = var_14911_cast_fp16)[name = string("reshape_72_cast_fp16")]; + tensor concat_445 = const()[name = string("concat_445"), val = tensor([16, 4096, 128])]; + tensor reshape_73_cast_fp16 = reshape(shape = concat_445, x = x_395_cast_fp16)[name = string("reshape_73_cast_fp16")]; + bool matmul_24_transpose_x_0 = const()[name = string("matmul_24_transpose_x_0"), val = bool(false)]; + bool matmul_24_transpose_y_0 = const()[name = string("matmul_24_transpose_y_0"), val = bool(false)]; + tensor matmul_24_cast_fp16 = matmul(transpose_x = matmul_24_transpose_x_0, transpose_y = matmul_24_transpose_y_0, x = reshape_72_cast_fp16, y = reshape_73_cast_fp16)[name = string("matmul_24_cast_fp16")]; + tensor concat_449 = const()[name = string("concat_449"), val = tensor([1, 16, 64, 128])]; + tensor reshape_74_cast_fp16 = reshape(shape = concat_449, x = matmul_24_cast_fp16)[name = string("reshape_74_cast_fp16")]; + tensor var_14923_perm_0 = const()[name = string("op_14923_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_14942 = const()[name = string("op_14942"), val = tensor([1, 64, 2048])]; + tensor var_14923_cast_fp16 = transpose(perm = var_14923_perm_0, x = reshape_74_cast_fp16)[name = string("transpose_31")]; + tensor attn_output_245_cast_fp16 = reshape(shape = var_14942, x = var_14923_cast_fp16)[name = string("attn_output_245_cast_fp16")]; + tensor var_14947 = const()[name = string("op_14947"), val = tensor([0, 2, 1])]; + string var_14963_pad_type_0 = const()[name = string("op_14963_pad_type_0"), val = string("valid")]; + int32 var_14963_groups_0 = const()[name = string("op_14963_groups_0"), val = int32(1)]; + tensor var_14963_strides_0 = const()[name = string("op_14963_strides_0"), val = tensor([1])]; + tensor var_14963_pad_0 = const()[name = string("op_14963_pad_0"), val = tensor([0, 0])]; + tensor var_14963_dilations_0 = const()[name = string("op_14963_dilations_0"), val = tensor([1])]; + tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458109824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460207040))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_14948_cast_fp16 = transpose(perm = var_14947, x = attn_output_245_cast_fp16)[name = string("transpose_30")]; + tensor var_14963_cast_fp16 = conv(dilations = var_14963_dilations_0, groups = var_14963_groups_0, pad = var_14963_pad_0, pad_type = var_14963_pad_type_0, strides = var_14963_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_14948_cast_fp16)[name = string("op_14963_cast_fp16")]; + tensor var_14967 = const()[name = string("op_14967"), val = tensor([0, 2, 1])]; + tensor attn_output_249_cast_fp16 = transpose(perm = var_14967, x = var_14963_cast_fp16)[name = string("transpose_29")]; + tensor hidden_states_249_cast_fp16 = add(x = hidden_states_241_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_249_cast_fp16")]; + int32 var_14980 = const()[name = string("op_14980"), val = int32(-1)]; + fp16 const_847_promoted_to_fp16 = const()[name = string("const_847_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_14982_cast_fp16 = mul(x = hidden_states_249_cast_fp16, y = const_847_promoted_to_fp16)[name = string("op_14982_cast_fp16")]; + bool input_443_interleave_0 = const()[name = string("input_443_interleave_0"), val = bool(false)]; + tensor input_443_cast_fp16 = concat(axis = var_14980, interleave = input_443_interleave_0, values = (hidden_states_249_cast_fp16, var_14982_cast_fp16))[name = string("input_443_cast_fp16")]; + tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; + fp16 var_14977_to_fp16 = const()[name = string("op_14977_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_14977_to_fp16, x = input_443_cast_fp16)[name = string("normed_397_cast_fp16")]; + tensor normed_399_begin_0 = const()[name = string("normed_399_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_399_end_0 = const()[name = string("normed_399_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_399_end_mask_0 = const()[name = string("normed_399_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_399_cast_fp16 = slice_by_index(begin = normed_399_begin_0, end = normed_399_end_0, end_mask = normed_399_end_mask_0, x = normed_397_cast_fp16)[name = string("normed_399_cast_fp16")]; + tensor const_850_promoted_to_fp16 = const()[name = string("const_850_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460272640)))]; + tensor x_397_cast_fp16 = mul(x = normed_399_cast_fp16, y = const_850_promoted_to_fp16)[name = string("x_397_cast_fp16")]; + tensor var_15007 = const()[name = string("op_15007"), val = tensor([0, 2, 1])]; + tensor input_445_axes_0 = const()[name = string("input_445_axes_0"), val = tensor([2])]; + tensor var_15008 = transpose(perm = var_15007, x = x_397_cast_fp16)[name = string("transpose_28")]; + tensor input_445 = expand_dims(axes = input_445_axes_0, x = var_15008)[name = string("input_445")]; + string input_447_pad_type_0 = const()[name = string("input_447_pad_type_0"), val = string("valid")]; + tensor input_447_strides_0 = const()[name = string("input_447_strides_0"), val = tensor([1, 1])]; + tensor input_447_pad_0 = const()[name = string("input_447_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_447_dilations_0 = const()[name = string("input_447_dilations_0"), val = tensor([1, 1])]; + int32 input_447_groups_0 = const()[name = string("input_447_groups_0"), val = int32(1)]; + tensor input_447 = conv(dilations = input_447_dilations_0, groups = input_447_groups_0, pad = input_447_pad_0, pad_type = input_447_pad_type_0, strides = input_447_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_palettized, x = input_445)[name = string("input_447")]; + string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; + tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; + tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; + int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; + tensor b_49 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_palettized, x = input_445)[name = string("b_49")]; + tensor c_49 = silu(x = input_447)[name = string("c_49")]; + tensor input_449 = mul(x = c_49, y = b_49)[name = string("input_449")]; + string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; + tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; + tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; + int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; + tensor e_49 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_palettized, x = input_449)[name = string("e_49")]; + tensor var_15030_axes_0 = const()[name = string("op_15030_axes_0"), val = tensor([2])]; + tensor var_15030 = squeeze(axes = var_15030_axes_0, x = e_49)[name = string("op_15030")]; + tensor var_15031 = const()[name = string("op_15031"), val = tensor([0, 2, 1])]; + tensor var_15032 = transpose(perm = var_15031, x = var_15030)[name = string("transpose_27")]; + tensor hidden_states_251_cast_fp16 = add(x = hidden_states_249_cast_fp16, y = var_15032)[name = string("hidden_states_251_cast_fp16")]; + int32 var_15044 = const()[name = string("op_15044"), val = int32(-1)]; + fp16 const_851_promoted_to_fp16 = const()[name = string("const_851_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15046_cast_fp16 = mul(x = hidden_states_251_cast_fp16, y = const_851_promoted_to_fp16)[name = string("op_15046_cast_fp16")]; + bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; + tensor input_451_cast_fp16 = concat(axis = var_15044, interleave = input_451_interleave_0, values = (hidden_states_251_cast_fp16, var_15046_cast_fp16))[name = string("input_451_cast_fp16")]; + tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; + fp16 var_15041_to_fp16 = const()[name = string("op_15041_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_15041_to_fp16, x = input_451_cast_fp16)[name = string("normed_401_cast_fp16")]; + tensor normed_403_begin_0 = const()[name = string("normed_403_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_403_end_0 = const()[name = string("normed_403_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_403_end_mask_0 = const()[name = string("normed_403_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_403_cast_fp16 = slice_by_index(begin = normed_403_begin_0, end = normed_403_end_0, end_mask = normed_403_end_mask_0, x = normed_401_cast_fp16)[name = string("normed_403_cast_fp16")]; + tensor const_854_promoted_to_fp16 = const()[name = string("const_854_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460274752)))]; + tensor hidden_states_253_cast_fp16 = mul(x = normed_403_cast_fp16, y = const_854_promoted_to_fp16)[name = string("hidden_states_253_cast_fp16")]; + tensor var_15069 = const()[name = string("op_15069"), val = tensor([0, 2, 1])]; + tensor var_15072_axes_0 = const()[name = string("op_15072_axes_0"), val = tensor([2])]; + tensor var_15070_cast_fp16 = transpose(perm = var_15069, x = hidden_states_253_cast_fp16)[name = string("transpose_26")]; + tensor var_15072_cast_fp16 = expand_dims(axes = var_15072_axes_0, x = var_15070_cast_fp16)[name = string("op_15072_cast_fp16")]; + string query_states_201_pad_type_0 = const()[name = string("query_states_201_pad_type_0"), val = string("valid")]; + tensor query_states_201_strides_0 = const()[name = string("query_states_201_strides_0"), val = tensor([1, 1])]; + tensor query_states_201_pad_0 = const()[name = string("query_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_201_dilations_0 = const()[name = string("query_states_201_dilations_0"), val = tensor([1, 1])]; + int32 query_states_201_groups_0 = const()[name = string("query_states_201_groups_0"), val = int32(1)]; + tensor query_states_201 = conv(dilations = query_states_201_dilations_0, groups = query_states_201_groups_0, pad = query_states_201_pad_0, pad_type = query_states_201_pad_type_0, strides = query_states_201_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_15072_cast_fp16)[name = string("query_states_201")]; + string key_states_251_pad_type_0 = const()[name = string("key_states_251_pad_type_0"), val = string("valid")]; + tensor key_states_251_strides_0 = const()[name = string("key_states_251_strides_0"), val = tensor([1, 1])]; + tensor key_states_251_pad_0 = const()[name = string("key_states_251_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_251_dilations_0 = const()[name = string("key_states_251_dilations_0"), val = tensor([1, 1])]; + int32 key_states_251_groups_0 = const()[name = string("key_states_251_groups_0"), val = int32(1)]; + tensor key_states_251 = conv(dilations = key_states_251_dilations_0, groups = key_states_251_groups_0, pad = key_states_251_pad_0, pad_type = key_states_251_pad_type_0, strides = key_states_251_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_15072_cast_fp16)[name = string("key_states_251")]; + string value_states_201_pad_type_0 = const()[name = string("value_states_201_pad_type_0"), val = string("valid")]; + tensor value_states_201_strides_0 = const()[name = string("value_states_201_strides_0"), val = tensor([1, 1])]; + tensor value_states_201_pad_0 = const()[name = string("value_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_201_dilations_0 = const()[name = string("value_states_201_dilations_0"), val = tensor([1, 1])]; + int32 value_states_201_groups_0 = const()[name = string("value_states_201_groups_0"), val = int32(1)]; + tensor value_states_201 = conv(dilations = value_states_201_dilations_0, groups = value_states_201_groups_0, pad = value_states_201_pad_0, pad_type = value_states_201_pad_type_0, strides = value_states_201_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_15072_cast_fp16)[name = string("value_states_201")]; + tensor var_15114 = const()[name = string("op_15114"), val = tensor([1, 16, 128, 64])]; + tensor var_15115 = reshape(shape = var_15114, x = query_states_201)[name = string("op_15115")]; + tensor var_15120 = const()[name = string("op_15120"), val = tensor([0, 1, 3, 2])]; + tensor var_15125 = const()[name = string("op_15125"), val = tensor([1, 8, 128, 64])]; + tensor var_15126 = reshape(shape = var_15125, x = key_states_251)[name = string("op_15126")]; + tensor var_15131 = const()[name = string("op_15131"), val = tensor([0, 1, 3, 2])]; + tensor var_15136 = const()[name = string("op_15136"), val = tensor([1, 8, 128, 64])]; + tensor var_15137 = reshape(shape = var_15136, x = value_states_201)[name = string("op_15137")]; + tensor var_15142 = const()[name = string("op_15142"), val = tensor([0, 1, 3, 2])]; + int32 var_15153 = const()[name = string("op_15153"), val = int32(-1)]; + fp16 const_856_promoted = const()[name = string("const_856_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_255 = transpose(perm = var_15120, x = var_15115)[name = string("transpose_25")]; + tensor var_15155 = mul(x = hidden_states_255, y = const_856_promoted)[name = string("op_15155")]; + bool input_455_interleave_0 = const()[name = string("input_455_interleave_0"), val = bool(false)]; + tensor input_455 = concat(axis = var_15153, interleave = input_455_interleave_0, values = (hidden_states_255, var_15155))[name = string("input_455")]; + tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; + fp16 var_15150_to_fp16 = const()[name = string("op_15150_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_15150_to_fp16, x = input_455)[name = string("normed_405_cast_fp16")]; + tensor normed_407_begin_0 = const()[name = string("normed_407_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_407_end_0 = const()[name = string("normed_407_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_407_end_mask_0 = const()[name = string("normed_407_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_407 = slice_by_index(begin = normed_407_begin_0, end = normed_407_end_0, end_mask = normed_407_end_mask_0, x = normed_405_cast_fp16)[name = string("normed_407")]; + tensor const_859 = const()[name = string("const_859"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460276864)))]; + tensor q_51 = mul(x = normed_407, y = const_859)[name = string("q_51")]; + int32 var_15178 = const()[name = string("op_15178"), val = int32(-1)]; + fp16 const_860_promoted = const()[name = string("const_860_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_257 = transpose(perm = var_15131, x = var_15126)[name = string("transpose_24")]; + tensor var_15180 = mul(x = hidden_states_257, y = const_860_promoted)[name = string("op_15180")]; + bool input_457_interleave_0 = const()[name = string("input_457_interleave_0"), val = bool(false)]; + tensor input_457 = concat(axis = var_15178, interleave = input_457_interleave_0, values = (hidden_states_257, var_15180))[name = string("input_457")]; + tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; + fp16 var_15175_to_fp16 = const()[name = string("op_15175_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_15175_to_fp16, x = input_457)[name = string("normed_409_cast_fp16")]; + tensor normed_411_begin_0 = const()[name = string("normed_411_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_411_end_0 = const()[name = string("normed_411_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_411_end_mask_0 = const()[name = string("normed_411_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_411 = slice_by_index(begin = normed_411_begin_0, end = normed_411_end_0, end_mask = normed_411_end_mask_0, x = normed_409_cast_fp16)[name = string("normed_411")]; + tensor const_863 = const()[name = string("const_863"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460277184)))]; + tensor k_51 = mul(x = normed_411, y = const_863)[name = string("k_51")]; + tensor var_15206 = mul(x = q_51, y = cos_5)[name = string("op_15206")]; + tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_101 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_51)[name = string("x1_101")]; + tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_101 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_51)[name = string("x2_101")]; + fp16 const_866_promoted = const()[name = string("const_866_promoted"), val = fp16(-0x1p+0)]; + tensor var_15227 = mul(x = x2_101, y = const_866_promoted)[name = string("op_15227")]; + int32 var_15229 = const()[name = string("op_15229"), val = int32(-1)]; + bool var_15230_interleave_0 = const()[name = string("op_15230_interleave_0"), val = bool(false)]; + tensor var_15230 = concat(axis = var_15229, interleave = var_15230_interleave_0, values = (var_15227, x1_101))[name = string("op_15230")]; + tensor var_15231 = mul(x = var_15230, y = sin_5)[name = string("op_15231")]; + tensor query_states_203 = add(x = var_15206, y = var_15231)[name = string("query_states_203")]; + tensor var_15234 = mul(x = k_51, y = cos_5)[name = string("op_15234")]; + tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_103 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = k_51)[name = string("x1_103")]; + tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_103 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = k_51)[name = string("x2_103")]; + fp16 const_869_promoted = const()[name = string("const_869_promoted"), val = fp16(-0x1p+0)]; + tensor var_15255 = mul(x = x2_103, y = const_869_promoted)[name = string("op_15255")]; + int32 var_15257 = const()[name = string("op_15257"), val = int32(-1)]; + bool var_15258_interleave_0 = const()[name = string("op_15258_interleave_0"), val = bool(false)]; + tensor var_15258 = concat(axis = var_15257, interleave = var_15258_interleave_0, values = (var_15255, x1_103))[name = string("op_15258")]; + tensor var_15259 = mul(x = var_15258, y = sin_5)[name = string("op_15259")]; + tensor key_states_253 = add(x = var_15234, y = var_15259)[name = string("key_states_253")]; + tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([25])]; + tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; + tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; + tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([26])]; + int32 concat_452_axis_0 = const()[name = string("concat_452_axis_0"), val = int32(0)]; + bool concat_452_interleave_0 = const()[name = string("concat_452_interleave_0"), val = bool(false)]; + tensor concat_452 = concat(axis = concat_452_axis_0, interleave = concat_452_interleave_0, values = (expand_dims_300, expand_dims_301, current_pos, expand_dims_303))[name = string("concat_452")]; + tensor concat_453_values1_0 = const()[name = string("concat_453_values1_0"), val = tensor([0])]; + tensor concat_453_values3_0 = const()[name = string("concat_453_values3_0"), val = tensor([0])]; + int32 concat_453_axis_0 = const()[name = string("concat_453_axis_0"), val = int32(0)]; + bool concat_453_interleave_0 = const()[name = string("concat_453_interleave_0"), val = bool(false)]; + tensor concat_453 = concat(axis = concat_453_axis_0, interleave = concat_453_interleave_0, values = (expand_dims_304, concat_453_values1_0, var_1781, concat_453_values3_0))[name = string("concat_453")]; + tensor model_model_kv_cache_0_internal_tensor_assign_51_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16 = slice_update(begin = concat_452, begin_mask = model_model_kv_cache_0_internal_tensor_assign_51_begin_mask_0, end = concat_453, end_mask = model_model_kv_cache_0_internal_tensor_assign_51_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_51_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_51_stride_0, update = key_states_253, x = coreml_update_state_105)[name = string("model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_51_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_106_write_state")]; + tensor coreml_update_state_106 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_106")]; + tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([53])]; + tensor expand_dims_307 = const()[name = string("expand_dims_307"), val = tensor([0])]; + tensor expand_dims_309 = const()[name = string("expand_dims_309"), val = tensor([0])]; + tensor expand_dims_310 = const()[name = string("expand_dims_310"), val = tensor([54])]; + int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)]; + bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)]; + tensor concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (expand_dims_306, expand_dims_307, current_pos, expand_dims_309))[name = string("concat_456")]; + tensor concat_457_values1_0 = const()[name = string("concat_457_values1_0"), val = tensor([0])]; + tensor concat_457_values3_0 = const()[name = string("concat_457_values3_0"), val = tensor([0])]; + int32 concat_457_axis_0 = const()[name = string("concat_457_axis_0"), val = int32(0)]; + bool concat_457_interleave_0 = const()[name = string("concat_457_interleave_0"), val = bool(false)]; + tensor concat_457 = concat(axis = concat_457_axis_0, interleave = concat_457_interleave_0, values = (expand_dims_310, concat_457_values1_0, var_1781, concat_457_values3_0))[name = string("concat_457")]; + tensor model_model_kv_cache_0_internal_tensor_assign_52_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_203 = transpose(perm = var_15142, x = var_15137)[name = string("transpose_23")]; + tensor model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16 = slice_update(begin = concat_456, begin_mask = model_model_kv_cache_0_internal_tensor_assign_52_begin_mask_0, end = concat_457, end_mask = model_model_kv_cache_0_internal_tensor_assign_52_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_52_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_52_stride_0, update = value_states_203, x = coreml_update_state_106)[name = string("model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_52_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_107_write_state")]; + tensor coreml_update_state_107 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_107")]; + tensor var_15330_begin_0 = const()[name = string("op_15330_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor var_15330_end_0 = const()[name = string("op_15330_end_0"), val = tensor([26, 8, 4096, 128])]; + tensor var_15330_end_mask_0 = const()[name = string("op_15330_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15330_cast_fp16 = slice_by_index(begin = var_15330_begin_0, end = var_15330_end_0, end_mask = var_15330_end_mask_0, x = coreml_update_state_107)[name = string("op_15330_cast_fp16")]; + tensor K_layer_cache_51_axes_0 = const()[name = string("K_layer_cache_51_axes_0"), val = tensor([0])]; + tensor K_layer_cache_51_cast_fp16 = squeeze(axes = K_layer_cache_51_axes_0, x = var_15330_cast_fp16)[name = string("K_layer_cache_51_cast_fp16")]; + tensor var_15337_begin_0 = const()[name = string("op_15337_begin_0"), val = tensor([53, 0, 0, 0])]; + tensor var_15337_end_0 = const()[name = string("op_15337_end_0"), val = tensor([54, 8, 4096, 128])]; + tensor var_15337_end_mask_0 = const()[name = string("op_15337_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15337_cast_fp16 = slice_by_index(begin = var_15337_begin_0, end = var_15337_end_0, end_mask = var_15337_end_mask_0, x = coreml_update_state_107)[name = string("op_15337_cast_fp16")]; + tensor V_layer_cache_51_axes_0 = const()[name = string("V_layer_cache_51_axes_0"), val = tensor([0])]; + tensor V_layer_cache_51_cast_fp16 = squeeze(axes = V_layer_cache_51_axes_0, x = var_15337_cast_fp16)[name = string("V_layer_cache_51_cast_fp16")]; + tensor x_403_axes_0 = const()[name = string("x_403_axes_0"), val = tensor([1])]; + tensor x_403_cast_fp16 = expand_dims(axes = x_403_axes_0, x = K_layer_cache_51_cast_fp16)[name = string("x_403_cast_fp16")]; + tensor var_15366 = const()[name = string("op_15366"), val = tensor([1, 2, 1, 1])]; + tensor x_405_cast_fp16 = tile(reps = var_15366, x = x_403_cast_fp16)[name = string("x_405_cast_fp16")]; + tensor var_15378 = const()[name = string("op_15378"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_257_cast_fp16 = reshape(shape = var_15378, x = x_405_cast_fp16)[name = string("key_states_257_cast_fp16")]; + tensor x_409_axes_0 = const()[name = string("x_409_axes_0"), val = tensor([1])]; + tensor x_409_cast_fp16 = expand_dims(axes = x_409_axes_0, x = V_layer_cache_51_cast_fp16)[name = string("x_409_cast_fp16")]; + tensor var_15386 = const()[name = string("op_15386"), val = tensor([1, 2, 1, 1])]; + tensor x_411_cast_fp16 = tile(reps = var_15386, x = x_409_cast_fp16)[name = string("x_411_cast_fp16")]; + bool var_15413_transpose_x_0 = const()[name = string("op_15413_transpose_x_0"), val = bool(false)]; + bool var_15413_transpose_y_0 = const()[name = string("op_15413_transpose_y_0"), val = bool(true)]; + tensor var_15413 = matmul(transpose_x = var_15413_transpose_x_0, transpose_y = var_15413_transpose_y_0, x = query_states_203, y = key_states_257_cast_fp16)[name = string("op_15413")]; + fp16 var_15414_to_fp16 = const()[name = string("op_15414_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_101_cast_fp16 = mul(x = var_15413, y = var_15414_to_fp16)[name = string("attn_weights_101_cast_fp16")]; + tensor attn_weights_103_cast_fp16 = add(x = attn_weights_101_cast_fp16, y = causal_mask)[name = string("attn_weights_103_cast_fp16")]; + int32 var_15449 = const()[name = string("op_15449"), val = int32(-1)]; + tensor var_15451_cast_fp16 = softmax(axis = var_15449, x = attn_weights_103_cast_fp16)[name = string("op_15451_cast_fp16")]; + tensor concat_462 = const()[name = string("concat_462"), val = tensor([16, 64, 4096])]; + tensor reshape_75_cast_fp16 = reshape(shape = concat_462, x = var_15451_cast_fp16)[name = string("reshape_75_cast_fp16")]; + tensor concat_463 = const()[name = string("concat_463"), val = tensor([16, 4096, 128])]; + tensor reshape_76_cast_fp16 = reshape(shape = concat_463, x = x_411_cast_fp16)[name = string("reshape_76_cast_fp16")]; + bool matmul_25_transpose_x_0 = const()[name = string("matmul_25_transpose_x_0"), val = bool(false)]; + bool matmul_25_transpose_y_0 = const()[name = string("matmul_25_transpose_y_0"), val = bool(false)]; + tensor matmul_25_cast_fp16 = matmul(transpose_x = matmul_25_transpose_x_0, transpose_y = matmul_25_transpose_y_0, x = reshape_75_cast_fp16, y = reshape_76_cast_fp16)[name = string("matmul_25_cast_fp16")]; + tensor concat_467 = const()[name = string("concat_467"), val = tensor([1, 16, 64, 128])]; + tensor reshape_77_cast_fp16 = reshape(shape = concat_467, x = matmul_25_cast_fp16)[name = string("reshape_77_cast_fp16")]; + tensor var_15463_perm_0 = const()[name = string("op_15463_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_15482 = const()[name = string("op_15482"), val = tensor([1, 64, 2048])]; + tensor var_15463_cast_fp16 = transpose(perm = var_15463_perm_0, x = reshape_77_cast_fp16)[name = string("transpose_22")]; + tensor attn_output_255_cast_fp16 = reshape(shape = var_15482, x = var_15463_cast_fp16)[name = string("attn_output_255_cast_fp16")]; + tensor var_15487 = const()[name = string("op_15487"), val = tensor([0, 2, 1])]; + string var_15503_pad_type_0 = const()[name = string("op_15503_pad_type_0"), val = string("valid")]; + int32 var_15503_groups_0 = const()[name = string("op_15503_groups_0"), val = int32(1)]; + tensor var_15503_strides_0 = const()[name = string("op_15503_strides_0"), val = tensor([1])]; + tensor var_15503_pad_0 = const()[name = string("op_15503_pad_0"), val = tensor([0, 0])]; + tensor var_15503_dilations_0 = const()[name = string("op_15503_dilations_0"), val = tensor([1])]; + tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(460277504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462374720))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_15488_cast_fp16 = transpose(perm = var_15487, x = attn_output_255_cast_fp16)[name = string("transpose_21")]; + tensor var_15503_cast_fp16 = conv(dilations = var_15503_dilations_0, groups = var_15503_groups_0, pad = var_15503_pad_0, pad_type = var_15503_pad_type_0, strides = var_15503_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_15488_cast_fp16)[name = string("op_15503_cast_fp16")]; + tensor var_15507 = const()[name = string("op_15507"), val = tensor([0, 2, 1])]; + tensor attn_output_259_cast_fp16 = transpose(perm = var_15507, x = var_15503_cast_fp16)[name = string("transpose_20")]; + tensor hidden_states_259_cast_fp16 = add(x = hidden_states_251_cast_fp16, y = attn_output_259_cast_fp16)[name = string("hidden_states_259_cast_fp16")]; + int32 var_15520 = const()[name = string("op_15520"), val = int32(-1)]; + fp16 const_881_promoted_to_fp16 = const()[name = string("const_881_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15522_cast_fp16 = mul(x = hidden_states_259_cast_fp16, y = const_881_promoted_to_fp16)[name = string("op_15522_cast_fp16")]; + bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; + tensor input_461_cast_fp16 = concat(axis = var_15520, interleave = input_461_interleave_0, values = (hidden_states_259_cast_fp16, var_15522_cast_fp16))[name = string("input_461_cast_fp16")]; + tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; + fp16 var_15517_to_fp16 = const()[name = string("op_15517_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_15517_to_fp16, x = input_461_cast_fp16)[name = string("normed_413_cast_fp16")]; + tensor normed_415_begin_0 = const()[name = string("normed_415_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_415_end_0 = const()[name = string("normed_415_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_415_end_mask_0 = const()[name = string("normed_415_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_415_cast_fp16 = slice_by_index(begin = normed_415_begin_0, end = normed_415_end_0, end_mask = normed_415_end_mask_0, x = normed_413_cast_fp16)[name = string("normed_415_cast_fp16")]; + tensor const_884_promoted_to_fp16 = const()[name = string("const_884_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462440320)))]; + tensor x_413_cast_fp16 = mul(x = normed_415_cast_fp16, y = const_884_promoted_to_fp16)[name = string("x_413_cast_fp16")]; + tensor var_15547 = const()[name = string("op_15547"), val = tensor([0, 2, 1])]; + tensor input_463_axes_0 = const()[name = string("input_463_axes_0"), val = tensor([2])]; + tensor var_15548 = transpose(perm = var_15547, x = x_413_cast_fp16)[name = string("transpose_19")]; + tensor input_463 = expand_dims(axes = input_463_axes_0, x = var_15548)[name = string("input_463")]; + string input_465_pad_type_0 = const()[name = string("input_465_pad_type_0"), val = string("valid")]; + tensor input_465_strides_0 = const()[name = string("input_465_strides_0"), val = tensor([1, 1])]; + tensor input_465_pad_0 = const()[name = string("input_465_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_465_dilations_0 = const()[name = string("input_465_dilations_0"), val = tensor([1, 1])]; + int32 input_465_groups_0 = const()[name = string("input_465_groups_0"), val = int32(1)]; + tensor input_465 = conv(dilations = input_465_dilations_0, groups = input_465_groups_0, pad = input_465_pad_0, pad_type = input_465_pad_type_0, strides = input_465_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_palettized, x = input_463)[name = string("input_465")]; + string b_51_pad_type_0 = const()[name = string("b_51_pad_type_0"), val = string("valid")]; + tensor b_51_strides_0 = const()[name = string("b_51_strides_0"), val = tensor([1, 1])]; + tensor b_51_pad_0 = const()[name = string("b_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_51_dilations_0 = const()[name = string("b_51_dilations_0"), val = tensor([1, 1])]; + int32 b_51_groups_0 = const()[name = string("b_51_groups_0"), val = int32(1)]; + tensor b_51 = conv(dilations = b_51_dilations_0, groups = b_51_groups_0, pad = b_51_pad_0, pad_type = b_51_pad_type_0, strides = b_51_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_palettized, x = input_463)[name = string("b_51")]; + tensor c_51 = silu(x = input_465)[name = string("c_51")]; + tensor input_467 = mul(x = c_51, y = b_51)[name = string("input_467")]; + string e_51_pad_type_0 = const()[name = string("e_51_pad_type_0"), val = string("valid")]; + tensor e_51_strides_0 = const()[name = string("e_51_strides_0"), val = tensor([1, 1])]; + tensor e_51_pad_0 = const()[name = string("e_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_51_dilations_0 = const()[name = string("e_51_dilations_0"), val = tensor([1, 1])]; + int32 e_51_groups_0 = const()[name = string("e_51_groups_0"), val = int32(1)]; + tensor e_51 = conv(dilations = e_51_dilations_0, groups = e_51_groups_0, pad = e_51_pad_0, pad_type = e_51_pad_type_0, strides = e_51_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_palettized, x = input_467)[name = string("e_51")]; + tensor var_15570_axes_0 = const()[name = string("op_15570_axes_0"), val = tensor([2])]; + tensor var_15570 = squeeze(axes = var_15570_axes_0, x = e_51)[name = string("op_15570")]; + tensor var_15571 = const()[name = string("op_15571"), val = tensor([0, 2, 1])]; + tensor var_15572 = transpose(perm = var_15571, x = var_15570)[name = string("transpose_18")]; + tensor hidden_states_261_cast_fp16 = add(x = hidden_states_259_cast_fp16, y = var_15572)[name = string("hidden_states_261_cast_fp16")]; + int32 var_15584 = const()[name = string("op_15584"), val = int32(-1)]; + fp16 const_885_promoted_to_fp16 = const()[name = string("const_885_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_15586_cast_fp16 = mul(x = hidden_states_261_cast_fp16, y = const_885_promoted_to_fp16)[name = string("op_15586_cast_fp16")]; + bool input_469_interleave_0 = const()[name = string("input_469_interleave_0"), val = bool(false)]; + tensor input_469_cast_fp16 = concat(axis = var_15584, interleave = input_469_interleave_0, values = (hidden_states_261_cast_fp16, var_15586_cast_fp16))[name = string("input_469_cast_fp16")]; + tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; + fp16 var_15581_to_fp16 = const()[name = string("op_15581_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_15581_to_fp16, x = input_469_cast_fp16)[name = string("normed_417_cast_fp16")]; + tensor normed_419_begin_0 = const()[name = string("normed_419_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_419_end_0 = const()[name = string("normed_419_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_419_end_mask_0 = const()[name = string("normed_419_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_419_cast_fp16 = slice_by_index(begin = normed_419_begin_0, end = normed_419_end_0, end_mask = normed_419_end_mask_0, x = normed_417_cast_fp16)[name = string("normed_419_cast_fp16")]; + tensor const_888_promoted_to_fp16 = const()[name = string("const_888_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462442432)))]; + tensor hidden_states_263_cast_fp16 = mul(x = normed_419_cast_fp16, y = const_888_promoted_to_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor var_15609 = const()[name = string("op_15609"), val = tensor([0, 2, 1])]; + tensor var_15612_axes_0 = const()[name = string("op_15612_axes_0"), val = tensor([2])]; + tensor var_15610_cast_fp16 = transpose(perm = var_15609, x = hidden_states_263_cast_fp16)[name = string("transpose_17")]; + tensor var_15612_cast_fp16 = expand_dims(axes = var_15612_axes_0, x = var_15610_cast_fp16)[name = string("op_15612_cast_fp16")]; + string query_states_209_pad_type_0 = const()[name = string("query_states_209_pad_type_0"), val = string("valid")]; + tensor query_states_209_strides_0 = const()[name = string("query_states_209_strides_0"), val = tensor([1, 1])]; + tensor query_states_209_pad_0 = const()[name = string("query_states_209_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_209_dilations_0 = const()[name = string("query_states_209_dilations_0"), val = tensor([1, 1])]; + int32 query_states_209_groups_0 = const()[name = string("query_states_209_groups_0"), val = int32(1)]; + tensor query_states_209 = conv(dilations = query_states_209_dilations_0, groups = query_states_209_groups_0, pad = query_states_209_pad_0, pad_type = query_states_209_pad_type_0, strides = query_states_209_strides_0, weight = model_model_layers_26_self_attn_q_proj_weight_palettized, x = var_15612_cast_fp16)[name = string("query_states_209")]; + string key_states_261_pad_type_0 = const()[name = string("key_states_261_pad_type_0"), val = string("valid")]; + tensor key_states_261_strides_0 = const()[name = string("key_states_261_strides_0"), val = tensor([1, 1])]; + tensor key_states_261_pad_0 = const()[name = string("key_states_261_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_261_dilations_0 = const()[name = string("key_states_261_dilations_0"), val = tensor([1, 1])]; + int32 key_states_261_groups_0 = const()[name = string("key_states_261_groups_0"), val = int32(1)]; + tensor key_states_261 = conv(dilations = key_states_261_dilations_0, groups = key_states_261_groups_0, pad = key_states_261_pad_0, pad_type = key_states_261_pad_type_0, strides = key_states_261_strides_0, weight = model_model_layers_26_self_attn_k_proj_weight_palettized, x = var_15612_cast_fp16)[name = string("key_states_261")]; + string value_states_209_pad_type_0 = const()[name = string("value_states_209_pad_type_0"), val = string("valid")]; + tensor value_states_209_strides_0 = const()[name = string("value_states_209_strides_0"), val = tensor([1, 1])]; + tensor value_states_209_pad_0 = const()[name = string("value_states_209_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_209_dilations_0 = const()[name = string("value_states_209_dilations_0"), val = tensor([1, 1])]; + int32 value_states_209_groups_0 = const()[name = string("value_states_209_groups_0"), val = int32(1)]; + tensor value_states_209 = conv(dilations = value_states_209_dilations_0, groups = value_states_209_groups_0, pad = value_states_209_pad_0, pad_type = value_states_209_pad_type_0, strides = value_states_209_strides_0, weight = model_model_layers_26_self_attn_v_proj_weight_palettized, x = var_15612_cast_fp16)[name = string("value_states_209")]; + tensor var_15654 = const()[name = string("op_15654"), val = tensor([1, 16, 128, 64])]; + tensor var_15655 = reshape(shape = var_15654, x = query_states_209)[name = string("op_15655")]; + tensor var_15660 = const()[name = string("op_15660"), val = tensor([0, 1, 3, 2])]; + tensor var_15665 = const()[name = string("op_15665"), val = tensor([1, 8, 128, 64])]; + tensor var_15666 = reshape(shape = var_15665, x = key_states_261)[name = string("op_15666")]; + tensor var_15671 = const()[name = string("op_15671"), val = tensor([0, 1, 3, 2])]; + tensor var_15676 = const()[name = string("op_15676"), val = tensor([1, 8, 128, 64])]; + tensor var_15677 = reshape(shape = var_15676, x = value_states_209)[name = string("op_15677")]; + tensor var_15682 = const()[name = string("op_15682"), val = tensor([0, 1, 3, 2])]; + int32 var_15693 = const()[name = string("op_15693"), val = int32(-1)]; + fp16 const_890_promoted = const()[name = string("const_890_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_265 = transpose(perm = var_15660, x = var_15655)[name = string("transpose_16")]; + tensor var_15695 = mul(x = hidden_states_265, y = const_890_promoted)[name = string("op_15695")]; + bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; + tensor input_473 = concat(axis = var_15693, interleave = input_473_interleave_0, values = (hidden_states_265, var_15695))[name = string("input_473")]; + tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; + fp16 var_15690_to_fp16 = const()[name = string("op_15690_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_15690_to_fp16, x = input_473)[name = string("normed_421_cast_fp16")]; + tensor normed_423_begin_0 = const()[name = string("normed_423_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_423_end_0 = const()[name = string("normed_423_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_423_end_mask_0 = const()[name = string("normed_423_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_423 = slice_by_index(begin = normed_423_begin_0, end = normed_423_end_0, end_mask = normed_423_end_mask_0, x = normed_421_cast_fp16)[name = string("normed_423")]; + tensor const_893 = const()[name = string("const_893"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462444544)))]; + tensor q_53 = mul(x = normed_423, y = const_893)[name = string("q_53")]; + int32 var_15718 = const()[name = string("op_15718"), val = int32(-1)]; + fp16 const_894_promoted = const()[name = string("const_894_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_267 = transpose(perm = var_15671, x = var_15666)[name = string("transpose_15")]; + tensor var_15720 = mul(x = hidden_states_267, y = const_894_promoted)[name = string("op_15720")]; + bool input_475_interleave_0 = const()[name = string("input_475_interleave_0"), val = bool(false)]; + tensor input_475 = concat(axis = var_15718, interleave = input_475_interleave_0, values = (hidden_states_267, var_15720))[name = string("input_475")]; + tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; + fp16 var_15715_to_fp16 = const()[name = string("op_15715_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_15715_to_fp16, x = input_475)[name = string("normed_425_cast_fp16")]; + tensor normed_427_begin_0 = const()[name = string("normed_427_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_427_end_0 = const()[name = string("normed_427_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_427_end_mask_0 = const()[name = string("normed_427_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_427 = slice_by_index(begin = normed_427_begin_0, end = normed_427_end_0, end_mask = normed_427_end_mask_0, x = normed_425_cast_fp16)[name = string("normed_427")]; + tensor const_897 = const()[name = string("const_897"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462444864)))]; + tensor k_53 = mul(x = normed_427, y = const_897)[name = string("k_53")]; + tensor var_15746 = mul(x = q_53, y = cos_5)[name = string("op_15746")]; + tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_105 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = q_53)[name = string("x1_105")]; + tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_105 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = q_53)[name = string("x2_105")]; + fp16 const_900_promoted = const()[name = string("const_900_promoted"), val = fp16(-0x1p+0)]; + tensor var_15767 = mul(x = x2_105, y = const_900_promoted)[name = string("op_15767")]; + int32 var_15769 = const()[name = string("op_15769"), val = int32(-1)]; + bool var_15770_interleave_0 = const()[name = string("op_15770_interleave_0"), val = bool(false)]; + tensor var_15770 = concat(axis = var_15769, interleave = var_15770_interleave_0, values = (var_15767, x1_105))[name = string("op_15770")]; + tensor var_15771 = mul(x = var_15770, y = sin_5)[name = string("op_15771")]; + tensor query_states_211 = add(x = var_15746, y = var_15771)[name = string("query_states_211")]; + tensor var_15774 = mul(x = k_53, y = cos_5)[name = string("op_15774")]; + tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_107 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = k_53)[name = string("x1_107")]; + tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_107 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = k_53)[name = string("x2_107")]; + fp16 const_903_promoted = const()[name = string("const_903_promoted"), val = fp16(-0x1p+0)]; + tensor var_15795 = mul(x = x2_107, y = const_903_promoted)[name = string("op_15795")]; + int32 var_15797 = const()[name = string("op_15797"), val = int32(-1)]; + bool var_15798_interleave_0 = const()[name = string("op_15798_interleave_0"), val = bool(false)]; + tensor var_15798 = concat(axis = var_15797, interleave = var_15798_interleave_0, values = (var_15795, x1_107))[name = string("op_15798")]; + tensor var_15799 = mul(x = var_15798, y = sin_5)[name = string("op_15799")]; + tensor key_states_263 = add(x = var_15774, y = var_15799)[name = string("key_states_263")]; + tensor expand_dims_312 = const()[name = string("expand_dims_312"), val = tensor([26])]; + tensor expand_dims_313 = const()[name = string("expand_dims_313"), val = tensor([0])]; + tensor expand_dims_315 = const()[name = string("expand_dims_315"), val = tensor([0])]; + tensor expand_dims_316 = const()[name = string("expand_dims_316"), val = tensor([27])]; + int32 concat_470_axis_0 = const()[name = string("concat_470_axis_0"), val = int32(0)]; + bool concat_470_interleave_0 = const()[name = string("concat_470_interleave_0"), val = bool(false)]; + tensor concat_470 = concat(axis = concat_470_axis_0, interleave = concat_470_interleave_0, values = (expand_dims_312, expand_dims_313, current_pos, expand_dims_315))[name = string("concat_470")]; + tensor concat_471_values1_0 = const()[name = string("concat_471_values1_0"), val = tensor([0])]; + tensor concat_471_values3_0 = const()[name = string("concat_471_values3_0"), val = tensor([0])]; + int32 concat_471_axis_0 = const()[name = string("concat_471_axis_0"), val = int32(0)]; + bool concat_471_interleave_0 = const()[name = string("concat_471_interleave_0"), val = bool(false)]; + tensor concat_471 = concat(axis = concat_471_axis_0, interleave = concat_471_interleave_0, values = (expand_dims_316, concat_471_values1_0, var_1781, concat_471_values3_0))[name = string("concat_471")]; + tensor model_model_kv_cache_0_internal_tensor_assign_53_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16 = slice_update(begin = concat_470, begin_mask = model_model_kv_cache_0_internal_tensor_assign_53_begin_mask_0, end = concat_471, end_mask = model_model_kv_cache_0_internal_tensor_assign_53_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_53_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_53_stride_0, update = key_states_263, x = coreml_update_state_107)[name = string("model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_53_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_108_write_state")]; + tensor coreml_update_state_108 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_108")]; + tensor expand_dims_318 = const()[name = string("expand_dims_318"), val = tensor([54])]; + tensor expand_dims_319 = const()[name = string("expand_dims_319"), val = tensor([0])]; + tensor expand_dims_321 = const()[name = string("expand_dims_321"), val = tensor([0])]; + tensor expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor([55])]; + int32 concat_474_axis_0 = const()[name = string("concat_474_axis_0"), val = int32(0)]; + bool concat_474_interleave_0 = const()[name = string("concat_474_interleave_0"), val = bool(false)]; + tensor concat_474 = concat(axis = concat_474_axis_0, interleave = concat_474_interleave_0, values = (expand_dims_318, expand_dims_319, current_pos, expand_dims_321))[name = string("concat_474")]; + tensor concat_475_values1_0 = const()[name = string("concat_475_values1_0"), val = tensor([0])]; + tensor concat_475_values3_0 = const()[name = string("concat_475_values3_0"), val = tensor([0])]; + int32 concat_475_axis_0 = const()[name = string("concat_475_axis_0"), val = int32(0)]; + bool concat_475_interleave_0 = const()[name = string("concat_475_interleave_0"), val = bool(false)]; + tensor concat_475 = concat(axis = concat_475_axis_0, interleave = concat_475_interleave_0, values = (expand_dims_322, concat_475_values1_0, var_1781, concat_475_values3_0))[name = string("concat_475")]; + tensor model_model_kv_cache_0_internal_tensor_assign_54_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_211 = transpose(perm = var_15682, x = var_15677)[name = string("transpose_14")]; + tensor model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16 = slice_update(begin = concat_474, begin_mask = model_model_kv_cache_0_internal_tensor_assign_54_begin_mask_0, end = concat_475, end_mask = model_model_kv_cache_0_internal_tensor_assign_54_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_54_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_54_stride_0, update = value_states_211, x = coreml_update_state_108)[name = string("model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_54_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_109_write_state")]; + tensor coreml_update_state_109 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_109")]; + tensor var_15870_begin_0 = const()[name = string("op_15870_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor var_15870_end_0 = const()[name = string("op_15870_end_0"), val = tensor([27, 8, 4096, 128])]; + tensor var_15870_end_mask_0 = const()[name = string("op_15870_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15870_cast_fp16 = slice_by_index(begin = var_15870_begin_0, end = var_15870_end_0, end_mask = var_15870_end_mask_0, x = coreml_update_state_109)[name = string("op_15870_cast_fp16")]; + tensor K_layer_cache_53_axes_0 = const()[name = string("K_layer_cache_53_axes_0"), val = tensor([0])]; + tensor K_layer_cache_53_cast_fp16 = squeeze(axes = K_layer_cache_53_axes_0, x = var_15870_cast_fp16)[name = string("K_layer_cache_53_cast_fp16")]; + tensor var_15877_begin_0 = const()[name = string("op_15877_begin_0"), val = tensor([54, 0, 0, 0])]; + tensor var_15877_end_0 = const()[name = string("op_15877_end_0"), val = tensor([55, 8, 4096, 128])]; + tensor var_15877_end_mask_0 = const()[name = string("op_15877_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_15877_cast_fp16 = slice_by_index(begin = var_15877_begin_0, end = var_15877_end_0, end_mask = var_15877_end_mask_0, x = coreml_update_state_109)[name = string("op_15877_cast_fp16")]; + tensor V_layer_cache_53_axes_0 = const()[name = string("V_layer_cache_53_axes_0"), val = tensor([0])]; + tensor V_layer_cache_53_cast_fp16 = squeeze(axes = V_layer_cache_53_axes_0, x = var_15877_cast_fp16)[name = string("V_layer_cache_53_cast_fp16")]; + tensor x_419_axes_0 = const()[name = string("x_419_axes_0"), val = tensor([1])]; + tensor x_419_cast_fp16 = expand_dims(axes = x_419_axes_0, x = K_layer_cache_53_cast_fp16)[name = string("x_419_cast_fp16")]; + tensor var_15906 = const()[name = string("op_15906"), val = tensor([1, 2, 1, 1])]; + tensor x_421_cast_fp16 = tile(reps = var_15906, x = x_419_cast_fp16)[name = string("x_421_cast_fp16")]; + tensor var_15918 = const()[name = string("op_15918"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_267_cast_fp16 = reshape(shape = var_15918, x = x_421_cast_fp16)[name = string("key_states_267_cast_fp16")]; + tensor x_425_axes_0 = const()[name = string("x_425_axes_0"), val = tensor([1])]; + tensor x_425_cast_fp16 = expand_dims(axes = x_425_axes_0, x = V_layer_cache_53_cast_fp16)[name = string("x_425_cast_fp16")]; + tensor var_15926 = const()[name = string("op_15926"), val = tensor([1, 2, 1, 1])]; + tensor x_427_cast_fp16 = tile(reps = var_15926, x = x_425_cast_fp16)[name = string("x_427_cast_fp16")]; + bool var_15953_transpose_x_0 = const()[name = string("op_15953_transpose_x_0"), val = bool(false)]; + bool var_15953_transpose_y_0 = const()[name = string("op_15953_transpose_y_0"), val = bool(true)]; + tensor var_15953 = matmul(transpose_x = var_15953_transpose_x_0, transpose_y = var_15953_transpose_y_0, x = query_states_211, y = key_states_267_cast_fp16)[name = string("op_15953")]; + fp16 var_15954_to_fp16 = const()[name = string("op_15954_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_105_cast_fp16 = mul(x = var_15953, y = var_15954_to_fp16)[name = string("attn_weights_105_cast_fp16")]; + tensor attn_weights_107_cast_fp16 = add(x = attn_weights_105_cast_fp16, y = causal_mask)[name = string("attn_weights_107_cast_fp16")]; + int32 var_15989 = const()[name = string("op_15989"), val = int32(-1)]; + tensor var_15991_cast_fp16 = softmax(axis = var_15989, x = attn_weights_107_cast_fp16)[name = string("op_15991_cast_fp16")]; + tensor concat_480 = const()[name = string("concat_480"), val = tensor([16, 64, 4096])]; + tensor reshape_78_cast_fp16 = reshape(shape = concat_480, x = var_15991_cast_fp16)[name = string("reshape_78_cast_fp16")]; + tensor concat_481 = const()[name = string("concat_481"), val = tensor([16, 4096, 128])]; + tensor reshape_79_cast_fp16 = reshape(shape = concat_481, x = x_427_cast_fp16)[name = string("reshape_79_cast_fp16")]; + bool matmul_26_transpose_x_0 = const()[name = string("matmul_26_transpose_x_0"), val = bool(false)]; + bool matmul_26_transpose_y_0 = const()[name = string("matmul_26_transpose_y_0"), val = bool(false)]; + tensor matmul_26_cast_fp16 = matmul(transpose_x = matmul_26_transpose_x_0, transpose_y = matmul_26_transpose_y_0, x = reshape_78_cast_fp16, y = reshape_79_cast_fp16)[name = string("matmul_26_cast_fp16")]; + tensor concat_485 = const()[name = string("concat_485"), val = tensor([1, 16, 64, 128])]; + tensor reshape_80_cast_fp16 = reshape(shape = concat_485, x = matmul_26_cast_fp16)[name = string("reshape_80_cast_fp16")]; + tensor var_16003_perm_0 = const()[name = string("op_16003_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_16022 = const()[name = string("op_16022"), val = tensor([1, 64, 2048])]; + tensor var_16003_cast_fp16 = transpose(perm = var_16003_perm_0, x = reshape_80_cast_fp16)[name = string("transpose_13")]; + tensor attn_output_265_cast_fp16 = reshape(shape = var_16022, x = var_16003_cast_fp16)[name = string("attn_output_265_cast_fp16")]; + tensor var_16027 = const()[name = string("op_16027"), val = tensor([0, 2, 1])]; + string var_16043_pad_type_0 = const()[name = string("op_16043_pad_type_0"), val = string("valid")]; + int32 var_16043_groups_0 = const()[name = string("op_16043_groups_0"), val = int32(1)]; + tensor var_16043_strides_0 = const()[name = string("op_16043_strides_0"), val = tensor([1])]; + tensor var_16043_pad_0 = const()[name = string("op_16043_pad_0"), val = tensor([0, 0])]; + tensor var_16043_dilations_0 = const()[name = string("op_16043_dilations_0"), val = tensor([1])]; + tensor squeeze_26_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462445184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464542400))))[name = string("squeeze_26_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_16028_cast_fp16 = transpose(perm = var_16027, x = attn_output_265_cast_fp16)[name = string("transpose_12")]; + tensor var_16043_cast_fp16 = conv(dilations = var_16043_dilations_0, groups = var_16043_groups_0, pad = var_16043_pad_0, pad_type = var_16043_pad_type_0, strides = var_16043_strides_0, weight = squeeze_26_cast_fp16_to_fp32_to_fp16_palettized, x = var_16028_cast_fp16)[name = string("op_16043_cast_fp16")]; + tensor var_16047 = const()[name = string("op_16047"), val = tensor([0, 2, 1])]; + tensor attn_output_269_cast_fp16 = transpose(perm = var_16047, x = var_16043_cast_fp16)[name = string("transpose_11")]; + tensor hidden_states_269_cast_fp16 = add(x = hidden_states_261_cast_fp16, y = attn_output_269_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; + int32 var_16060 = const()[name = string("op_16060"), val = int32(-1)]; + fp16 const_915_promoted_to_fp16 = const()[name = string("const_915_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16062_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = const_915_promoted_to_fp16)[name = string("op_16062_cast_fp16")]; + bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; + tensor input_479_cast_fp16 = concat(axis = var_16060, interleave = input_479_interleave_0, values = (hidden_states_269_cast_fp16, var_16062_cast_fp16))[name = string("input_479_cast_fp16")]; + tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; + fp16 var_16057_to_fp16 = const()[name = string("op_16057_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_16057_to_fp16, x = input_479_cast_fp16)[name = string("normed_429_cast_fp16")]; + tensor normed_431_begin_0 = const()[name = string("normed_431_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_431_end_0 = const()[name = string("normed_431_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_431_end_mask_0 = const()[name = string("normed_431_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_431_cast_fp16 = slice_by_index(begin = normed_431_begin_0, end = normed_431_end_0, end_mask = normed_431_end_mask_0, x = normed_429_cast_fp16)[name = string("normed_431_cast_fp16")]; + tensor const_918_promoted_to_fp16 = const()[name = string("const_918_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464608000)))]; + tensor x_429_cast_fp16 = mul(x = normed_431_cast_fp16, y = const_918_promoted_to_fp16)[name = string("x_429_cast_fp16")]; + tensor var_16087 = const()[name = string("op_16087"), val = tensor([0, 2, 1])]; + tensor input_481_axes_0 = const()[name = string("input_481_axes_0"), val = tensor([2])]; + tensor var_16088 = transpose(perm = var_16087, x = x_429_cast_fp16)[name = string("transpose_10")]; + tensor input_481 = expand_dims(axes = input_481_axes_0, x = var_16088)[name = string("input_481")]; + string input_483_pad_type_0 = const()[name = string("input_483_pad_type_0"), val = string("valid")]; + tensor input_483_strides_0 = const()[name = string("input_483_strides_0"), val = tensor([1, 1])]; + tensor input_483_pad_0 = const()[name = string("input_483_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_483_dilations_0 = const()[name = string("input_483_dilations_0"), val = tensor([1, 1])]; + int32 input_483_groups_0 = const()[name = string("input_483_groups_0"), val = int32(1)]; + tensor input_483 = conv(dilations = input_483_dilations_0, groups = input_483_groups_0, pad = input_483_pad_0, pad_type = input_483_pad_type_0, strides = input_483_strides_0, weight = model_model_layers_26_mlp_gate_proj_weight_palettized, x = input_481)[name = string("input_483")]; + string b_53_pad_type_0 = const()[name = string("b_53_pad_type_0"), val = string("valid")]; + tensor b_53_strides_0 = const()[name = string("b_53_strides_0"), val = tensor([1, 1])]; + tensor b_53_pad_0 = const()[name = string("b_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_53_dilations_0 = const()[name = string("b_53_dilations_0"), val = tensor([1, 1])]; + int32 b_53_groups_0 = const()[name = string("b_53_groups_0"), val = int32(1)]; + tensor b_53 = conv(dilations = b_53_dilations_0, groups = b_53_groups_0, pad = b_53_pad_0, pad_type = b_53_pad_type_0, strides = b_53_strides_0, weight = model_model_layers_26_mlp_up_proj_weight_palettized, x = input_481)[name = string("b_53")]; + tensor c_53 = silu(x = input_483)[name = string("c_53")]; + tensor input_485 = mul(x = c_53, y = b_53)[name = string("input_485")]; + string e_53_pad_type_0 = const()[name = string("e_53_pad_type_0"), val = string("valid")]; + tensor e_53_strides_0 = const()[name = string("e_53_strides_0"), val = tensor([1, 1])]; + tensor e_53_pad_0 = const()[name = string("e_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_53_dilations_0 = const()[name = string("e_53_dilations_0"), val = tensor([1, 1])]; + int32 e_53_groups_0 = const()[name = string("e_53_groups_0"), val = int32(1)]; + tensor e_53 = conv(dilations = e_53_dilations_0, groups = e_53_groups_0, pad = e_53_pad_0, pad_type = e_53_pad_type_0, strides = e_53_strides_0, weight = model_model_layers_26_mlp_down_proj_weight_palettized, x = input_485)[name = string("e_53")]; + tensor var_16110_axes_0 = const()[name = string("op_16110_axes_0"), val = tensor([2])]; + tensor var_16110 = squeeze(axes = var_16110_axes_0, x = e_53)[name = string("op_16110")]; + tensor var_16111 = const()[name = string("op_16111"), val = tensor([0, 2, 1])]; + tensor var_16112 = transpose(perm = var_16111, x = var_16110)[name = string("transpose_9")]; + tensor hidden_states_271_cast_fp16 = add(x = hidden_states_269_cast_fp16, y = var_16112)[name = string("hidden_states_271_cast_fp16")]; + int32 var_16124 = const()[name = string("op_16124"), val = int32(-1)]; + fp16 const_919_promoted_to_fp16 = const()[name = string("const_919_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16126_cast_fp16 = mul(x = hidden_states_271_cast_fp16, y = const_919_promoted_to_fp16)[name = string("op_16126_cast_fp16")]; + bool input_487_interleave_0 = const()[name = string("input_487_interleave_0"), val = bool(false)]; + tensor input_487_cast_fp16 = concat(axis = var_16124, interleave = input_487_interleave_0, values = (hidden_states_271_cast_fp16, var_16126_cast_fp16))[name = string("input_487_cast_fp16")]; + tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; + fp16 var_16121_to_fp16 = const()[name = string("op_16121_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_16121_to_fp16, x = input_487_cast_fp16)[name = string("normed_433_cast_fp16")]; + tensor normed_435_begin_0 = const()[name = string("normed_435_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_435_end_0 = const()[name = string("normed_435_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_435_end_mask_0 = const()[name = string("normed_435_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_435_cast_fp16 = slice_by_index(begin = normed_435_begin_0, end = normed_435_end_0, end_mask = normed_435_end_mask_0, x = normed_433_cast_fp16)[name = string("normed_435_cast_fp16")]; + tensor const_922_promoted_to_fp16 = const()[name = string("const_922_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464610112)))]; + tensor hidden_states_273_cast_fp16 = mul(x = normed_435_cast_fp16, y = const_922_promoted_to_fp16)[name = string("hidden_states_273_cast_fp16")]; + tensor var_16149 = const()[name = string("op_16149"), val = tensor([0, 2, 1])]; + tensor var_16152_axes_0 = const()[name = string("op_16152_axes_0"), val = tensor([2])]; + tensor var_16150_cast_fp16 = transpose(perm = var_16149, x = hidden_states_273_cast_fp16)[name = string("transpose_8")]; + tensor var_16152_cast_fp16 = expand_dims(axes = var_16152_axes_0, x = var_16150_cast_fp16)[name = string("op_16152_cast_fp16")]; + string query_states_217_pad_type_0 = const()[name = string("query_states_217_pad_type_0"), val = string("valid")]; + tensor query_states_217_strides_0 = const()[name = string("query_states_217_strides_0"), val = tensor([1, 1])]; + tensor query_states_217_pad_0 = const()[name = string("query_states_217_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_217_dilations_0 = const()[name = string("query_states_217_dilations_0"), val = tensor([1, 1])]; + int32 query_states_217_groups_0 = const()[name = string("query_states_217_groups_0"), val = int32(1)]; + tensor query_states_217 = conv(dilations = query_states_217_dilations_0, groups = query_states_217_groups_0, pad = query_states_217_pad_0, pad_type = query_states_217_pad_type_0, strides = query_states_217_strides_0, weight = model_model_layers_27_self_attn_q_proj_weight_palettized, x = var_16152_cast_fp16)[name = string("query_states_217")]; + string key_states_271_pad_type_0 = const()[name = string("key_states_271_pad_type_0"), val = string("valid")]; + tensor key_states_271_strides_0 = const()[name = string("key_states_271_strides_0"), val = tensor([1, 1])]; + tensor key_states_271_pad_0 = const()[name = string("key_states_271_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_271_dilations_0 = const()[name = string("key_states_271_dilations_0"), val = tensor([1, 1])]; + int32 key_states_271_groups_0 = const()[name = string("key_states_271_groups_0"), val = int32(1)]; + tensor key_states_271 = conv(dilations = key_states_271_dilations_0, groups = key_states_271_groups_0, pad = key_states_271_pad_0, pad_type = key_states_271_pad_type_0, strides = key_states_271_strides_0, weight = model_model_layers_27_self_attn_k_proj_weight_palettized, x = var_16152_cast_fp16)[name = string("key_states_271")]; + string value_states_217_pad_type_0 = const()[name = string("value_states_217_pad_type_0"), val = string("valid")]; + tensor value_states_217_strides_0 = const()[name = string("value_states_217_strides_0"), val = tensor([1, 1])]; + tensor value_states_217_pad_0 = const()[name = string("value_states_217_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_217_dilations_0 = const()[name = string("value_states_217_dilations_0"), val = tensor([1, 1])]; + int32 value_states_217_groups_0 = const()[name = string("value_states_217_groups_0"), val = int32(1)]; + tensor value_states_217 = conv(dilations = value_states_217_dilations_0, groups = value_states_217_groups_0, pad = value_states_217_pad_0, pad_type = value_states_217_pad_type_0, strides = value_states_217_strides_0, weight = model_model_layers_27_self_attn_v_proj_weight_palettized, x = var_16152_cast_fp16)[name = string("value_states_217")]; + tensor var_16194 = const()[name = string("op_16194"), val = tensor([1, 16, 128, 64])]; + tensor var_16195 = reshape(shape = var_16194, x = query_states_217)[name = string("op_16195")]; + tensor var_16200 = const()[name = string("op_16200"), val = tensor([0, 1, 3, 2])]; + tensor var_16205 = const()[name = string("op_16205"), val = tensor([1, 8, 128, 64])]; + tensor var_16206 = reshape(shape = var_16205, x = key_states_271)[name = string("op_16206")]; + tensor var_16211 = const()[name = string("op_16211"), val = tensor([0, 1, 3, 2])]; + tensor var_16216 = const()[name = string("op_16216"), val = tensor([1, 8, 128, 64])]; + tensor var_16217 = reshape(shape = var_16216, x = value_states_217)[name = string("op_16217")]; + tensor var_16222 = const()[name = string("op_16222"), val = tensor([0, 1, 3, 2])]; + int32 var_16233 = const()[name = string("op_16233"), val = int32(-1)]; + fp16 const_924_promoted = const()[name = string("const_924_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_275 = transpose(perm = var_16200, x = var_16195)[name = string("transpose_7")]; + tensor var_16235 = mul(x = hidden_states_275, y = const_924_promoted)[name = string("op_16235")]; + bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; + tensor input_491 = concat(axis = var_16233, interleave = input_491_interleave_0, values = (hidden_states_275, var_16235))[name = string("input_491")]; + tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; + fp16 var_16230_to_fp16 = const()[name = string("op_16230_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_16230_to_fp16, x = input_491)[name = string("normed_437_cast_fp16")]; + tensor normed_439_begin_0 = const()[name = string("normed_439_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_439_end_0 = const()[name = string("normed_439_end_0"), val = tensor([1, 16, 64, 128])]; + tensor normed_439_end_mask_0 = const()[name = string("normed_439_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_439 = slice_by_index(begin = normed_439_begin_0, end = normed_439_end_0, end_mask = normed_439_end_mask_0, x = normed_437_cast_fp16)[name = string("normed_439")]; + tensor const_927 = const()[name = string("const_927"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464612224)))]; + tensor q = mul(x = normed_439, y = const_927)[name = string("q")]; + int32 var_16258 = const()[name = string("op_16258"), val = int32(-1)]; + fp16 const_928_promoted = const()[name = string("const_928_promoted"), val = fp16(-0x1p+0)]; + tensor hidden_states_277 = transpose(perm = var_16211, x = var_16206)[name = string("transpose_6")]; + tensor var_16260 = mul(x = hidden_states_277, y = const_928_promoted)[name = string("op_16260")]; + bool input_493_interleave_0 = const()[name = string("input_493_interleave_0"), val = bool(false)]; + tensor input_493 = concat(axis = var_16258, interleave = input_493_interleave_0, values = (hidden_states_277, var_16260))[name = string("input_493")]; + tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; + fp16 var_16255_to_fp16 = const()[name = string("op_16255_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_16255_to_fp16, x = input_493)[name = string("normed_441_cast_fp16")]; + tensor normed_443_begin_0 = const()[name = string("normed_443_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor normed_443_end_0 = const()[name = string("normed_443_end_0"), val = tensor([1, 8, 64, 128])]; + tensor normed_443_end_mask_0 = const()[name = string("normed_443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor normed_443 = slice_by_index(begin = normed_443_begin_0, end = normed_443_end_0, end_mask = normed_443_end_mask_0, x = normed_441_cast_fp16)[name = string("normed_443")]; + tensor const_931 = const()[name = string("const_931"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464612544)))]; + tensor k = mul(x = normed_443, y = const_931)[name = string("k")]; + tensor var_16286 = mul(x = q, y = cos_5)[name = string("op_16286")]; + tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 16, 64, 64])]; + tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_109 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = q)[name = string("x1_109")]; + tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 16, 64, 128])]; + tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_109 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = q)[name = string("x2_109")]; + fp16 const_934_promoted = const()[name = string("const_934_promoted"), val = fp16(-0x1p+0)]; + tensor var_16307 = mul(x = x2_109, y = const_934_promoted)[name = string("op_16307")]; + int32 var_16309 = const()[name = string("op_16309"), val = int32(-1)]; + bool var_16310_interleave_0 = const()[name = string("op_16310_interleave_0"), val = bool(false)]; + tensor var_16310 = concat(axis = var_16309, interleave = var_16310_interleave_0, values = (var_16307, x1_109))[name = string("op_16310")]; + tensor var_16311 = mul(x = var_16310, y = sin_5)[name = string("op_16311")]; + tensor query_states_219 = add(x = var_16286, y = var_16311)[name = string("query_states_219")]; + tensor var_16314 = mul(x = k, y = cos_5)[name = string("op_16314")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k)[name = string("x2")]; + fp16 const_937_promoted = const()[name = string("const_937_promoted"), val = fp16(-0x1p+0)]; + tensor var_16335 = mul(x = x2, y = const_937_promoted)[name = string("op_16335")]; + int32 var_16337 = const()[name = string("op_16337"), val = int32(-1)]; + bool var_16338_interleave_0 = const()[name = string("op_16338_interleave_0"), val = bool(false)]; + tensor var_16338 = concat(axis = var_16337, interleave = var_16338_interleave_0, values = (var_16335, x1))[name = string("op_16338")]; + tensor var_16339 = mul(x = var_16338, y = sin_5)[name = string("op_16339")]; + tensor key_states_273 = add(x = var_16314, y = var_16339)[name = string("key_states_273")]; + tensor expand_dims_324 = const()[name = string("expand_dims_324"), val = tensor([27])]; + tensor expand_dims_325 = const()[name = string("expand_dims_325"), val = tensor([0])]; + tensor expand_dims_327 = const()[name = string("expand_dims_327"), val = tensor([0])]; + tensor expand_dims_328 = const()[name = string("expand_dims_328"), val = tensor([28])]; + int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)]; + bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)]; + tensor concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (expand_dims_324, expand_dims_325, current_pos, expand_dims_327))[name = string("concat_488")]; + tensor concat_489_values1_0 = const()[name = string("concat_489_values1_0"), val = tensor([0])]; + tensor concat_489_values3_0 = const()[name = string("concat_489_values3_0"), val = tensor([0])]; + int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)]; + bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)]; + tensor concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (expand_dims_328, concat_489_values1_0, var_1781, concat_489_values3_0))[name = string("concat_489")]; + tensor model_model_kv_cache_0_internal_tensor_assign_55_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16 = slice_update(begin = concat_488, begin_mask = model_model_kv_cache_0_internal_tensor_assign_55_begin_mask_0, end = concat_489, end_mask = model_model_kv_cache_0_internal_tensor_assign_55_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_55_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_55_stride_0, update = key_states_273, x = coreml_update_state_109)[name = string("model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_55_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_110_write_state")]; + tensor coreml_update_state_110 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_110")]; + tensor expand_dims_330 = const()[name = string("expand_dims_330"), val = tensor([55])]; + tensor expand_dims_331 = const()[name = string("expand_dims_331"), val = tensor([0])]; + tensor expand_dims_333 = const()[name = string("expand_dims_333"), val = tensor([0])]; + tensor expand_dims_334 = const()[name = string("expand_dims_334"), val = tensor([56])]; + int32 concat_492_axis_0 = const()[name = string("concat_492_axis_0"), val = int32(0)]; + bool concat_492_interleave_0 = const()[name = string("concat_492_interleave_0"), val = bool(false)]; + tensor concat_492 = concat(axis = concat_492_axis_0, interleave = concat_492_interleave_0, values = (expand_dims_330, expand_dims_331, current_pos, expand_dims_333))[name = string("concat_492")]; + tensor concat_493_values1_0 = const()[name = string("concat_493_values1_0"), val = tensor([0])]; + tensor concat_493_values3_0 = const()[name = string("concat_493_values3_0"), val = tensor([0])]; + int32 concat_493_axis_0 = const()[name = string("concat_493_axis_0"), val = int32(0)]; + bool concat_493_interleave_0 = const()[name = string("concat_493_interleave_0"), val = bool(false)]; + tensor concat_493 = concat(axis = concat_493_axis_0, interleave = concat_493_interleave_0, values = (expand_dims_334, concat_493_values1_0, var_1781, concat_493_values3_0))[name = string("concat_493")]; + tensor model_model_kv_cache_0_internal_tensor_assign_56_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_219 = transpose(perm = var_16222, x = var_16217)[name = string("transpose_5")]; + tensor model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16 = slice_update(begin = concat_492, begin_mask = model_model_kv_cache_0_internal_tensor_assign_56_begin_mask_0, end = concat_493, end_mask = model_model_kv_cache_0_internal_tensor_assign_56_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_56_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_56_stride_0, update = value_states_219, x = coreml_update_state_110)[name = string("model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_56_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_111_write_state")]; + tensor coreml_update_state_111 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_111")]; + tensor var_16410_begin_0 = const()[name = string("op_16410_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor var_16410_end_0 = const()[name = string("op_16410_end_0"), val = tensor([28, 8, 4096, 128])]; + tensor var_16410_end_mask_0 = const()[name = string("op_16410_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_16410_cast_fp16 = slice_by_index(begin = var_16410_begin_0, end = var_16410_end_0, end_mask = var_16410_end_mask_0, x = coreml_update_state_111)[name = string("op_16410_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_16410_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_16417_begin_0 = const()[name = string("op_16417_begin_0"), val = tensor([55, 0, 0, 0])]; + tensor var_16417_end_0 = const()[name = string("op_16417_end_0"), val = tensor([1, 8, 4096, 128])]; + tensor var_16417_end_mask_0 = const()[name = string("op_16417_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_16417_cast_fp16 = slice_by_index(begin = var_16417_begin_0, end = var_16417_end_0, end_mask = var_16417_end_mask_0, x = coreml_update_state_111)[name = string("op_16417_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_16417_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_435_axes_0 = const()[name = string("x_435_axes_0"), val = tensor([1])]; + tensor x_435_cast_fp16 = expand_dims(axes = x_435_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_435_cast_fp16")]; + tensor var_16446 = const()[name = string("op_16446"), val = tensor([1, 2, 1, 1])]; + tensor x_437_cast_fp16 = tile(reps = var_16446, x = x_435_cast_fp16)[name = string("x_437_cast_fp16")]; + tensor var_16458 = const()[name = string("op_16458"), val = tensor([1, -1, 4096, 128])]; + tensor key_states_277_cast_fp16 = reshape(shape = var_16458, x = x_437_cast_fp16)[name = string("key_states_277_cast_fp16")]; + tensor x_441_axes_0 = const()[name = string("x_441_axes_0"), val = tensor([1])]; + tensor x_441_cast_fp16 = expand_dims(axes = x_441_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_441_cast_fp16")]; + tensor var_16466 = const()[name = string("op_16466"), val = tensor([1, 2, 1, 1])]; + tensor x_443_cast_fp16 = tile(reps = var_16466, x = x_441_cast_fp16)[name = string("x_443_cast_fp16")]; + bool var_16493_transpose_x_0 = const()[name = string("op_16493_transpose_x_0"), val = bool(false)]; + bool var_16493_transpose_y_0 = const()[name = string("op_16493_transpose_y_0"), val = bool(true)]; + tensor var_16493 = matmul(transpose_x = var_16493_transpose_x_0, transpose_y = var_16493_transpose_y_0, x = query_states_219, y = key_states_277_cast_fp16)[name = string("op_16493")]; + fp16 var_16494_to_fp16 = const()[name = string("op_16494_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_109_cast_fp16 = mul(x = var_16493, y = var_16494_to_fp16)[name = string("attn_weights_109_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_109_cast_fp16, y = causal_mask)[name = string("attn_weights_cast_fp16")]; + int32 var_16529 = const()[name = string("op_16529"), val = int32(-1)]; + tensor var_16531_cast_fp16 = softmax(axis = var_16529, x = attn_weights_cast_fp16)[name = string("op_16531_cast_fp16")]; + tensor concat_498 = const()[name = string("concat_498"), val = tensor([16, 64, 4096])]; + tensor reshape_81_cast_fp16 = reshape(shape = concat_498, x = var_16531_cast_fp16)[name = string("reshape_81_cast_fp16")]; + tensor concat_499 = const()[name = string("concat_499"), val = tensor([16, 4096, 128])]; + tensor reshape_82_cast_fp16 = reshape(shape = concat_499, x = x_443_cast_fp16)[name = string("reshape_82_cast_fp16")]; + bool matmul_27_transpose_x_0 = const()[name = string("matmul_27_transpose_x_0"), val = bool(false)]; + bool matmul_27_transpose_y_0 = const()[name = string("matmul_27_transpose_y_0"), val = bool(false)]; + tensor matmul_27_cast_fp16 = matmul(transpose_x = matmul_27_transpose_x_0, transpose_y = matmul_27_transpose_y_0, x = reshape_81_cast_fp16, y = reshape_82_cast_fp16)[name = string("matmul_27_cast_fp16")]; + tensor concat_503 = const()[name = string("concat_503"), val = tensor([1, 16, 64, 128])]; + tensor reshape_83_cast_fp16 = reshape(shape = concat_503, x = matmul_27_cast_fp16)[name = string("reshape_83_cast_fp16")]; + tensor var_16543_perm_0 = const()[name = string("op_16543_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_16562 = const()[name = string("op_16562"), val = tensor([1, 64, 2048])]; + tensor var_16543_cast_fp16 = transpose(perm = var_16543_perm_0, x = reshape_83_cast_fp16)[name = string("transpose_4")]; + tensor attn_output_275_cast_fp16 = reshape(shape = var_16562, x = var_16543_cast_fp16)[name = string("attn_output_275_cast_fp16")]; + tensor var_16567 = const()[name = string("op_16567"), val = tensor([0, 2, 1])]; + string var_16583_pad_type_0 = const()[name = string("op_16583_pad_type_0"), val = string("valid")]; + int32 var_16583_groups_0 = const()[name = string("op_16583_groups_0"), val = int32(1)]; + tensor var_16583_strides_0 = const()[name = string("op_16583_strides_0"), val = tensor([1])]; + tensor var_16583_pad_0 = const()[name = string("op_16583_pad_0"), val = tensor([0, 0])]; + tensor var_16583_dilations_0 = const()[name = string("op_16583_dilations_0"), val = tensor([1])]; + tensor squeeze_27_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464612864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466710080))))[name = string("squeeze_27_cast_fp16_to_fp32_to_fp16_palettized")]; + tensor var_16568_cast_fp16 = transpose(perm = var_16567, x = attn_output_275_cast_fp16)[name = string("transpose_3")]; + tensor var_16583_cast_fp16 = conv(dilations = var_16583_dilations_0, groups = var_16583_groups_0, pad = var_16583_pad_0, pad_type = var_16583_pad_type_0, strides = var_16583_strides_0, weight = squeeze_27_cast_fp16_to_fp32_to_fp16_palettized, x = var_16568_cast_fp16)[name = string("op_16583_cast_fp16")]; + tensor var_16587 = const()[name = string("op_16587"), val = tensor([0, 2, 1])]; + tensor attn_output_cast_fp16 = transpose(perm = var_16587, x = var_16583_cast_fp16)[name = string("transpose_2")]; + tensor hidden_states_cast_fp16 = add(x = hidden_states_271_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_cast_fp16")]; + int32 var_16600 = const()[name = string("op_16600"), val = int32(-1)]; + fp16 const_949_promoted_to_fp16 = const()[name = string("const_949_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_16602_cast_fp16 = mul(x = hidden_states_cast_fp16, y = const_949_promoted_to_fp16)[name = string("op_16602_cast_fp16")]; + bool input_497_interleave_0 = const()[name = string("input_497_interleave_0"), val = bool(false)]; + tensor input_497_cast_fp16 = concat(axis = var_16600, interleave = input_497_interleave_0, values = (hidden_states_cast_fp16, var_16602_cast_fp16))[name = string("input_497_cast_fp16")]; + tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; + fp16 var_16597_to_fp16 = const()[name = string("op_16597_to_fp16"), val = fp16(0x1.1p-20)]; + tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_16597_to_fp16, x = input_497_cast_fp16)[name = string("normed_445_cast_fp16")]; + tensor normed_begin_0 = const()[name = string("normed_begin_0"), val = tensor([0, 0, 0])]; + tensor normed_end_0 = const()[name = string("normed_end_0"), val = tensor([1, 64, 1024])]; + tensor normed_end_mask_0 = const()[name = string("normed_end_mask_0"), val = tensor([true, true, false])]; + tensor normed_cast_fp16 = slice_by_index(begin = normed_begin_0, end = normed_end_0, end_mask = normed_end_mask_0, x = normed_445_cast_fp16)[name = string("normed_cast_fp16")]; + tensor const_952_promoted_to_fp16 = const()[name = string("const_952_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466775680)))]; + tensor x_445_cast_fp16 = mul(x = normed_cast_fp16, y = const_952_promoted_to_fp16)[name = string("x_445_cast_fp16")]; + tensor var_16627 = const()[name = string("op_16627"), val = tensor([0, 2, 1])]; + tensor input_499_axes_0 = const()[name = string("input_499_axes_0"), val = tensor([2])]; + tensor var_16628 = transpose(perm = var_16627, x = x_445_cast_fp16)[name = string("transpose_1")]; + tensor input_499 = expand_dims(axes = input_499_axes_0, x = var_16628)[name = string("input_499")]; + string input_501_pad_type_0 = const()[name = string("input_501_pad_type_0"), val = string("valid")]; + tensor input_501_strides_0 = const()[name = string("input_501_strides_0"), val = tensor([1, 1])]; + tensor input_501_pad_0 = const()[name = string("input_501_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_501_dilations_0 = const()[name = string("input_501_dilations_0"), val = tensor([1, 1])]; + int32 input_501_groups_0 = const()[name = string("input_501_groups_0"), val = int32(1)]; + tensor input_501 = conv(dilations = input_501_dilations_0, groups = input_501_groups_0, pad = input_501_pad_0, pad_type = input_501_pad_type_0, strides = input_501_strides_0, weight = model_model_layers_27_mlp_gate_proj_weight_palettized, x = input_499)[name = string("input_501")]; + string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; + tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; + tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; + int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; + tensor b = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_27_mlp_up_proj_weight_palettized, x = input_499)[name = string("b")]; + tensor c = silu(x = input_501)[name = string("c")]; + tensor input = mul(x = c, y = b)[name = string("input")]; + string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; + tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; + tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; + int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; + tensor e = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_27_mlp_down_proj_weight_palettized, x = input)[name = string("e")]; + tensor var_16650_axes_0 = const()[name = string("op_16650_axes_0"), val = tensor([2])]; + tensor var_16650 = squeeze(axes = var_16650_axes_0, x = e)[name = string("op_16650")]; + tensor var_16651 = const()[name = string("op_16651"), val = tensor([0, 2, 1])]; + tensor var_16652 = transpose(perm = var_16651, x = var_16650)[name = string("transpose_0")]; + tensor out_cast_fp16 = add(x = hidden_states_cast_fp16, y = var_16652)[name = string("out_cast_fp16")]; + tensor var_16664_begin_0 = const()[name = string("op_16664_begin_0"), val = tensor([0, 0, 0])]; + tensor var_16664_end_0 = const()[name = string("op_16664_end_0"), val = tensor([1, 1, 1024])]; + tensor var_16664_end_mask_0 = const()[name = string("op_16664_end_mask_0"), val = tensor([true, false, true])]; + tensor output_hidden_states = slice_by_index(begin = var_16664_begin_0, end = var_16664_end_0, end_mask = var_16664_end_mask_0, x = out_cast_fp16)[name = string("op_16664_cast_fp16")]; + } -> (output_hidden_states); +} \ No newline at end of file