diff --git "a/test-en-is.comet" "b/test-en-is.comet" new file mode 100644--- /dev/null +++ "b/test-en-is.comet" @@ -0,0 +1,1013 @@ +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 0 score: 0.8220 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1 score: 0.9123 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 2 score: 0.9259 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 3 score: 0.8580 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 4 score: 0.9262 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 5 score: 0.8443 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 6 score: 0.9165 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 7 score: 0.8802 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 8 score: 0.8578 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 9 score: 0.9105 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 10 score: 0.9081 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 11 score: 0.8503 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 12 score: 0.8820 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 13 score: 0.8962 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 14 score: 0.8311 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 15 score: 0.8090 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 16 score: 0.8898 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 17 score: 0.9608 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 18 score: 0.8916 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 19 score: 0.8781 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 20 score: 0.9407 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 21 score: 0.9177 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 22 score: 0.8257 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 23 score: 0.8565 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 24 score: 0.9029 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 25 score: 0.9252 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 26 score: 0.8673 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 27 score: 0.9015 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 28 score: 0.9001 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 29 score: 0.8483 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 30 score: 0.7086 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 31 score: 0.8280 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 32 score: 0.8481 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 33 score: 0.9225 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 34 score: 0.8956 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 35 score: 0.9309 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 36 score: 0.9194 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 37 score: 0.8729 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 38 score: 0.9059 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 39 score: 0.9262 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 40 score: 0.8103 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 41 score: 0.9548 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 42 score: 0.8720 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 43 score: 0.8852 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 44 score: 0.8724 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 45 score: 0.8833 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 46 score: 0.9241 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 47 score: 0.9272 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 48 score: 0.8950 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 49 score: 0.8940 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 50 score: 0.8906 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 51 score: 0.8741 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 52 score: 0.9325 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 53 score: 0.9525 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 54 score: 0.8686 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 55 score: 0.9015 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 56 score: 0.8313 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 57 score: 0.8812 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 58 score: 0.8736 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 59 score: 0.8828 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 60 score: 0.8320 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 61 score: 0.9317 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 62 score: 0.8921 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 63 score: 0.8954 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 64 score: 0.8542 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 65 score: 0.8005 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 66 score: 0.7720 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 67 score: 0.7335 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 68 score: 0.9359 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 69 score: 0.8858 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 70 score: 0.8698 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 71 score: 0.8641 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 72 score: 0.8972 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 73 score: 0.8604 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 74 score: 0.7891 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 75 score: 0.8642 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 76 score: 0.8767 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 77 score: 0.9412 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 78 score: 0.9781 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 79 score: 0.9153 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 80 score: 0.8076 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 81 score: 0.8078 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 82 score: 0.8020 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 83 score: 0.8279 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 84 score: 0.9518 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 85 score: 0.9038 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 86 score: 0.8532 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 87 score: 0.9109 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 88 score: 0.8489 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 89 score: 0.7589 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 90 score: 0.7273 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 91 score: 0.9042 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 92 score: 0.8343 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 93 score: 0.9347 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 94 score: 0.9369 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 95 score: 0.8893 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 96 score: 0.8633 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 97 score: 0.7704 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 98 score: 0.8654 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 99 score: 0.8953 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 100 score: 0.8926 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 101 score: 0.8642 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 102 score: 0.8821 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 103 score: 0.9372 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 104 score: 0.9472 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 105 score: 0.7235 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 106 score: 0.9448 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 107 score: 0.9079 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 108 score: 0.8200 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 109 score: 0.8963 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 110 score: 0.8569 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 111 score: 0.8807 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 112 score: 0.9382 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 113 score: 0.9374 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 114 score: 0.8027 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 115 score: 0.8954 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 116 score: 0.9008 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 117 score: 0.8992 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 118 score: 0.9421 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 119 score: 0.9083 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 120 score: 0.8506 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 121 score: 0.8891 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 122 score: 0.9538 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 123 score: 0.9315 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 124 score: 0.9028 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 125 score: 0.9022 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 126 score: 0.8996 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 127 score: 0.8992 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 128 score: 0.9053 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 129 score: 0.8487 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 130 score: 0.9134 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 131 score: 0.8840 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 132 score: 0.8708 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 133 score: 0.9397 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 134 score: 0.7729 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 135 score: 0.7248 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 136 score: 0.9197 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 137 score: 0.8806 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 138 score: 0.8794 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 139 score: 0.9033 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 140 score: 0.8824 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 141 score: 0.9222 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 142 score: 0.9186 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 143 score: 0.8607 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 144 score: 0.8756 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 145 score: 0.9136 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 146 score: 0.8953 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 147 score: 0.9156 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 148 score: 0.8424 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 149 score: 0.8958 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 150 score: 0.9502 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 151 score: 0.8764 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 152 score: 0.8322 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 153 score: 0.8454 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 154 score: 0.8770 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 155 score: 0.8477 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 156 score: 0.8327 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 157 score: 0.8341 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 158 score: 0.8926 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 159 score: 0.7027 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 160 score: 0.9243 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 161 score: 0.8813 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 162 score: 0.8549 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 163 score: 0.8779 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 164 score: 0.9120 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 165 score: 0.8669 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 166 score: 0.9131 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 167 score: 0.8602 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 168 score: 0.8753 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 169 score: 0.8851 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 170 score: 0.8270 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 171 score: 0.9282 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 172 score: 0.6702 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 173 score: 0.8795 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 174 score: 0.8417 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 175 score: 0.8947 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 176 score: 0.8419 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 177 score: 0.8157 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 178 score: 0.8150 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 179 score: 0.9197 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 180 score: 0.9124 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 181 score: 0.9112 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 182 score: 0.8435 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 183 score: 0.8702 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 184 score: 0.9000 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 185 score: 0.9227 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 186 score: 0.9361 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 187 score: 0.8875 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 188 score: 0.8070 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 189 score: 0.8298 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 190 score: 0.8306 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 191 score: 0.9222 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 192 score: 0.7789 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 193 score: 0.8639 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 194 score: 0.8117 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 195 score: 0.9384 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 196 score: 0.9175 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 197 score: 0.8641 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 198 score: 0.8702 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 199 score: 0.8620 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 200 score: 0.8283 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 201 score: 0.8203 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 202 score: 0.8331 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 203 score: 0.8577 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 204 score: 0.9009 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 205 score: 0.8675 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 206 score: 0.8704 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 207 score: 0.8705 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 208 score: 0.9438 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 209 score: 0.7926 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 210 score: 0.9104 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 211 score: 0.9366 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 212 score: 0.8515 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 213 score: 0.9475 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 214 score: 0.8411 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 215 score: 0.8690 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 216 score: 0.8952 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 217 score: 0.8846 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 218 score: 0.8816 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 219 score: 0.9218 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 220 score: 0.9148 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 221 score: 0.7608 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 222 score: 0.9335 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 223 score: 0.8715 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 224 score: 0.9123 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 225 score: 0.9084 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 226 score: 0.9100 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 227 score: 0.6607 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 228 score: 0.8408 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 229 score: 0.8690 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 230 score: 0.9286 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 231 score: 0.8752 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 232 score: 0.8995 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 233 score: 0.8395 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 234 score: 0.8345 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 235 score: 0.9188 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 236 score: 0.8862 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 237 score: 0.8773 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 238 score: 0.9241 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 239 score: 0.8856 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 240 score: 0.8347 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 241 score: 0.8085 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 242 score: 0.9341 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 243 score: 0.7611 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 244 score: 0.7553 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 245 score: 0.8576 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 246 score: 0.8943 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 247 score: 0.8262 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 248 score: 0.7455 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 249 score: 0.8711 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 250 score: 0.8809 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 251 score: 0.8971 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 252 score: 0.8402 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 253 score: 0.9581 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 254 score: 0.8747 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 255 score: 0.8611 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 256 score: 0.8811 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 257 score: 0.8909 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 258 score: 0.8261 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 259 score: 0.8958 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 260 score: 0.9257 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 261 score: 0.6936 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 262 score: 0.6453 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 263 score: 0.7852 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 264 score: 0.9276 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 265 score: 0.8973 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 266 score: 0.8841 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 267 score: 0.8706 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 268 score: 0.9471 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 269 score: 0.8317 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 270 score: 0.9205 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 271 score: 0.7874 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 272 score: 0.8592 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 273 score: 0.8813 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 274 score: 0.7627 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 275 score: 0.8131 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 276 score: 0.8001 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 277 score: 0.8756 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 278 score: 0.8875 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 279 score: 0.8672 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 280 score: 0.8817 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 281 score: 0.8935 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 282 score: 0.9148 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 283 score: 0.9602 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 284 score: 0.9660 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 285 score: 0.9609 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 286 score: 0.8775 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 287 score: 0.8810 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 288 score: 0.8918 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 289 score: 0.9197 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 290 score: 0.8647 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 291 score: 0.9089 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 292 score: 0.9114 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 293 score: 0.9118 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 294 score: 0.8564 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 295 score: 0.7906 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 296 score: 0.8483 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 297 score: 0.7913 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 298 score: 0.5606 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 299 score: 0.9181 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 300 score: 0.9209 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 301 score: 0.8128 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 302 score: 0.8355 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 303 score: 0.8975 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 304 score: 0.9195 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 305 score: 0.9090 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 306 score: 0.9185 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 307 score: 0.9090 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 308 score: 0.7698 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 309 score: 0.9164 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 310 score: 0.9270 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 311 score: 0.7455 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 312 score: 0.7734 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 313 score: 0.8673 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 314 score: 0.6829 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 315 score: 0.8712 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 316 score: 0.8749 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 317 score: 0.8936 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 318 score: 0.8807 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 319 score: 0.7314 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 320 score: 0.7592 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 321 score: 0.8908 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 322 score: 0.8575 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 323 score: 0.8613 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 324 score: 0.9142 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 325 score: 0.8690 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 326 score: 0.8593 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 327 score: 0.9053 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 328 score: 0.9187 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 329 score: 0.8985 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 330 score: 0.8449 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 331 score: 0.8443 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 332 score: 0.9368 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 333 score: 0.8724 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 334 score: 0.9896 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 335 score: 0.8883 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 336 score: 0.8832 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 337 score: 0.7344 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 338 score: 0.9140 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 339 score: 0.9015 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 340 score: 0.8834 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 341 score: 0.8461 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 342 score: 0.8698 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 343 score: 0.8296 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 344 score: 0.8060 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 345 score: 0.8615 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 346 score: 0.9202 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 347 score: 0.7861 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 348 score: 0.8729 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 349 score: 0.6968 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 350 score: 0.8428 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 351 score: 0.8195 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 352 score: 0.9032 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 353 score: 0.8690 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 354 score: 0.8411 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 355 score: 0.8672 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 356 score: 0.8701 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 357 score: 0.9442 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 358 score: 0.6426 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 359 score: 0.8589 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 360 score: 0.7900 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 361 score: 0.8243 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 362 score: 0.9000 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 363 score: 0.7913 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 364 score: 0.8475 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 365 score: 0.8814 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 366 score: 0.6536 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 367 score: 0.8719 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 368 score: 0.6744 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 369 score: 0.8890 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 370 score: 0.8449 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 371 score: 0.8012 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 372 score: 0.8317 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 373 score: 0.8341 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 374 score: 0.9327 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 375 score: 0.9044 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 376 score: 0.9346 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 377 score: 0.9126 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 378 score: 0.7550 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 379 score: 0.8935 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 380 score: 0.9043 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 381 score: 0.8953 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 382 score: 0.7549 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 383 score: 0.5719 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 384 score: 0.7816 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 385 score: 0.8356 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 386 score: 0.7945 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 387 score: 0.9372 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 388 score: 0.8229 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 389 score: 0.8475 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 390 score: 0.9253 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 391 score: 0.9120 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 392 score: 0.8936 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 393 score: 0.8804 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 394 score: 0.9163 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 395 score: 0.7807 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 396 score: 0.8466 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 397 score: 0.9286 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 398 score: 0.8276 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 399 score: 0.9429 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 400 score: 0.9243 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 401 score: 0.8937 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 402 score: 0.7561 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 403 score: 0.8290 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 404 score: 0.8002 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 405 score: 0.7345 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 406 score: 0.9062 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 407 score: 0.9164 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 408 score: 0.9117 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 409 score: 0.8685 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 410 score: 0.9112 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 411 score: 0.9059 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 412 score: 0.9004 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 413 score: 0.8248 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 414 score: 0.8011 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 415 score: 0.9014 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 416 score: 0.8898 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 417 score: 0.8757 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 418 score: 0.9064 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 419 score: 0.8913 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 420 score: 0.9544 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 421 score: 0.9263 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 422 score: 0.8616 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 423 score: 0.8332 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 424 score: 0.8637 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 425 score: 0.8930 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 426 score: 0.8954 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 427 score: 0.8458 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 428 score: 0.9313 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 429 score: 0.9232 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 430 score: 0.8697 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 431 score: 0.7682 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 432 score: 0.8426 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 433 score: 0.8576 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 434 score: 0.6712 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 435 score: 0.7961 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 436 score: 0.8418 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 437 score: 0.8682 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 438 score: 0.9498 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 439 score: 0.8415 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 440 score: 0.8175 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 441 score: 0.8829 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 442 score: 0.8402 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 443 score: 0.8460 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 444 score: 0.8663 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 445 score: 0.8654 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 446 score: 0.8971 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 447 score: 0.9231 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 448 score: 0.9268 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 449 score: 0.8490 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 450 score: 0.8780 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 451 score: 0.8547 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 452 score: 0.9435 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 453 score: 0.9085 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 454 score: 0.8432 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 455 score: 0.9194 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 456 score: 0.8515 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 457 score: 0.8634 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 458 score: 0.8999 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 459 score: 0.7631 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 460 score: 0.9093 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 461 score: 0.8972 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 462 score: 0.8778 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 463 score: 0.9485 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 464 score: 0.7004 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 465 score: 0.8787 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 466 score: 0.9141 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 467 score: 0.7771 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 468 score: 0.8343 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 469 score: 0.6544 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 470 score: 0.8002 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 471 score: 0.8842 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 472 score: 0.8696 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 473 score: 0.7924 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 474 score: 0.7774 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 475 score: 0.8840 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 476 score: 0.8826 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 477 score: 0.8139 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 478 score: 0.9085 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 479 score: 0.9473 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 480 score: 0.5350 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 481 score: 0.8500 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 482 score: 0.8823 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 483 score: 0.8019 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 484 score: 0.9036 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 485 score: 0.8622 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 486 score: 0.8962 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 487 score: 0.8741 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 488 score: 0.9003 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 489 score: 0.8878 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 490 score: 0.8771 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 491 score: 0.9307 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 492 score: 0.9475 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 493 score: 0.8933 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 494 score: 0.6998 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 495 score: 0.8931 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 496 score: 0.8279 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 497 score: 0.8979 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 498 score: 0.9500 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 499 score: 0.8462 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 500 score: 0.9356 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 501 score: 0.8954 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 502 score: 0.9178 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 503 score: 0.7865 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 504 score: 0.8421 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 505 score: 0.9059 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 506 score: 0.8466 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 507 score: 0.8635 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 508 score: 0.8600 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 509 score: 0.8780 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 510 score: 0.7019 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 511 score: 0.8591 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 512 score: 0.8696 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 513 score: 0.8899 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 514 score: 0.9344 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 515 score: 0.9441 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 516 score: 0.8374 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 517 score: 0.9554 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 518 score: 0.8238 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 519 score: 0.9060 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 520 score: 0.9099 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 521 score: 0.9125 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 522 score: 0.8676 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 523 score: 0.8577 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 524 score: 0.9046 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 525 score: 0.8752 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 526 score: 0.8319 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 527 score: 0.7241 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 528 score: 0.8485 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 529 score: 0.9030 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 530 score: 0.8347 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 531 score: 0.9029 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 532 score: 0.8944 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 533 score: 0.8966 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 534 score: 0.8676 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 535 score: 0.6754 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 536 score: 0.8161 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 537 score: 0.8485 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 538 score: 0.9445 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 539 score: 0.9340 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 540 score: 0.8282 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 541 score: 0.8890 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 542 score: 0.8496 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 543 score: 0.8709 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 544 score: 0.9022 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 545 score: 0.9263 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 546 score: 0.8191 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 547 score: 0.8842 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 548 score: 0.8761 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 549 score: 0.8616 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 550 score: 0.8407 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 551 score: 0.9275 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 552 score: 0.9241 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 553 score: 0.8642 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 554 score: 0.8593 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 555 score: 0.8027 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 556 score: 0.8082 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 557 score: 0.8864 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 558 score: 0.7034 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 559 score: 0.8233 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 560 score: 0.8951 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 561 score: 0.9055 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 562 score: 0.8721 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 563 score: 0.7579 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 564 score: 0.7999 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 565 score: 0.8828 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 566 score: 0.9239 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 567 score: 0.9068 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 568 score: 0.8182 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 569 score: 0.7977 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 570 score: 0.9041 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 571 score: 0.8764 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 572 score: 0.7665 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 573 score: 0.8437 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 574 score: 0.9599 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 575 score: 0.7520 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 576 score: 0.7320 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 577 score: 0.8862 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 578 score: 0.8617 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 579 score: 0.9127 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 580 score: 0.8565 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 581 score: 0.8485 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 582 score: 0.9088 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 583 score: 0.8129 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 584 score: 0.5779 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 585 score: 0.8634 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 586 score: 0.9314 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 587 score: 0.9643 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 588 score: 0.8287 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 589 score: 0.9408 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 590 score: 0.7902 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 591 score: 0.9209 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 592 score: 0.7733 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 593 score: 0.9384 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 594 score: 0.8805 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 595 score: 0.9020 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 596 score: 0.7906 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 597 score: 0.8442 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 598 score: 0.7778 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 599 score: 0.9063 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 600 score: 0.8105 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 601 score: 0.8622 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 602 score: 0.7939 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 603 score: 0.9061 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 604 score: 0.9340 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 605 score: 0.8355 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 606 score: 0.7439 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 607 score: 0.8233 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 608 score: 0.8137 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 609 score: 0.8735 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 610 score: 0.9376 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 611 score: 0.8853 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 612 score: 0.8085 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 613 score: 0.9020 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 614 score: 0.9342 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 615 score: 0.8604 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 616 score: 0.9115 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 617 score: 0.9199 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 618 score: 0.7985 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 619 score: 0.7265 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 620 score: 0.9175 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 621 score: 0.6556 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 622 score: 0.9110 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 623 score: 0.8917 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 624 score: 0.8056 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 625 score: 0.9074 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 626 score: 0.8777 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 627 score: 0.9314 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 628 score: 0.9355 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 629 score: 0.8824 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 630 score: 0.8943 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 631 score: 0.9296 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 632 score: 0.9215 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 633 score: 0.9432 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 634 score: 0.9628 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 635 score: 0.8747 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 636 score: 0.9241 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 637 score: 0.8774 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 638 score: 0.8641 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 639 score: 0.9430 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 640 score: 0.8575 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 641 score: 0.8932 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 642 score: 0.9307 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 643 score: 0.9520 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 644 score: 0.8776 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 645 score: 0.8742 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 646 score: 0.8804 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 647 score: 0.8908 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 648 score: 0.8250 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 649 score: 0.8641 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 650 score: 0.9563 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 651 score: 0.8802 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 652 score: 0.8092 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 653 score: 0.7995 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 654 score: 0.8575 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 655 score: 0.9194 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 656 score: 0.8968 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 657 score: 0.8552 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 658 score: 0.7922 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 659 score: 0.8825 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 660 score: 0.7752 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 661 score: 0.8553 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 662 score: 0.9155 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 663 score: 0.6715 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 664 score: 0.8808 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 665 score: 0.8816 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 666 score: 0.8125 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 667 score: 0.9089 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 668 score: 0.8851 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 669 score: 0.8245 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 670 score: 0.9109 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 671 score: 0.8354 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 672 score: 0.8608 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 673 score: 0.8923 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 674 score: 0.8871 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 675 score: 0.8727 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 676 score: 0.9167 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 677 score: 0.9402 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 678 score: 0.8593 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 679 score: 0.9149 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 680 score: 0.7979 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 681 score: 0.8766 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 682 score: 0.9245 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 683 score: 0.8646 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 684 score: 0.8722 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 685 score: 0.8629 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 686 score: 0.9085 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 687 score: 0.8845 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 688 score: 0.9117 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 689 score: 0.8756 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 690 score: 0.9333 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 691 score: 0.8545 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 692 score: 0.9106 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 693 score: 0.8986 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 694 score: 0.8363 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 695 score: 0.7595 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 696 score: 0.8428 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 697 score: 0.9388 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 698 score: 0.8305 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 699 score: 0.9575 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 700 score: 0.9108 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 701 score: 0.8806 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 702 score: 0.9101 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 703 score: 0.9108 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 704 score: 0.9085 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 705 score: 0.8769 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 706 score: 0.7912 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 707 score: 0.8755 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 708 score: 0.9308 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 709 score: 0.9259 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 710 score: 0.9252 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 711 score: 0.8969 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 712 score: 0.9526 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 713 score: 0.8955 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 714 score: 0.8598 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 715 score: 0.8839 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 716 score: 0.8424 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 717 score: 0.8629 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 718 score: 0.9296 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 719 score: 0.8950 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 720 score: 0.8224 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 721 score: 0.9375 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 722 score: 0.9150 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 723 score: 0.8937 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 724 score: 0.8353 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 725 score: 0.8175 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 726 score: 0.9003 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 727 score: 0.8234 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 728 score: 0.8202 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 729 score: 0.8139 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 730 score: 0.8779 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 731 score: 0.9183 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 732 score: 0.8211 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 733 score: 0.9138 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 734 score: 0.8675 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 735 score: 0.8726 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 736 score: 0.8573 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 737 score: 0.8676 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 738 score: 0.8947 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 739 score: 0.7889 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 740 score: 0.9373 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 741 score: 0.8631 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 742 score: 0.8714 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 743 score: 0.9225 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 744 score: 0.9026 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 745 score: 0.9160 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 746 score: 0.8880 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 747 score: 0.8421 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 748 score: 0.9523 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 749 score: 0.9193 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 750 score: 0.8514 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 751 score: 0.9242 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 752 score: 0.8594 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 753 score: 0.9064 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 754 score: 0.9323 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 755 score: 0.9229 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 756 score: 0.8800 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 757 score: 0.8802 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 758 score: 0.8885 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 759 score: 0.9271 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 760 score: 0.9263 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 761 score: 0.9318 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 762 score: 0.9321 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 763 score: 0.8637 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 764 score: 0.8480 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 765 score: 0.8433 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 766 score: 0.8883 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 767 score: 0.8551 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 768 score: 0.9380 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 769 score: 0.8960 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 770 score: 0.9383 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 771 score: 0.9031 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 772 score: 0.8758 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 773 score: 0.9207 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 774 score: 0.8936 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 775 score: 0.8575 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 776 score: 0.8625 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 777 score: 0.9458 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 778 score: 0.9142 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 779 score: 0.8746 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 780 score: 0.8337 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 781 score: 0.8566 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 782 score: 0.9004 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 783 score: 0.9031 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 784 score: 0.9154 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 785 score: 0.8602 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 786 score: 0.9471 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 787 score: 0.8563 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 788 score: 0.8866 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 789 score: 0.9037 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 790 score: 0.9389 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 791 score: 0.9451 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 792 score: 0.9047 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 793 score: 0.9105 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 794 score: 0.8806 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 795 score: 0.8753 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 796 score: 0.9394 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 797 score: 0.8373 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 798 score: 0.9078 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 799 score: 0.8236 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 800 score: 0.9370 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 801 score: 0.8151 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 802 score: 0.8625 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 803 score: 0.8502 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 804 score: 0.6731 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 805 score: 0.9015 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 806 score: 0.9159 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 807 score: 0.9451 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 808 score: 0.9170 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 809 score: 0.8452 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 810 score: 0.8425 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 811 score: 0.8463 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 812 score: 0.8764 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 813 score: 0.9140 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 814 score: 0.9116 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 815 score: 0.8755 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 816 score: 0.8646 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 817 score: 0.7817 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 818 score: 0.7814 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 819 score: 0.8278 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 820 score: 0.9177 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 821 score: 0.9148 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 822 score: 0.8698 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 823 score: 0.8578 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 824 score: 0.9073 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 825 score: 0.8688 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 826 score: 0.9385 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 827 score: 0.9206 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 828 score: 0.8753 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 829 score: 0.8650 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 830 score: 0.8771 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 831 score: 0.8741 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 832 score: 0.8664 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 833 score: 0.7684 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 834 score: 0.8713 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 835 score: 0.8151 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 836 score: 0.9043 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 837 score: 0.9041 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 838 score: 0.8755 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 839 score: 0.8144 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 840 score: 0.8559 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 841 score: 0.8519 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 842 score: 0.6021 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 843 score: 0.8990 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 844 score: 0.7962 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 845 score: 0.8077 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 846 score: 0.6786 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 847 score: 0.8933 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 848 score: 0.8669 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 849 score: 0.8033 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 850 score: 0.8785 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 851 score: 0.9297 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 852 score: 0.8887 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 853 score: 0.8475 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 854 score: 0.8173 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 855 score: 0.9172 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 856 score: 0.8515 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 857 score: 0.8052 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 858 score: 0.9542 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 859 score: 0.9352 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 860 score: 0.9086 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 861 score: 0.9269 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 862 score: 0.9305 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 863 score: 0.8895 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 864 score: 0.8862 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 865 score: 0.8339 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 866 score: 0.8790 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 867 score: 0.8892 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 868 score: 0.7370 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 869 score: 0.8682 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 870 score: 0.9412 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 871 score: 0.8716 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 872 score: 0.9231 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 873 score: 0.9175 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 874 score: 0.9141 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 875 score: 0.8365 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 876 score: 0.8873 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 877 score: 0.9170 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 878 score: 0.9278 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 879 score: 0.8357 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 880 score: 0.8353 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 881 score: 0.9311 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 882 score: 0.8865 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 883 score: 0.9197 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 884 score: 0.7996 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 885 score: 0.8378 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 886 score: 0.9451 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 887 score: 0.8468 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 888 score: 0.6016 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 889 score: 0.8207 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 890 score: 0.9071 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 891 score: 0.8363 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 892 score: 0.6723 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 893 score: 0.8116 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 894 score: 0.9193 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 895 score: 0.9302 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 896 score: 0.9143 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 897 score: 0.9262 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 898 score: 0.9172 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 899 score: 0.8537 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 900 score: 0.8639 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 901 score: 0.8526 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 902 score: 0.8700 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 903 score: 0.7832 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 904 score: 0.9126 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 905 score: 0.8727 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 906 score: 0.8832 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 907 score: 0.8840 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 908 score: 0.8805 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 909 score: 0.9300 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 910 score: 0.8347 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 911 score: 0.8686 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 912 score: 0.9269 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 913 score: 0.7058 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 914 score: 0.8622 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 915 score: 0.8177 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 916 score: 0.8877 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 917 score: 0.8833 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 918 score: 0.8353 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 919 score: 0.8633 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 920 score: 0.7994 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 921 score: 0.8976 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 922 score: 0.9006 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 923 score: 0.8773 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 924 score: 0.8419 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 925 score: 0.8925 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 926 score: 0.9032 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 927 score: 0.8617 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 928 score: 0.9311 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 929 score: 0.8698 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 930 score: 0.8437 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 931 score: 0.8514 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 932 score: 0.8398 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 933 score: 0.8123 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 934 score: 0.7898 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 935 score: 0.8577 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 936 score: 0.8675 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 937 score: 0.9469 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 938 score: 0.9265 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 939 score: 0.9107 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 940 score: 0.9198 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 941 score: 0.9507 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 942 score: 0.9269 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 943 score: 0.9219 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 944 score: 0.8735 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 945 score: 0.9259 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 946 score: 0.8489 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 947 score: 0.9255 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 948 score: 0.9100 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 949 score: 0.8858 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 950 score: 0.7967 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 951 score: 0.8900 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 952 score: 0.8728 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 953 score: 0.9111 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 954 score: 0.8700 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 955 score: 0.9376 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 956 score: 0.9286 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 957 score: 0.9233 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 958 score: 0.9346 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 959 score: 0.9391 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 960 score: 0.7789 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 961 score: 0.8804 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 962 score: 0.8619 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 963 score: 0.8254 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 964 score: 0.9628 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 965 score: 0.8527 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 966 score: 0.8732 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 967 score: 0.8977 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 968 score: 0.7869 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 969 score: 0.8110 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 970 score: 0.9078 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 971 score: 0.8897 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 972 score: 0.8120 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 973 score: 0.7968 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 974 score: 0.8411 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 975 score: 0.8790 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 976 score: 0.8326 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 977 score: 0.8072 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 978 score: 0.8551 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 979 score: 0.8665 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 980 score: 0.8789 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 981 score: 0.9101 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 982 score: 0.9400 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 983 score: 0.8719 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 984 score: 0.8741 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 985 score: 0.8667 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 986 score: 0.8899 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 987 score: 0.8739 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 988 score: 0.7793 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 989 score: 0.7173 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 990 score: 0.8785 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 991 score: 0.8409 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 992 score: 0.9077 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 993 score: 0.9041 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 994 score: 0.8369 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 995 score: 0.8786 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 996 score: 0.9362 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 997 score: 0.9410 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 998 score: 0.8476 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 999 score: 0.9089 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1000 score: 0.9215 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1001 score: 0.8808 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1002 score: 0.8034 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1003 score: 0.8492 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1004 score: 0.9064 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1005 score: 0.8570 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1006 score: 0.9344 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1007 score: 0.8949 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1008 score: 0.9199 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1009 score: 0.8741 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1010 score: 0.9012 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is Segment 1011 score: 0.9111 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-is-max-tokens-512//test-en-is score: 0.8689