diff --git "a/test-en-no.comet" "b/test-en-no.comet" new file mode 100644--- /dev/null +++ "b/test-en-no.comet" @@ -0,0 +1,1013 @@ +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 0 score: 0.8795 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1 score: 0.8921 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 2 score: 0.8905 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 3 score: 0.8124 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 4 score: 0.8953 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 5 score: 0.8661 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 6 score: 0.8634 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 7 score: 0.8942 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 8 score: 0.8021 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 9 score: 0.8080 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 10 score: 0.9438 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 11 score: 0.8160 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 12 score: 0.9094 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 13 score: 0.8186 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 14 score: 0.8616 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 15 score: 0.7965 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 16 score: 0.8318 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 17 score: 0.9413 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 18 score: 0.9137 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 19 score: 0.9002 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 20 score: 0.9426 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 21 score: 0.9382 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 22 score: 0.9343 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 23 score: 0.9114 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 24 score: 0.8869 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 25 score: 0.8741 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 26 score: 0.9114 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 27 score: 0.8954 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 28 score: 0.8304 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 29 score: 0.7799 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 30 score: 0.6990 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 31 score: 0.8207 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 32 score: 0.7512 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 33 score: 0.9095 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 34 score: 0.7472 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 35 score: 0.9483 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 36 score: 0.9368 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 37 score: 0.8582 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 38 score: 0.8869 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 39 score: 0.8898 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 40 score: 0.8839 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 41 score: 0.9308 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 42 score: 0.9048 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 43 score: 0.9146 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 44 score: 0.7907 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 45 score: 0.8226 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 46 score: 0.8859 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 47 score: 0.8783 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 48 score: 0.7882 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 49 score: 0.8342 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 50 score: 0.9042 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 51 score: 0.8850 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 52 score: 0.8344 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 53 score: 0.8997 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 54 score: 0.9057 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 55 score: 0.9179 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 56 score: 0.8998 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 57 score: 0.8430 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 58 score: 0.8628 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 59 score: 0.9215 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 60 score: 0.9029 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 61 score: 0.9452 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 62 score: 0.9034 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 63 score: 0.7701 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 64 score: 0.8347 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 65 score: 0.7273 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 66 score: 0.6673 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 67 score: 0.7709 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 68 score: 0.8715 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 69 score: 0.8891 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 70 score: 0.9423 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 71 score: 0.8846 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 72 score: 0.8775 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 73 score: 0.9092 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 74 score: 0.8370 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 75 score: 0.9037 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 76 score: 0.8172 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 77 score: 0.8775 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 78 score: 0.9201 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 79 score: 0.8659 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 80 score: 0.8857 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 81 score: 0.9483 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 82 score: 0.6114 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 83 score: 0.8083 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 84 score: 0.8944 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 85 score: 0.9092 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 86 score: 0.8341 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 87 score: 0.8233 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 88 score: 0.9377 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 89 score: 0.8195 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 90 score: 0.8550 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 91 score: 0.8686 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 92 score: 0.8309 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 93 score: 0.7588 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 94 score: 0.9204 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 95 score: 0.9542 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 96 score: 0.6813 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 97 score: 0.8677 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 98 score: 0.9570 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 99 score: 0.9059 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 100 score: 0.9370 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 101 score: 0.9108 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 102 score: 0.8811 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 103 score: 0.9202 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 104 score: 0.9143 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 105 score: 0.6815 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 106 score: 0.9094 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 107 score: 0.8965 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 108 score: 0.8722 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 109 score: 0.7691 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 110 score: 0.8063 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 111 score: 0.9255 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 112 score: 0.9167 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 113 score: 0.9322 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 114 score: 0.8757 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 115 score: 0.8923 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 116 score: 0.9220 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 117 score: 0.8512 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 118 score: 0.9079 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 119 score: 0.8857 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 120 score: 0.8372 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 121 score: 0.8331 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 122 score: 0.9524 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 123 score: 0.9405 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 124 score: 0.9445 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 125 score: 0.9427 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 126 score: 0.9605 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 127 score: 0.8635 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 128 score: 0.9082 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 129 score: 0.9214 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 130 score: 0.8918 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 131 score: 0.9118 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 132 score: 0.9010 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 133 score: 0.9262 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 134 score: 0.8979 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 135 score: 0.8870 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 136 score: 0.9415 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 137 score: 0.9327 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 138 score: 0.9528 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 139 score: 0.8689 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 140 score: 0.9058 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 141 score: 0.9163 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 142 score: 0.9388 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 143 score: 0.9064 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 144 score: 0.9287 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 145 score: 0.9273 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 146 score: 0.8873 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 147 score: 0.9317 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 148 score: 0.9124 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 149 score: 0.9334 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 150 score: 0.8943 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 151 score: 0.9086 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 152 score: 0.7207 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 153 score: 0.7834 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 154 score: 0.7981 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 155 score: 0.9029 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 156 score: 0.8297 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 157 score: 0.8296 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 158 score: 0.9376 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 159 score: 0.8754 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 160 score: 0.9267 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 161 score: 0.7999 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 162 score: 0.7519 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 163 score: 0.8258 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 164 score: 0.8920 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 165 score: 0.8259 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 166 score: 0.9215 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 167 score: 0.8989 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 168 score: 0.8565 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 169 score: 0.8384 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 170 score: 0.8935 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 171 score: 0.8993 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 172 score: 0.7941 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 173 score: 0.9021 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 174 score: 0.9056 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 175 score: 0.9604 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 176 score: 0.8794 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 177 score: 0.9056 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 178 score: 0.8322 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 179 score: 0.9278 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 180 score: 0.9112 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 181 score: 0.9346 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 182 score: 0.9247 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 183 score: 0.9551 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 184 score: 0.6743 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 185 score: 0.8737 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 186 score: 0.9532 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 187 score: 0.9114 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 188 score: 0.8562 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 189 score: 0.9038 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 190 score: 0.6949 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 191 score: 0.8735 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 192 score: 0.8082 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 193 score: 0.7585 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 194 score: 0.7327 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 195 score: 0.9285 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 196 score: 0.8996 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 197 score: 0.8449 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 198 score: 0.8733 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 199 score: 0.8074 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 200 score: 0.9064 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 201 score: 0.8005 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 202 score: 0.9210 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 203 score: 0.8157 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 204 score: 0.9325 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 205 score: 0.9008 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 206 score: 0.9521 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 207 score: 0.9083 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 208 score: 0.9067 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 209 score: 0.9082 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 210 score: 0.9466 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 211 score: 0.9193 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 212 score: 0.9193 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 213 score: 0.9451 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 214 score: 0.9005 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 215 score: 0.8917 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 216 score: 0.7765 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 217 score: 0.9269 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 218 score: 0.9190 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 219 score: 0.7689 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 220 score: 0.9544 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 221 score: 0.7991 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 222 score: 0.9210 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 223 score: 0.8082 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 224 score: 0.9502 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 225 score: 0.9571 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 226 score: 0.8995 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 227 score: 0.7877 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 228 score: 0.8584 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 229 score: 0.9417 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 230 score: 0.9454 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 231 score: 0.8610 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 232 score: 0.9014 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 233 score: 0.8248 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 234 score: 0.8183 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 235 score: 0.8389 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 236 score: 0.8515 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 237 score: 0.8809 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 238 score: 0.9255 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 239 score: 0.8790 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 240 score: 0.8354 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 241 score: 0.8066 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 242 score: 0.9243 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 243 score: 0.4542 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 244 score: 0.8268 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 245 score: 0.9094 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 246 score: 0.6858 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 247 score: 0.8313 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 248 score: 0.7296 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 249 score: 0.9403 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 250 score: 0.8925 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 251 score: 0.9522 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 252 score: 0.9317 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 253 score: 0.9188 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 254 score: 0.8099 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 255 score: 0.8957 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 256 score: 0.7526 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 257 score: 0.9378 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 258 score: 0.8729 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 259 score: 0.9250 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 260 score: 0.9406 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 261 score: 0.8263 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 262 score: 0.7381 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 263 score: 0.8886 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 264 score: 0.9313 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 265 score: 0.9021 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 266 score: 0.8433 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 267 score: 0.8782 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 268 score: 0.9111 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 269 score: 0.9023 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 270 score: 0.8977 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 271 score: 0.6862 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 272 score: 0.9365 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 273 score: 0.7574 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 274 score: 0.6734 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 275 score: 0.8233 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 276 score: 0.7221 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 277 score: 0.8672 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 278 score: 0.8925 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 279 score: 0.9648 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 280 score: 0.9372 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 281 score: 0.9297 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 282 score: 0.8728 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 283 score: 0.9133 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 284 score: 0.9439 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 285 score: 0.9339 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 286 score: 0.8417 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 287 score: 0.8744 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 288 score: 0.9219 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 289 score: 0.9033 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 290 score: 0.8534 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 291 score: 0.8679 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 292 score: 0.8805 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 293 score: 0.9062 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 294 score: 0.8181 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 295 score: 0.9167 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 296 score: 0.8624 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 297 score: 0.8904 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 298 score: 0.9269 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 299 score: 0.8823 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 300 score: 0.9480 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 301 score: 0.8429 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 302 score: 0.8303 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 303 score: 0.9143 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 304 score: 0.9182 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 305 score: 0.9339 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 306 score: 0.9531 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 307 score: 0.8638 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 308 score: 0.8786 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 309 score: 0.8619 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 310 score: 0.9289 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 311 score: 0.9389 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 312 score: 0.9479 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 313 score: 0.7391 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 314 score: 0.9519 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 315 score: 0.8992 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 316 score: 0.9360 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 317 score: 0.8494 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 318 score: 0.7814 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 319 score: 0.8641 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 320 score: 0.7450 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 321 score: 0.8980 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 322 score: 0.9089 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 323 score: 0.9287 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 324 score: 0.9400 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 325 score: 0.8808 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 326 score: 0.8069 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 327 score: 0.9276 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 328 score: 0.8576 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 329 score: 0.8780 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 330 score: 0.8586 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 331 score: 0.8779 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 332 score: 0.9171 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 333 score: 0.8679 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 334 score: 0.9446 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 335 score: 0.9059 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 336 score: 0.9121 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 337 score: 0.8848 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 338 score: 0.8905 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 339 score: 0.9036 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 340 score: 0.7796 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 341 score: 0.8747 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 342 score: 0.6285 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 343 score: 0.8330 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 344 score: 0.9026 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 345 score: 0.9227 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 346 score: 0.8638 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 347 score: 0.9107 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 348 score: 0.7505 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 349 score: 0.7101 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 350 score: 0.7477 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 351 score: 0.9268 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 352 score: 0.9145 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 353 score: 0.9223 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 354 score: 0.8546 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 355 score: 0.8163 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 356 score: 0.9056 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 357 score: 0.9142 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 358 score: 0.9282 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 359 score: 0.8530 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 360 score: 0.8349 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 361 score: 0.8769 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 362 score: 0.8268 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 363 score: 0.6390 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 364 score: 0.8865 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 365 score: 0.7887 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 366 score: 0.8082 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 367 score: 0.9126 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 368 score: 0.8211 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 369 score: 0.6845 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 370 score: 0.6926 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 371 score: 0.7999 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 372 score: 0.8849 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 373 score: 0.8755 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 374 score: 0.9390 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 375 score: 0.9231 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 376 score: 0.9429 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 377 score: 0.9196 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 378 score: 0.8080 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 379 score: 0.6776 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 380 score: 0.9437 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 381 score: 0.8873 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 382 score: 0.6979 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 383 score: 0.7351 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 384 score: 0.8550 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 385 score: 0.8223 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 386 score: 0.8749 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 387 score: 0.9521 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 388 score: 0.8519 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 389 score: 0.8076 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 390 score: 0.9371 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 391 score: 0.8869 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 392 score: 0.9165 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 393 score: 0.9074 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 394 score: 0.9253 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 395 score: 0.9049 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 396 score: 0.6028 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 397 score: 0.8123 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 398 score: 0.6756 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 399 score: 0.9399 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 400 score: 0.9424 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 401 score: 0.8252 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 402 score: 0.6666 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 403 score: 0.7347 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 404 score: 0.8673 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 405 score: 0.8302 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 406 score: 0.9203 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 407 score: 0.9084 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 408 score: 0.9565 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 409 score: 0.9203 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 410 score: 0.9210 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 411 score: 0.8897 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 412 score: 0.9061 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 413 score: 0.9479 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 414 score: 0.7853 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 415 score: 0.8848 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 416 score: 0.8432 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 417 score: 0.8963 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 418 score: 0.8813 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 419 score: 0.9026 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 420 score: 0.8693 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 421 score: 0.9519 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 422 score: 0.9025 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 423 score: 0.8945 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 424 score: 0.8461 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 425 score: 0.9343 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 426 score: 0.9073 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 427 score: 0.9174 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 428 score: 0.9351 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 429 score: 0.9683 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 430 score: 0.8632 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 431 score: 0.7518 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 432 score: 0.8763 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 433 score: 0.8541 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 434 score: 0.8847 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 435 score: 0.8043 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 436 score: 0.9197 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 437 score: 0.9262 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 438 score: 0.9357 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 439 score: 0.9136 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 440 score: 0.8686 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 441 score: 0.8704 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 442 score: 0.9199 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 443 score: 0.8947 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 444 score: 0.8340 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 445 score: 0.9079 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 446 score: 0.8519 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 447 score: 0.9199 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 448 score: 0.9299 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 449 score: 0.7921 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 450 score: 0.9007 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 451 score: 0.8767 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 452 score: 0.9270 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 453 score: 0.9090 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 454 score: 0.8865 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 455 score: 0.8221 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 456 score: 0.8953 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 457 score: 0.9123 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 458 score: 0.8825 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 459 score: 0.8103 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 460 score: 0.9409 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 461 score: 0.8992 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 462 score: 0.8633 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 463 score: 0.9640 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 464 score: 0.8749 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 465 score: 0.9052 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 466 score: 0.9327 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 467 score: 0.7301 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 468 score: 0.7150 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 469 score: 0.7317 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 470 score: 0.7488 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 471 score: 0.8398 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 472 score: 0.9124 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 473 score: 0.8894 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 474 score: 0.6683 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 475 score: 0.9021 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 476 score: 0.8883 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 477 score: 0.9015 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 478 score: 0.8609 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 479 score: 0.9322 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 480 score: 0.8932 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 481 score: 0.6042 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 482 score: 0.8830 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 483 score: 0.8488 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 484 score: 0.8742 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 485 score: 0.9063 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 486 score: 0.9210 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 487 score: 0.7373 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 488 score: 0.8405 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 489 score: 0.8372 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 490 score: 0.7884 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 491 score: 0.9455 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 492 score: 0.9047 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 493 score: 0.9358 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 494 score: 0.9240 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 495 score: 0.9022 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 496 score: 0.8962 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 497 score: 0.8417 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 498 score: 0.9072 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 499 score: 0.8718 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 500 score: 0.9519 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 501 score: 0.9136 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 502 score: 0.9313 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 503 score: 0.8613 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 504 score: 0.8276 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 505 score: 0.7791 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 506 score: 0.8750 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 507 score: 0.8146 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 508 score: 0.8009 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 509 score: 0.8887 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 510 score: 0.7432 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 511 score: 0.8057 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 512 score: 0.8840 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 513 score: 0.8555 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 514 score: 0.9060 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 515 score: 0.8390 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 516 score: 0.7858 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 517 score: 0.9389 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 518 score: 0.8383 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 519 score: 0.9581 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 520 score: 0.9345 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 521 score: 0.9345 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 522 score: 0.9146 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 523 score: 0.8654 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 524 score: 0.8605 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 525 score: 0.8633 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 526 score: 0.7155 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 527 score: 0.8568 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 528 score: 0.8224 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 529 score: 0.8812 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 530 score: 0.8776 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 531 score: 0.8892 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 532 score: 0.9162 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 533 score: 0.8221 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 534 score: 0.7882 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 535 score: 0.5832 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 536 score: 0.8508 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 537 score: 0.8025 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 538 score: 0.8952 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 539 score: 0.9007 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 540 score: 0.8665 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 541 score: 0.9096 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 542 score: 0.9003 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 543 score: 0.8836 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 544 score: 0.8976 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 545 score: 0.9663 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 546 score: 0.8523 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 547 score: 0.8384 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 548 score: 0.8939 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 549 score: 0.9218 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 550 score: 0.8796 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 551 score: 0.9235 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 552 score: 0.9337 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 553 score: 0.8719 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 554 score: 0.9060 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 555 score: 0.7725 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 556 score: 0.8567 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 557 score: 0.9352 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 558 score: 0.8660 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 559 score: 0.8302 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 560 score: 0.8624 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 561 score: 0.8764 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 562 score: 0.7928 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 563 score: 0.8478 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 564 score: 0.8014 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 565 score: 0.9037 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 566 score: 0.9020 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 567 score: 0.8985 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 568 score: 0.8521 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 569 score: 0.6578 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 570 score: 0.9243 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 571 score: 0.8997 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 572 score: 0.8799 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 573 score: 0.9403 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 574 score: 0.8744 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 575 score: 0.8915 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 576 score: 0.8497 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 577 score: 0.9291 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 578 score: 0.9145 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 579 score: 0.8478 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 580 score: 0.8807 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 581 score: 0.8772 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 582 score: 0.8710 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 583 score: 0.8750 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 584 score: 0.8789 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 585 score: 0.9185 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 586 score: 0.9001 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 587 score: 0.9493 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 588 score: 0.7951 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 589 score: 0.9575 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 590 score: 0.9356 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 591 score: 0.8541 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 592 score: 0.7968 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 593 score: 0.9443 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 594 score: 0.9235 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 595 score: 0.7612 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 596 score: 0.9162 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 597 score: 0.8916 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 598 score: 0.8562 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 599 score: 0.9413 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 600 score: 0.8768 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 601 score: 0.8816 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 602 score: 0.9054 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 603 score: 0.9032 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 604 score: 0.8417 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 605 score: 0.9419 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 606 score: 0.9357 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 607 score: 0.7873 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 608 score: 0.7349 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 609 score: 0.9003 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 610 score: 0.8760 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 611 score: 0.9234 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 612 score: 0.8772 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 613 score: 0.8301 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 614 score: 0.8637 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 615 score: 0.7939 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 616 score: 0.9200 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 617 score: 0.9533 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 618 score: 0.8640 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 619 score: 0.8787 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 620 score: 0.9052 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 621 score: 0.7724 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 622 score: 0.9301 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 623 score: 0.9258 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 624 score: 0.8968 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 625 score: 0.9425 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 626 score: 0.9311 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 627 score: 0.9225 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 628 score: 0.9302 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 629 score: 0.8868 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 630 score: 0.8953 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 631 score: 0.9406 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 632 score: 0.9172 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 633 score: 0.9517 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 634 score: 0.9291 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 635 score: 0.8951 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 636 score: 0.9184 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 637 score: 0.8789 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 638 score: 0.8717 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 639 score: 0.9410 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 640 score: 0.9138 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 641 score: 0.9692 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 642 score: 0.8433 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 643 score: 0.8880 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 644 score: 0.8235 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 645 score: 0.6114 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 646 score: 0.8769 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 647 score: 0.8809 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 648 score: 0.8471 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 649 score: 0.8352 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 650 score: 0.8659 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 651 score: 0.8543 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 652 score: 0.8724 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 653 score: 0.7956 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 654 score: 0.9323 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 655 score: 0.9136 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 656 score: 0.9190 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 657 score: 0.9555 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 658 score: 0.7746 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 659 score: 0.9328 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 660 score: 0.8140 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 661 score: 0.8738 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 662 score: 0.9316 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 663 score: 0.8592 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 664 score: 0.9384 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 665 score: 0.8422 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 666 score: 0.8342 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 667 score: 0.9466 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 668 score: 0.9165 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 669 score: 0.8901 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 670 score: 0.9255 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 671 score: 0.9400 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 672 score: 0.9165 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 673 score: 0.9305 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 674 score: 0.8021 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 675 score: 0.9438 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 676 score: 0.9297 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 677 score: 0.9329 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 678 score: 0.8945 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 679 score: 0.8870 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 680 score: 0.8416 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 681 score: 0.9451 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 682 score: 0.9297 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 683 score: 0.8363 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 684 score: 0.9073 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 685 score: 0.8582 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 686 score: 0.8714 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 687 score: 0.8642 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 688 score: 0.9016 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 689 score: 0.8945 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 690 score: 0.9296 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 691 score: 0.8821 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 692 score: 0.9561 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 693 score: 0.8514 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 694 score: 0.9001 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 695 score: 0.9503 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 696 score: 0.9030 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 697 score: 0.9198 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 698 score: 0.8879 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 699 score: 0.9307 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 700 score: 0.8809 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 701 score: 0.9185 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 702 score: 0.9617 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 703 score: 0.8722 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 704 score: 0.6902 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 705 score: 0.8753 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 706 score: 0.6926 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 707 score: 0.8824 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 708 score: 0.9090 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 709 score: 0.9121 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 710 score: 0.9415 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 711 score: 0.8115 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 712 score: 0.9442 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 713 score: 0.8746 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 714 score: 0.9039 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 715 score: 0.8625 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 716 score: 0.9363 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 717 score: 0.8874 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 718 score: 0.9380 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 719 score: 0.8762 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 720 score: 0.9270 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 721 score: 0.9463 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 722 score: 0.9190 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 723 score: 0.9285 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 724 score: 0.9102 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 725 score: 0.7120 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 726 score: 0.8749 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 727 score: 0.8662 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 728 score: 0.7493 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 729 score: 0.8415 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 730 score: 0.8046 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 731 score: 0.9352 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 732 score: 0.8696 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 733 score: 0.9381 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 734 score: 0.7539 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 735 score: 0.9339 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 736 score: 0.9212 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 737 score: 0.8839 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 738 score: 0.8875 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 739 score: 0.8889 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 740 score: 0.8806 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 741 score: 0.8454 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 742 score: 0.8380 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 743 score: 0.9004 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 744 score: 0.8962 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 745 score: 0.9179 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 746 score: 0.9381 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 747 score: 0.8580 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 748 score: 0.9531 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 749 score: 0.9024 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 750 score: 0.9486 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 751 score: 0.9417 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 752 score: 0.9411 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 753 score: 0.9277 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 754 score: 0.9157 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 755 score: 0.9307 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 756 score: 0.9201 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 757 score: 0.9199 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 758 score: 0.9064 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 759 score: 0.9249 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 760 score: 0.9614 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 761 score: 0.9133 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 762 score: 0.8770 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 763 score: 0.7263 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 764 score: 0.8774 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 765 score: 0.7718 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 766 score: 0.8750 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 767 score: 0.8255 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 768 score: 0.9424 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 769 score: 0.9605 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 770 score: 0.8852 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 771 score: 0.9526 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 772 score: 0.8791 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 773 score: 0.9153 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 774 score: 0.9177 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 775 score: 0.9117 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 776 score: 0.8188 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 777 score: 0.8832 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 778 score: 0.6362 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 779 score: 0.6893 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 780 score: 0.8768 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 781 score: 0.8195 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 782 score: 0.8450 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 783 score: 0.8791 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 784 score: 0.8571 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 785 score: 0.8421 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 786 score: 0.8996 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 787 score: 0.8728 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 788 score: 0.9040 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 789 score: 0.9309 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 790 score: 0.9363 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 791 score: 0.8967 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 792 score: 0.9047 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 793 score: 0.8907 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 794 score: 0.9012 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 795 score: 0.8815 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 796 score: 0.9597 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 797 score: 0.8177 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 798 score: 0.9167 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 799 score: 0.9041 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 800 score: 0.9031 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 801 score: 0.7574 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 802 score: 0.9124 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 803 score: 0.9349 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 804 score: 0.8506 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 805 score: 0.9460 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 806 score: 0.9411 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 807 score: 0.9340 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 808 score: 0.8503 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 809 score: 0.6806 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 810 score: 0.9084 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 811 score: 0.8766 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 812 score: 0.8915 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 813 score: 0.9044 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 814 score: 0.9072 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 815 score: 0.8010 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 816 score: 0.8927 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 817 score: 0.7527 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 818 score: 0.6326 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 819 score: 0.8601 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 820 score: 0.9505 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 821 score: 0.9486 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 822 score: 0.9026 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 823 score: 0.8516 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 824 score: 0.8890 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 825 score: 0.8958 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 826 score: 0.9254 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 827 score: 0.9301 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 828 score: 0.8902 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 829 score: 0.8885 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 830 score: 0.9084 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 831 score: 0.8988 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 832 score: 0.8769 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 833 score: 0.8087 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 834 score: 0.8356 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 835 score: 0.7909 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 836 score: 0.7815 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 837 score: 0.8729 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 838 score: 0.9212 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 839 score: 0.8954 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 840 score: 0.9039 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 841 score: 0.8545 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 842 score: 0.8153 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 843 score: 0.8477 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 844 score: 0.8342 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 845 score: 0.7973 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 846 score: 0.6983 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 847 score: 0.8690 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 848 score: 0.8879 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 849 score: 0.8544 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 850 score: 0.8716 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 851 score: 0.9156 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 852 score: 0.9400 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 853 score: 0.9287 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 854 score: 0.8732 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 855 score: 0.8381 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 856 score: 0.7510 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 857 score: 0.9022 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 858 score: 0.8490 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 859 score: 0.9409 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 860 score: 0.9282 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 861 score: 0.9136 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 862 score: 0.9432 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 863 score: 0.9448 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 864 score: 0.8902 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 865 score: 0.7337 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 866 score: 0.9163 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 867 score: 0.9038 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 868 score: 0.8787 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 869 score: 0.8931 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 870 score: 0.9572 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 871 score: 0.9206 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 872 score: 0.8914 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 873 score: 0.8717 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 874 score: 0.8755 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 875 score: 0.8298 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 876 score: 0.9190 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 877 score: 0.8674 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 878 score: 0.9213 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 879 score: 0.7510 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 880 score: 0.9091 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 881 score: 0.9383 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 882 score: 0.9244 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 883 score: 0.8844 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 884 score: 0.8666 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 885 score: 0.8850 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 886 score: 0.9328 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 887 score: 0.8873 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 888 score: 0.7128 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 889 score: 0.8405 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 890 score: 0.9263 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 891 score: 0.8887 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 892 score: 0.7597 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 893 score: 0.7709 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 894 score: 0.9103 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 895 score: 0.9326 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 896 score: 0.9235 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 897 score: 0.9016 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 898 score: 0.9046 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 899 score: 0.7347 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 900 score: 0.8635 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 901 score: 0.8919 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 902 score: 0.8057 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 903 score: 0.8795 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 904 score: 0.8921 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 905 score: 0.8468 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 906 score: 0.9023 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 907 score: 0.8373 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 908 score: 0.8881 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 909 score: 0.9294 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 910 score: 0.8658 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 911 score: 0.9177 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 912 score: 0.8685 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 913 score: 0.9025 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 914 score: 0.8520 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 915 score: 0.8584 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 916 score: 0.8715 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 917 score: 0.9051 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 918 score: 0.8707 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 919 score: 0.8260 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 920 score: 0.8676 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 921 score: 0.8827 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 922 score: 0.8279 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 923 score: 0.8857 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 924 score: 0.8960 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 925 score: 0.8910 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 926 score: 0.9487 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 927 score: 0.7792 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 928 score: 0.9227 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 929 score: 0.8560 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 930 score: 0.9137 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 931 score: 0.8917 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 932 score: 0.7670 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 933 score: 0.8713 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 934 score: 0.7843 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 935 score: 0.8966 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 936 score: 0.8591 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 937 score: 0.9578 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 938 score: 0.8687 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 939 score: 0.9432 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 940 score: 0.9530 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 941 score: 0.9735 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 942 score: 0.9250 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 943 score: 0.7868 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 944 score: 0.8500 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 945 score: 0.8462 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 946 score: 0.7598 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 947 score: 0.8788 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 948 score: 0.8801 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 949 score: 0.9101 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 950 score: 0.8041 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 951 score: 0.8555 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 952 score: 0.8599 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 953 score: 0.8377 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 954 score: 0.7433 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 955 score: 0.9437 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 956 score: 0.8987 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 957 score: 0.9422 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 958 score: 0.8795 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 959 score: 0.8508 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 960 score: 0.8016 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 961 score: 0.6803 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 962 score: 0.8844 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 963 score: 0.9026 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 964 score: 0.9600 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 965 score: 0.9170 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 966 score: 0.6787 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 967 score: 0.7984 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 968 score: 0.8377 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 969 score: 0.8067 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 970 score: 0.8823 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 971 score: 0.8598 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 972 score: 0.8659 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 973 score: 0.8926 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 974 score: 0.8847 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 975 score: 0.9314 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 976 score: 0.8950 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 977 score: 0.8474 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 978 score: 0.9583 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 979 score: 0.8507 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 980 score: 0.8971 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 981 score: 0.8499 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 982 score: 0.9255 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 983 score: 0.8780 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 984 score: 0.9411 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 985 score: 0.9255 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 986 score: 0.9367 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 987 score: 0.8988 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 988 score: 0.8218 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 989 score: 0.7266 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 990 score: 0.8658 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 991 score: 0.8289 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 992 score: 0.9169 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 993 score: 0.9365 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 994 score: 0.8108 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 995 score: 0.9042 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 996 score: 0.9354 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 997 score: 0.9619 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 998 score: 0.9327 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 999 score: 0.8792 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1000 score: 0.9200 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1001 score: 0.8330 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1002 score: 0.9347 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1003 score: 0.8909 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1004 score: 0.9436 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1005 score: 0.9351 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1006 score: 0.8558 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1007 score: 0.7973 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1008 score: 0.9528 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1009 score: 0.8605 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1010 score: 0.8873 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no Segment 1011 score: 0.9127 +/beacon-scratch/tongzh24/ALMA-checkpoint/exp_16_languages/alma-13b-sft-16-languages-no-max-tokens-512//test-en-no score: 0.8735