grg committed on
Commit
6c34866
·
1 Parent(s): 64725c7

Adding Llama-4-Scout, Cydonia, Ministrations, and Nautilus

Browse files
static/leaderboard.csv CHANGED
@@ -1,19 +1,23 @@
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA
2
- reka-flash-3,0.3676470588235294,0.4899723723251134,0.2885614355228928,0.21908343015733012,0.6356444444444445,0.32448055555555566,0.3265472222222222
3
- Llama-3.3-70B-Instruct,0.7720588235294118,0.7826258982618856,0.8101440811870466,0.15086913865310292,0.7070694444444444,0.2484805555555556,0.2872888888888889
4
- Llama-3.1-70B-Instruct,0.6732026143790849,0.7716257197180342,0.7627363693137972,0.1590258306220288,0.7405305555555556,0.2069805555555555,0.2524638888888888
5
- Llama-3.1-Nemotron-70B-Instruct,0.7663398692810457,0.806561245024004,0.7986269216633632,0.15639272646142155,0.7771694444444445,0.1574805555555555,0.2053138888888889
6
- Llama-3.1-8B-Instruct,0.4566993464052287,0.6200396929619997,0.5544135256446929,0.177589336660551,0.6203138888888889,0.3474166666666667,0.3526444444444444
7
- Llama-3.2-3B-Instruct,0.35947712418300654,0.4920203960749596,0.3095420768898777,0.23477458340427637,0.6402583333333334,0.3671361111111112,0.34035277777777784
8
- Llama-3.2-1B-Instruct,0.14950980392156865,0.28494630130657267,0.017845599375419068,0.29245365371576204,0.4570666666666667,0.49829999999999997,0.458125
9
- Mistral-Large-Instruct-2411,0.5857843137254901,0.7334649247049873,0.7061158170869489,0.17181887070789792,0.7237666666666667,0.24669166666666675,0.262
10
- Mistral-Large-Instruct-2407,0.6846405228758169,0.7865403071348558,0.7786865940656633,0.18242669696158476,0.7757583333333334,0.19862499999999994,0.21713055555555572
11
- Mistral-Nemo-Instruct-2407,0.34068627450980393,0.5243749748919985,0.409719354830282,0.21289984240736382,0.565475,0.40046944444444443,0.39598333333333324
12
- Mistral-Small-3.1-24B-Instruct-2503,0.5326797385620915,0.7026337346865648,0.6848141294613206,0.17721164035276304,0.6713749999999999,0.29685555555555565,0.3154944444444444
13
- QwQ-32B,0.7230392156862745,0.7719313363889678,0.8091295835194909,0.1769481985217758,0.6934333333333333,0.27074999999999994,0.30680277777777776
14
- Qwen2.5-VL-72B-Instruct,0.8619281045751634,0.8360251140700989,0.8125460511443046,0.15932218090531514,0.8454611111111111,0.10019722222222227,0.14792499999999997
15
- Qwen2.5-VL-7B-Instruct,0.2450980392156863,0.42422904460473104,0.28582477835186304,0.25948620101404807,0.45279444444444444,0.4787000000000001,0.464075
16
- Qwen2.5-VL-3B-Instruct,0.1111111111111111,0.2655381207197522,0.059657867275330144,0.2877042811210807,0.3650055555555556,0.6013444444444445,0.5902833333333335
17
- Qwen2.5-14B-Instruct-1M,0.5359477124183007,0.7022023055217502,0.6551137143166985,0.1785253019816836,0.7130944444444444,0.26870277777777785,0.26870277777777773
18
- Dracarys2-72B-Instruct,0.7295751633986929,0.7740958932029343,0.789501612210195,0.15836985877285903,0.7307833333333333,0.2581972222222222,0.27945555555555546
19
- dummy,0.09967320261437908,0.2291015386716794,-0.009004148398032956,0.2928877637010999,0.3755222222222222,0.622275,0.5915305555555557
 
 
 
 
 
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA
2
+ reka-flash-3,0.36177248677248675,0.4899723723251134,0.2885614355228928,0.21908343015733012,0.6356444444444445,0.32448055555555566,0.3265472222222222
3
+ Llama-4-Scout-17B-16E-Instruct,0.49007936507936495,0.6180917882104732,0.4967998772578945,0.1945790922366818,0.6979444444444444,0.2716777777777778,0.2741527777777778
4
+ Llama-3.3-70B-Instruct,0.7824074074074073,0.7826258982618856,0.8101440811870466,0.15086913865310292,0.7070694444444444,0.2484805555555556,0.2872888888888889
5
+ Llama-3.1-70B-Instruct,0.703042328042328,0.7716257197180342,0.7627363693137972,0.1590258306220288,0.7405305555555556,0.2069805555555555,0.2524638888888888
6
+ Llama-3.1-Nemotron-70B-Instruct,0.7843915343915344,0.806561245024004,0.7986269216633632,0.15639272646142155,0.7771694444444445,0.1574805555555555,0.2053138888888889
7
+ Llama-3.1-8B-Instruct,0.4728835978835979,0.6200396929619997,0.5544135256446929,0.177589336660551,0.6203138888888889,0.3474166666666667,0.3526444444444444
8
+ Llama-3.2-3B-Instruct,0.3498677248677249,0.4920203960749596,0.3095420768898777,0.23477458340427637,0.6402583333333334,0.3671361111111112,0.34035277777777784
9
+ Llama-3.2-1B-Instruct,0.14814814814814814,0.28494630130657267,0.017845599375419068,0.29245365371576204,0.4570666666666667,0.49829999999999997,0.458125
10
+ Mistral-Large-Instruct-2411,0.6117724867724867,0.7334649247049873,0.7061158170869489,0.17181887070789792,0.7237666666666667,0.24669166666666675,0.262
11
+ Mistral-Large-Instruct-2407,0.7096560846560847,0.7865403071348558,0.7786865940656633,0.18242669696158476,0.7757583333333334,0.19862499999999994,0.21713055555555572
12
+ Mistral-Nemo-Instruct-2407,0.34589947089947093,0.5243749748919985,0.409719354830282,0.21289984240736382,0.565475,0.40046944444444443,0.39598333333333324
13
+ Mistral-Small-3.1-24B-Instruct-2503,0.5582010582010581,0.7026337346865648,0.6848141294613206,0.17721164035276304,0.6713749999999999,0.29685555555555565,0.3154944444444444
14
+ QwQ-32B,0.7367724867724869,0.7719313363889678,0.8091295835194909,0.1769481985217758,0.6934333333333333,0.27074999999999994,0.30680277777777776
15
+ Qwen2.5-VL-72B-Instruct,0.8716931216931216,0.8360251140700989,0.8125460511443046,0.15932218090531514,0.8454611111111111,0.10019722222222227,0.14792499999999997
16
+ Qwen2.5-VL-7B-Instruct,0.22817460317460314,0.42422904460473104,0.28582477835186304,0.25948620101404807,0.45279444444444444,0.4787000000000001,0.464075
17
+ Qwen2.5-VL-3B-Instruct,0.10052910052910052,0.2655381207197522,0.059657867275330144,0.2877042811210807,0.3650055555555556,0.6013444444444445,0.5902833333333335
18
+ Qwen2.5-14B-Instruct-1M,0.5654761904761904,0.7022023055217502,0.6551137143166985,0.1785253019816836,0.7130944444444444,0.26870277777777785,0.26870277777777773
19
+ Dracarys2-72B-Instruct,0.7533068783068784,0.7740958932029343,0.789501612210195,0.15836985877285903,0.7307833333333333,0.2581972222222222,0.27945555555555546
20
+ Nautilus-70B-v0.1,0.5958994708994709,0.724110072700376,0.7188870305946458,0.1688175407755762,0.6729305555555555,0.2789694444444444,0.30781111111111104
21
+ Cydonia-22B-v1.2,0.40674603174603174,0.5660454327134821,0.4926092723062008,0.20778751640583537,0.5617833333333333,0.40446111111111116,0.39160833333333345
22
+ Ministrations-8B-v1,0.3220899470899471,0.4918972616356232,0.34889128317469287,0.2302731507248964,0.5484527777777779,0.4000972222222221,0.3784694444444444
23
+ dummy,0.09722222222222221,0.2291015386716794,-0.009004148398032956,0.2928877637010999,0.3755222222222222,0.622275,0.5915305555555557
static/models_data/Cydonia-22B-v1.2/cfa_metrics.csv CHANGED
@@ -1,10 +1,10 @@
1
  Context chunk,CFI,TLI,SRMR,RMSEA
2
- chunk_0,0.38787499999999997,0.349075,0.55785,0.5835250000000001
3
- chunk_1,0.895525,0.8709,0.098275,0.08115
4
- chunk_2,0.8167249999999999,0.7593500000000001,0.109725,0.13575
5
- chunk_3,0.820225,0.7622500000000001,0.12269999999999999,0.13224999999999998
6
- chunk_4,0.6108,0.551775,0.34125,0.35359999999999997
7
- chunk_chess_0,0.6475500000000001,0.6178,0.329075,0.345675
8
- chunk_grammar_1,0.61065,0.572025,0.35305,0.35135
9
- chunk_no_conv,0.834975,0.77905,0.11562499999999999,0.1638
10
- chunk_svs_no_conv,0.120025,0.09015,0.7901,0.797425
 
1
  Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.685575,0.6641,0.32205,0.289525
3
+ chunk_1,0.206025,0.18815,0.78415,0.775325
4
+ chunk_2,0.752125,0.6571499999999999,0.10994999999999999,0.125125
5
+ chunk_3,0.9073249999999999,0.95505,0.08625,0.043899999999999995
6
+ chunk_4,0.6129749999999999,0.5725750000000001,0.330575,0.32497499999999996
7
+ chunk_chess_0,0.35215,0.30995,0.5720000000000001,0.582775
8
+ chunk_grammar_1,0.658675,0.64225,0.319825,0.30225
9
+ chunk_no_conv,0.661025,0.65435,0.333675,0.3093
10
+ chunk_svs_no_conv,0.220175,0.21,0.781675,0.7713
static/models_data/Cydonia-22B-v1.2/matrix.svg CHANGED
static/models_data/Cydonia-22B-v1.2/ranks.svg CHANGED
static/models_data/Cydonia-22B-v1.2/structure.svg CHANGED
static/models_data/Llama-4-Scout-17B-16E-Instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.4273,0.4796,0.54675,0.524575
3
+ chunk_1,0.658525,0.6182,0.320025,0.30524999999999997
4
+ chunk_2,0.691975,0.6694,0.315175,0.296125
5
+ chunk_3,0.934225,0.93585,0.082675,0.0569
6
+ chunk_4,0.6817,0.64905,0.310925,0.31832499999999997
7
+ chunk_chess_0,0.8697999999999999,0.82065,0.09937499999999999,0.14279999999999998
8
+ chunk_grammar_1,0.580875,0.516275,0.34535000000000005,0.369725
9
+ chunk_no_conv,0.6344000000000001,0.5966,0.32425,0.355075
10
+ chunk_svs_no_conv,0.8027,0.7884,0.10057500000000001,0.0986
static/models_data/Llama-4-Scout-17B-16E-Instruct/matrix.svg ADDED
static/models_data/Llama-4-Scout-17B-16E-Instruct/ranks.svg ADDED
static/models_data/Llama-4-Scout-17B-16E-Instruct/structure.svg ADDED
static/models_data/Ministrations-8B-v1/cfa_metrics.csv CHANGED
@@ -1,10 +1,10 @@
1
  Context chunk,CFI,TLI,SRMR,RMSEA
2
- chunk_0,0.46715,0.49015,0.5363,0.529325
3
- chunk_1,0.202875,0.1907,0.774875,0.7669250000000001
4
- chunk_2,0.481425,0.6037250000000001,0.5422750000000001,0.51175
5
- chunk_3,0.6098250000000001,0.5543,0.32732500000000003,0.314075
6
- chunk_4,0.428925,0.401625,0.542175,0.547875
7
- chunk_chess_0,0.650825,0.6739999999999999,0.32,0.29962500000000003
8
- chunk_grammar_1,0.583025,0.525525,0.326875,0.321575
9
- chunk_no_conv,0.43115,0.405575,0.55255,0.549325
10
- chunk_svs_no_conv,0.8694500000000001,0.83165,0.104675,0.09305
 
1
  Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.4901,0.4047,0.341025,0.33805
3
+ chunk_1,0.5885,0.542225,0.323075,0.30022499999999996
4
+ chunk_2,0.45375,0.590275,0.550125,0.523225
5
+ chunk_3,0.30415000000000003,0.214625,0.560325,0.5645
6
+ chunk_4,0.8859250000000001,0.83105,0.09234999999999999,0.047525
7
+ chunk_chess_0,0.958,0.96065,0.084925,0.025925
8
+ chunk_grammar_1,0.5,0.6065750000000001,0.534025,0.5
9
+ chunk_no_conv,0.41195000000000004,0.39695,0.55445,0.529
10
+ chunk_svs_no_conv,0.3437,0.30210000000000004,0.560575,0.577775
static/models_data/Ministrations-8B-v1/matrix.svg CHANGED
static/models_data/Ministrations-8B-v1/ranks.svg CHANGED
static/models_data/Ministrations-8B-v1/structure.svg CHANGED
static/models_data/Nautilus-70B-v0.1/cfa_metrics.csv CHANGED
@@ -1,10 +1,10 @@
1
  Context chunk,CFI,TLI,SRMR,RMSEA
2
- chunk_0,0.628875,0.591275,0.3249,0.32442499999999996
3
- chunk_1,0.822175,0.745225,0.11009999999999999,0.13985
4
- chunk_2,0.897,0.86425,0.103925,0.1001
5
- chunk_3,0.6444,0.6114499999999999,0.329275,0.327175
6
- chunk_4,0.83545,0.772575,0.102525,0.154
7
- chunk_chess_0,0.599925,0.5493,0.341425,0.37892500000000007
8
- chunk_grammar_1,0.8415250000000001,0.787325,0.108175,0.14775
9
- chunk_no_conv,0.833025,0.76905,0.1275,0.1716
10
- chunk_svs_no_conv,0.6538999999999999,0.627275,0.335675,0.33564999999999995
 
1
  Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.43345,0.4014,0.5453,0.56775
3
+ chunk_1,0.8986,0.8647250000000001,0.09405,0.103175
4
+ chunk_2,0.5970500000000001,0.55005,0.349075,0.359575
5
+ chunk_3,0.870275,0.82635,0.09782500000000001,0.12525
6
+ chunk_4,0.625625,0.5858,0.334975,0.3486
7
+ chunk_chess_0,0.6296250000000001,0.595625,0.325925,0.347075
8
+ chunk_grammar_1,0.561825,0.5008750000000001,0.338625,0.39225
9
+ chunk_no_conv,0.8185250000000001,0.75295,0.09725,0.168225
10
+ chunk_svs_no_conv,0.6214,0.583125,0.3277,0.3584
static/models_data/Nautilus-70B-v0.1/matrix.svg CHANGED
static/models_data/Nautilus-70B-v0.1/ranks.svg CHANGED
static/models_data/Nautilus-70B-v0.1/structure.svg CHANGED
static/models_data/cardinal.svg CHANGED
static/models_data/ordinal.svg CHANGED