Minor improvement in elo explanation (#7)
Browse files
- Minor improvement in elo explanation (61b30c6d995b3cee0a40a7251d5b972a4524f5b6)
Co-authored-by: Furkan Eris <[email protected]>
app.py
CHANGED
@@ -2113,7 +2113,7 @@ def create_interface():
|
|
2113 |
"* <strong style='color: #b2dfdb;'>K</strong>: Weight factor (24 for model pairs)<br>" +
|
2114 |
"* <strong style='color: #b2dfdb;'>S</strong>: Actual score from user feedback (1 for correct, 0 for incorrect)<br>" +
|
2115 |
"* <strong style='color: #b2dfdb;'>E</strong>: Expected score based on current rating<br><br>" +
|
2116 |
-
"<em style='color: #80deea;'>E = 1 / (1 + 10<sup>(1500 - … [removed line truncated in the diff rendering]
|
2117 |
"<div style='flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);'>" +
|
2118 |
"<h4 style='margin-top: 0; color: #ffffff;'>Available Models</h4>" +
|
2119 |
"<p style='color: #eceff1;'>The system randomly selects from these models for each hallucination detection:</p>" +
|
@@ -2260,7 +2260,7 @@ def create_interface():
|
|
2260 |
"* <strong style='color: #b2dfdb;'>K</strong>: Weight factor (32 for individual models)<br>" +
|
2261 |
"* <strong style='color: #b2dfdb;'>S</strong>: Actual score (1 for correct judgment, 0 for incorrect)<br>" +
|
2262 |
"* <strong style='color: #b2dfdb;'>E</strong>: Expected score based on current rating<br><br>" +
|
2263 |
-
"<em style='color: #80deea;'>E = 1 / (1 + 10<sup>(1500 - … [removed line truncated in the diff rendering]
|
2264 |
"<p style='color: #eceff1; margin-top: 10px;'>All models start with a base ELO of 1500. Scores are updated after each user evaluation.</p></div>" +
|
2265 |
"<div style='flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);'>" +
|
2266 |
"<h4 style='margin-top: 0; color: #ffffff;'>Interpretation Guidelines</h4>" +
|
|
|
2113 |
"* <strong style='color: #b2dfdb;'>K</strong>: Weight factor (24 for model pairs)<br>" +
|
2114 |
"* <strong style='color: #b2dfdb;'>S</strong>: Actual score from user feedback (1 for correct, 0 for incorrect)<br>" +
|
2115 |
"* <strong style='color: #b2dfdb;'>E</strong>: Expected score based on current rating<br><br>" +
|
2116 |
+
"<em style='color: #80deea;'>E = 1 / (1 + 10<sup>(1500 - ELO_old)/400</sup>)</em></div></div>" +
|
2117 |
"<div style='flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);'>" +
|
2118 |
"<h4 style='margin-top: 0; color: #ffffff;'>Available Models</h4>" +
|
2119 |
"<p style='color: #eceff1;'>The system randomly selects from these models for each hallucination detection:</p>" +
|
|
|
2260 |
"* <strong style='color: #b2dfdb;'>K</strong>: Weight factor (32 for individual models)<br>" +
|
2261 |
"* <strong style='color: #b2dfdb;'>S</strong>: Actual score (1 for correct judgment, 0 for incorrect)<br>" +
|
2262 |
"* <strong style='color: #b2dfdb;'>E</strong>: Expected score based on current rating<br><br>" +
|
2263 |
+
"<em style='color: #80deea;'>E = 1 / (1 + 10<sup>(1500 - ELO_old)/400</sup>)</em></div>" +
|
2264 |
"<p style='color: #eceff1; margin-top: 10px;'>All models start with a base ELO of 1500. Scores are updated after each user evaluation.</p></div>" +
|
2265 |
"<div style='flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);'>" +
|
2266 |
"<h4 style='margin-top: 0; color: #ffffff;'>Interpretation Guidelines</h4>" +
|