UI improvements for light mode

#10
Files changed (1)
  1. app.py +672 -106
app.py CHANGED
@@ -1222,7 +1222,7 @@ class ProgressTracker:
1222
  # Add model information if available and we're not in the idle, error, or starting state
1223
  model_info = ''
1224
  if self.stage not in ["idle", "error", "starting"] and (self.generator_model or self.judge_model):
1225
- model_info = f'<div class="model-info" style="display: flex; justify-content: space-between; margin-top: 8px; font-size: 0.85em; color: #37474f; background-color: #e1f5fe; padding: 5px 10px; border-radius: 4px;">'
1226
  if self.generator_model:
1227
  model_info += f'<div><span style="font-weight: bold;">Generator:</span> {self.generator_model}</div>'
1228
  if self.judge_model:
@@ -1293,21 +1293,24 @@ def create_interface():
1293
 
1294
  # CSS for styling
1295
  css = """
 
1296
  .container {
1297
  max-width: 1000px;
1298
  margin: 0 auto;
1299
  }
 
 
1300
  .title {
1301
  text-align: center;
1302
  margin-bottom: 0.5em;
1303
- color: #0d47a1;
1304
  font-weight: 600;
 
1305
  }
1306
  .subtitle {
1307
  text-align: center;
1308
  margin-bottom: 1.5em;
1309
- color: #37474f;
1310
  font-size: 1.2em;
 
1311
  }
1312
  .section-title {
1313
  margin-top: 1em;
@@ -1318,63 +1321,63 @@ def create_interface():
1318
  .info-box {
1319
  padding: 1.2em;
1320
  border-radius: 8px;
1321
- background-color: var(--background-fill-secondary);
1322
  margin-bottom: 1em;
1323
  box-shadow: 0 2px 5px rgba(0,0,0,0.1);
1324
- color: var(--body-text-color);
1325
  line-height: 1.5;
1326
- border-left: 3px solid var(--border-color-accent);
1327
- border: 1px solid var(--border-color-primary);
 
 
1328
  }
1329
  .info-box p strong {
1330
- color: var(--body-text-color);
1331
  font-weight: 600;
1332
  }
1333
  .hallucination-positive {
1334
  padding: 1.2em;
1335
  border-radius: 8px;
1336
- background-color: #f8e8e8;
1337
- border-left: 5px solid #c62828;
1338
  margin-bottom: 1em;
1339
  box-shadow: 0 2px 5px rgba(0,0,0,0.05);
1340
- color: #4d0c0c;
1341
  }
1342
  .hallucination-positive h3 {
1343
- color: #c62828;
1344
  margin-top: 0;
1345
  margin-bottom: 0.5em;
1346
  }
1347
  .hallucination-positive p {
1348
- color: #5d4141;
1349
  line-height: 1.5;
1350
  }
1351
  .hallucination-negative {
1352
  padding: 1.2em;
1353
  border-radius: 8px;
1354
- background-color: #e8f5e9;
1355
- border-left: 5px solid #2e7d32;
1356
  margin-bottom: 1em;
1357
  box-shadow: 0 2px 5px rgba(0,0,0,0.05);
1358
- color: #1b5e20;
1359
  }
1360
  .hallucination-negative h3 {
1361
- color: #2e7d32;
1362
  margin-top: 0;
1363
  margin-bottom: 0.5em;
1364
  }
1365
  .hallucination-negative p {
1366
- color: #3e5e40;
1367
  line-height: 1.5;
1368
  }
1369
  .response-box {
1370
  padding: 1.2em;
1371
  border-radius: 8px;
1372
- background-color: #eceff1;
1373
  margin-bottom: 0.8em;
1374
  box-shadow: 0 2px 5px rgba(0,0,0,0.05);
1375
- color: #37474f;
1376
  line-height: 1.5;
1377
- border-left: 3px solid #78909c;
1378
  }
1379
  .example-queries {
1380
  display: flex;
@@ -1383,42 +1386,46 @@ def create_interface():
1383
  margin-bottom: 15px;
1384
  }
1385
  .example-query {
1386
- background-color: #e1f5fe;
1387
  padding: 8px 15px;
1388
  border-radius: 18px;
1389
  font-size: 0.9em;
1390
  cursor: pointer;
1391
  transition: all 0.2s;
1392
- border: 1px solid #b3e5fc;
1393
- color: #01579b;
1394
  }
1395
  .example-query:hover {
1396
- background-color: #b3e5fc;
1397
  box-shadow: 0 2px 5px rgba(0,0,0,0.1);
1398
  }
1399
  .stats-section {
1400
  display: flex;
1401
  justify-content: space-between;
1402
- background-color: #e3f2fd;
1403
  padding: 15px;
1404
- border-radius: 8px;
1405
  margin-bottom: 20px;
 
 
 
1406
  }
1407
  .stat-item {
1408
  text-align: center;
1409
  padding: 10px;
1410
  }
1411
  .stat-value {
1412
- font-size: 1.5em;
1413
  font-weight: bold;
1414
- color: #0d47a1;
1415
  }
1416
  .stat-label {
1417
  font-size: 0.9em;
1418
- color: #1976d2;
 
1419
  }
1420
  .feedback-section {
1421
- border-top: 1px solid #e0e0e0;
1422
  padding-top: 15px;
1423
  margin-top: 20px;
1424
  }
@@ -1426,20 +1433,20 @@ def create_interface():
1426
  text-align: center;
1427
  padding: 20px;
1428
  margin-top: 30px;
1429
- color: #607d8b;
1430
  font-size: 0.9em;
1431
  }
1432
  .processing-status {
1433
  padding: 12px;
1434
- background-color: #e1f5fe;
1435
- border-left: 4px solid #0288d1;
1436
  margin-bottom: 15px;
1437
  font-weight: 500;
1438
- color: #01579b;
1439
  }
1440
  .debug-panel {
1441
- background-color: #f5f5f5;
1442
- border: 1px solid #e0e0e0;
1443
  border-radius: 4px;
1444
  padding: 10px;
1445
  margin-top: 15px;
@@ -1448,13 +1455,15 @@ def create_interface():
1448
  white-space: pre-wrap;
1449
  max-height: 200px;
1450
  overflow-y: auto;
 
1451
  }
1452
  .progress-container {
1453
  padding: 15px;
1454
- background-color: #fff;
1455
  border-radius: 8px;
1456
  box-shadow: 0 2px 5px rgba(0,0,0,0.05);
1457
  margin-bottom: 15px;
 
1458
  }
1459
  .progress-status {
1460
  font-weight: 500;
@@ -1463,7 +1472,7 @@ def create_interface():
1463
  font-size: 0.95em;
1464
  }
1465
  .progress-bar-container {
1466
- background-color: #e0e0e0;
1467
  height: 10px;
1468
  border-radius: 5px;
1469
  overflow: hidden;
@@ -1473,16 +1482,595 @@ def create_interface():
1473
  .progress-bar {
1474
  height: 100%;
1475
  transition: width 0.5s ease;
1476
- background-image: linear-gradient(to right, #2196F3, #3f51b5);
1477
  }
1478
  .query-display {
1479
  font-style: italic;
1480
- color: #666;
1481
  margin-bottom: 10px;
1482
- background-color: #f5f5f5;
1483
  padding: 8px;
1484
  border-radius: 4px;
1485
- border-left: 3px solid #2196F3;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1486
  }
1487
  """
1488
 
@@ -1749,14 +2337,14 @@ def create_interface():
1749
  <div class="container">
1750
  <h2 class="title">Hallucination Detection Results</h2>
1751
 
1752
- <div class="model-info-bar" style="background-color: #e1f5fe; padding: 10px 15px; border-radius: 8px; margin-bottom: 15px; display: flex; justify-content: space-between;">
1753
- <div style="flex: 1; text-align: center; border-right: 1px solid #b3e5fc; padding-right: 10px;">
1754
- <div style="font-weight: bold; color: #0277bd;">Generator Model</div>
1755
- <div style="font-size: 1.2em; color: #01579b;">{generator_model}</div>
1756
  </div>
1757
- <div style="flex: 1; text-align: center; padding-left: 10px;">
1758
- <div style="font-weight: bold; color: #0277bd;">Judge Model</div>
1759
- <div style="font-size: 1.2em; color: #01579b;">{judge_model}</div>
1760
  </div>
1761
  </div>
1762
 
@@ -1789,7 +2377,7 @@ def create_interface():
1789
  {original_query}
1790
  </div>
1791
 
1792
- <div class="section-title">Original Response <span style="font-size: 0.8em; color: #607d8b;">(generated by {generator_model})</span></div>
1793
  <div class="response-box">
1794
  {original_response_safe}
1795
  </div>
@@ -1804,14 +2392,14 @@ def create_interface():
1804
  {q}
1805
  </div>
1806
 
1807
- <div class="section-title">Response {i} <span style="font-size: 0.8em; color: #607d8b;">(generated by {generator_model})</span></div>
1808
  <div class="response-box">
1809
  {r}
1810
  </div>
1811
  """
1812
 
1813
  html_output += f"""
1814
- <div class="section-title">Detailed Analysis <span style="font-size: 0.8em; color: #607d8b;">(judged by {judge_model})</span></div>
1815
  <div class="info-box">
1816
  <p><strong>Reasoning:</strong></p>
1817
  <p>{reasoning_safe}</p>
@@ -1820,7 +2408,7 @@ def create_interface():
1820
  <p>{conflicting_facts_text_safe}</p>
1821
  </div>
1822
 
1823
- <div style="margin-top: 20px; border-top: 1px dashed #ccc; padding-top: 15px; font-size: 0.9em; color: #607d8b; text-align: center;">
1824
  Models randomly selected for this analysis: <strong>{generator_model}</strong> (Generator) and <strong>{judge_model}</strong> (Judge)
1825
  </div>
1826
  </div>
@@ -1945,9 +2533,9 @@ def create_interface():
1945
  gr.HTML(
1946
  """
1947
  <div style="text-align: center; margin-bottom: 1.5rem">
1948
- <h1 style="font-size: 2.2em; font-weight: 600; color: #1a237e; margin-bottom: 0.2em;">PAS2 - Hallucination Detector</h1>
1949
- <h3 style="font-size: 1.3em; color: #455a64; margin-bottom: 0.8em;">Advanced AI Response Verification Using Model-as-Judge</h3>
1950
- <p style="font-size: 1.1em; color: #546e7a; max-width: 800px; margin: 0 auto;">
1951
  This tool detects hallucinations in AI responses by comparing answers to semantically equivalent questions and using a specialized judge model.
1952
  </p>
1953
  </div>
@@ -2075,9 +2663,8 @@ def create_interface():
2075
 
2076
  if not pairs:
2077
  return (
2078
- "<div class=\"info-message\" style=\"padding: 20px; background-color: #e1f5fe; "
2079
- "border-radius: 8px; text-align: center; margin: 20px 0;\">"
2080
- "<h3 style=\"margin-top: 0; color: #0277bd;\">No Data Available Yet</h3>"
2081
  "<p>Try the detector with more queries to populate the leaderboard!</p>"
2082
  "</div>"
2083
  )
@@ -2106,9 +2693,9 @@ def create_interface():
2106
  f"<td>{pair.get('judge', 'unknown')}</td>"
2107
  f"<td>{round(pair.get('elo_score', 0))}</td>"
2108
  f"<td>{pair.get('accuracy')}%</td>"
2109
- f"<td style='color: #80cbc4; font-weight: 500;'>{generator_perf}</td>"
2110
- f"<td style='color: #90caf9; font-weight: 500;'>{judge_perf}</td>"
2111
- f"<td style='color: #ce93d8; font-weight: 500;'>{consistency}</td>"
2112
  f"<td>{pair.get('total_samples', 0)}</td>"
2113
  f"</tr>"
2114
  )
@@ -2135,13 +2722,13 @@ def create_interface():
2135
  f"</tbody>"
2136
  f"</table>"
2137
  f"</div>"
2138
- f"<div style='margin-top: 15px; padding: 12px; background-color: #263238; border-radius: 8px; font-size: 0.95em; color: #e0f7fa; box-shadow: 0 2px 5px rgba(0,0,0,0.2);'>"
2139
- f"<p style='margin-bottom: 8px; color: #80deea;'><strong>Model Pair Performance Metrics:</strong></p>"
2140
- f"<ul style='margin-top: 5px; padding-left: 20px; line-height: 1.4;'>"
2141
- f"<li><strong style='color: #b2dfdb;'>Accuracy</strong>: Percentage of correct hallucination judgments based on user feedback</li>"
2142
- f"<li><strong style='color: #b2dfdb;'>Generator Performance</strong>: How well the generator model avoids hallucinations</li>"
2143
- f"<li><strong style='color: #b2dfdb;'>Judge Performance</strong>: How accurately the judge model identifies hallucinations</li>"
2144
- f"<li><strong style='color: #b2dfdb;'>Consistency</strong>: Weighted measure of how well the pair works together</li>"
2145
  f"</ul>"
2146
  f"</div>"
2147
  )
@@ -2150,9 +2737,8 @@ def create_interface():
2150
  except Exception as e:
2151
  logger.error("Error generating leaderboard HTML: %s", str(e), exc_info=True)
2152
  return (
2153
- f"<div class=\"error-message\" style=\"padding: 20px; background-color: #ffebee; "
2154
- f"border-radius: 8px; text-align: center; margin: 20px 0;\">"
2155
- f"<h3 style=\"margin-top: 0; color: #c62828;\">Error Loading Leaderboard</h3>"
2156
  f"<p>{str(e)}</p>"
2157
  f"</div>"
2158
  )
@@ -2192,24 +2778,6 @@ def create_interface():
2192
  "<li>deepseek-reasoner</li><li>o4-mini</li><li>gemini-2.5-pro</li>" +
2193
  "</ul></div></div></div></div></div>"
2194
  )
2195
- gr.HTML(
2196
- "<style>" +
2197
- ".leaderboard-container {margin: 15px 0; overflow-x: auto;}" +
2198
- ".leaderboard-table {width: 100%; border-collapse: collapse; font-size: 0.95em; " +
2199
- "box-shadow: 0 2px 10px rgba(0,0,0,0.2); border-radius: 8px; overflow: hidden;}" +
2200
- ".leaderboard-table thead {background-color: #0d47a1; color: white;}" +
2201
- ".leaderboard-table th, .leaderboard-table td {padding: 12px 15px; text-align: left; border-bottom: 1px solid #37474f; color: #eceff1;}" +
2202
- ".leaderboard-table tbody tr {transition: background-color 0.3s;}" +
2203
- ".leaderboard-table tbody tr:nth-child(even) {background-color: #37474f;}" +
2204
- ".leaderboard-table tbody tr:nth-child(odd) {background-color: #455a64;}" +
2205
- ".leaderboard-table tbody tr:hover {background-color: #263238;}" +
2206
- ".leaderboard-table tbody tr.top-rank-1 {background-color: #004d40; color: #e0f2f1; font-weight: bold;}" +
2207
- ".leaderboard-table tbody tr.top-rank-2 {background-color: #1b5e20; color: #e8f5e9; font-weight: 500;}" +
2208
- ".leaderboard-table tbody tr.top-rank-3 {background-color: #33691e; color: #f1f8e9; font-weight: 500;}" +
2209
- ".leaderboard-table td {position: relative;}" +
2210
- ".leaderboard-table td::after {content: ''; position: absolute; top: 0; left: 0; width: 100%; height: 100%; background: transparent; pointer-events: none;}" +
2211
- "</style>"
2212
- )
2213
 
2214
  # Tab 3: Individual Models Leaderboard
2215
  with gr.TabItem("Individual Models", elem_id="user-feedback-tab"):
@@ -2224,9 +2792,8 @@ def create_interface():
2224
 
2225
  if not models:
2226
  return (
2227
- "<div class=\"info-message\" style=\"padding: 20px; background-color: #e1f5fe; "
2228
- "border-radius: 8px; text-align: center; margin: 20px 0;\">"
2229
- "<h3 style=\"margin-top: 0; color: #0277bd;\">No Data Available Yet</h3>"
2230
  "<p>Try the detector with more queries to populate the model scores!</p>"
2231
  "</div>"
2232
  )
@@ -2263,10 +2830,10 @@ def create_interface():
2263
  f"<td>{model.get('model_name', 'unknown')}</td>"
2264
  f"<td>{round(model.get('elo_score', 0))}</td>"
2265
  f"<td>{model.get('accuracy')}%</td>"
2266
- f"<td style='color: #80cbc4; font-weight: 500;'>{generator_acc}</td>"
2267
- f"<td style='color: #90caf9; font-weight: 500;'>{judge_acc}</td>"
2268
  f"<td>{model.get('total_samples', 0)}</td>"
2269
- f"<td style='color: #ffcc80; font-weight: 500;'>{role_distribution}</td>"
2270
  f"</tr>"
2271
  )
2272
 
@@ -2297,9 +2864,8 @@ def create_interface():
2297
  except Exception as e:
2298
  logger.error("Error generating model leaderboard HTML: %s", str(e), exc_info=True)
2299
  return (
2300
- f"<div class=\"error-message\" style=\"padding: 20px; background-color: #ffebee; "
2301
- f"border-radius: 8px; text-align: center; margin: 20px 0;\">"
2302
- f"<h3 style=\"margin-top: 0; color: #c62828;\">Error Loading Model Leaderboard</h3>"
2303
  f"<p>{str(e)}</p>"
2304
  f"</div>"
2305
  )
@@ -2356,17 +2922,17 @@ def create_interface():
2356
  accuracy_pct = f"{accuracy * 100:.1f}%"
2357
 
2358
  stats_html = f"""
2359
- <div class="stats-section" style="background-color: #e0f7fa; border-radius: 10px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); margin-top: 5px;">
2360
  <div class="stat-item">
2361
- <div class="stat-value" style="font-size: 2em; color: #00838f;">{total}</div>
2362
- <div class="stat-label" style="font-weight: bold; color: #006064;">Total Responses</div>
2363
  </div>
2364
  <div class="stat-item">
2365
- <div class="stat-value" style="font-size: 2em; color: #00838f;">{accuracy_pct}</div>
2366
- <div class="stat-label" style="font-weight: bold; color: #006064;">Correct Predictions</div>
2367
  </div>
2368
  </div>
2369
- <div style="text-align: center; margin-top: 10px; font-style: italic; color: #37474f;">
2370
  Based on user feedback: {correct} correct out of {total} total predictions
2371
  </div>
2372
  """
 
1222
  # Add model information if available and we're not in the idle, error, or starting state
1223
  model_info = ''
1224
  if self.stage not in ["idle", "error", "starting"] and (self.generator_model or self.judge_model):
1225
+ model_info = f'<div class="progress-model-info">'
1226
  if self.generator_model:
1227
  model_info += f'<div><span style="font-weight: bold;">Generator:</span> {self.generator_model}</div>'
1228
  if self.judge_model:
 
1293
 
1294
  # CSS for styling
1295
  css = """
1296
+ /* Base styles */
1297
  .container {
1298
  max-width: 1000px;
1299
  margin: 0 auto;
1300
  }
1301
+
1302
+ /* Light theme default styles */
1303
  .title {
1304
  text-align: center;
1305
  margin-bottom: 0.5em;
 
1306
  font-weight: 600;
1307
+ color: #0d47a1;
1308
  }
1309
  .subtitle {
1310
  text-align: center;
1311
  margin-bottom: 1.5em;
 
1312
  font-size: 1.2em;
1313
+ color: #37474f;
1314
  }
1315
  .section-title {
1316
  margin-top: 1em;
 
1321
  .info-box {
1322
  padding: 1.2em;
1323
  border-radius: 8px;
 
1324
  margin-bottom: 1em;
1325
  box-shadow: 0 2px 5px rgba(0,0,0,0.1);
 
1326
  line-height: 1.5;
1327
+ border: 1px solid #dee2e6;
1328
+ border-left: 3px solid #6c757d;
1329
+ background-color: #f8f9fa;
1330
+ color: #212529;
1331
  }
1332
  .info-box p strong {
1333
+ color: #495057;
1334
  font-weight: 600;
1335
  }
1336
  .hallucination-positive {
1337
  padding: 1.2em;
1338
  border-radius: 8px;
1339
+ background-color: #ffeaea;
1340
+ border-left: 5px solid #e53e3e;
1341
  margin-bottom: 1em;
1342
  box-shadow: 0 2px 5px rgba(0,0,0,0.05);
1343
+ color: #742a2a;
1344
  }
1345
  .hallucination-positive h3 {
1346
+ color: #e53e3e;
1347
  margin-top: 0;
1348
  margin-bottom: 0.5em;
1349
  }
1350
  .hallucination-positive p {
1351
+ color: #742a2a;
1352
  line-height: 1.5;
1353
  }
1354
  .hallucination-negative {
1355
  padding: 1.2em;
1356
  border-radius: 8px;
1357
+ background-color: #f0fff4;
1358
+ border-left: 5px solid #38a169;
1359
  margin-bottom: 1em;
1360
  box-shadow: 0 2px 5px rgba(0,0,0,0.05);
1361
+ color: #22543d;
1362
  }
1363
  .hallucination-negative h3 {
1364
+ color: #38a169;
1365
  margin-top: 0;
1366
  margin-bottom: 0.5em;
1367
  }
1368
  .hallucination-negative p {
1369
+ color: #22543d;
1370
  line-height: 1.5;
1371
  }
1372
  .response-box {
1373
  padding: 1.2em;
1374
  border-radius: 8px;
1375
+ background-color: #f7fafc;
1376
  margin-bottom: 0.8em;
1377
  box-shadow: 0 2px 5px rgba(0,0,0,0.05);
1378
+ color: #2d3748;
1379
  line-height: 1.5;
1380
+ border-left: 3px solid #a0aec0;
1381
  }
1382
  .example-queries {
1383
  display: flex;
 
1386
  margin-bottom: 15px;
1387
  }
1388
  .example-query {
1389
+ background-color: #ebf8ff;
1390
  padding: 8px 15px;
1391
  border-radius: 18px;
1392
  font-size: 0.9em;
1393
  cursor: pointer;
1394
  transition: all 0.2s;
1395
+ border: 1px solid #bee3f8;
1396
+ color: #2c5282;
1397
  }
1398
  .example-query:hover {
1399
+ background-color: #bee3f8;
1400
  box-shadow: 0 2px 5px rgba(0,0,0,0.1);
1401
  }
1402
  .stats-section {
1403
  display: flex;
1404
  justify-content: space-between;
1405
+ background-color: #ebf8ff;
1406
  padding: 15px;
1407
+ border-radius: 10px;
1408
  margin-bottom: 20px;
1409
+ margin-top: 5px;
1410
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
1411
+ border: 1px solid #bee3f8;
1412
  }
1413
  .stat-item {
1414
  text-align: center;
1415
  padding: 10px;
1416
  }
1417
  .stat-value {
1418
+ font-size: 2em;
1419
  font-weight: bold;
1420
+ color: #2c5282;
1421
  }
1422
  .stat-label {
1423
  font-size: 0.9em;
1424
+ font-weight: bold;
1425
+ color: #3182ce;
1426
  }
1427
  .feedback-section {
1428
+ border-top: 1px solid #e2e8f0;
1429
  padding-top: 15px;
1430
  margin-top: 20px;
1431
  }
 
1433
  text-align: center;
1434
  padding: 20px;
1435
  margin-top: 30px;
1436
+ color: #718096;
1437
  font-size: 0.9em;
1438
  }
1439
  .processing-status {
1440
  padding: 12px;
1441
+ background-color: #ebf8ff;
1442
+ border-left: 4px solid #3182ce;
1443
  margin-bottom: 15px;
1444
  font-weight: 500;
1445
+ color: #2c5282;
1446
  }
1447
  .debug-panel {
1448
+ background-color: #f7fafc;
1449
+ border: 1px solid #e2e8f0;
1450
  border-radius: 4px;
1451
  padding: 10px;
1452
  margin-top: 15px;
 
1455
  white-space: pre-wrap;
1456
  max-height: 200px;
1457
  overflow-y: auto;
1458
+ color: #4a5568;
1459
  }
1460
  .progress-container {
1461
  padding: 15px;
1462
+ background-color: #ffffff;
1463
  border-radius: 8px;
1464
  box-shadow: 0 2px 5px rgba(0,0,0,0.05);
1465
  margin-bottom: 15px;
1466
+ border: 1px solid #e2e8f0;
1467
  }
1468
  .progress-status {
1469
  font-weight: 500;
 
1472
  font-size: 0.95em;
1473
  }
1474
  .progress-bar-container {
1475
+ background-color: #edf2f7;
1476
  height: 10px;
1477
  border-radius: 5px;
1478
  overflow: hidden;
 
1482
  .progress-bar {
1483
  height: 100%;
1484
  transition: width 0.5s ease;
1485
+ background-image: linear-gradient(to right, #3182ce, #2b6cb0);
1486
  }
1487
  .query-display {
1488
  font-style: italic;
1489
+ color: #718096;
1490
  margin-bottom: 10px;
1491
+ background-color: #f7fafc;
1492
  padding: 8px;
1493
  border-radius: 4px;
1494
+ border-left: 3px solid #3182ce;
1495
+ }
1496
+
1497
+ /* Dark theme styles */
1498
+ @media (prefers-color-scheme: dark) {
1499
+ .title {
1500
+ color: #63b3ed;
1501
+ }
1502
+ .subtitle {
1503
+ color: #a0aec0;
1504
+ }
1505
+ .section-title {
1506
+ color: #90cdf4;
1507
+ }
1508
+ .info-box {
1509
+ background-color: #2d3748;
1510
+ color: #e2e8f0;
1511
+ border-color: #4a5568;
1512
+ border-left-color: #718096;
1513
+ }
1514
+ .info-box p strong {
1515
+ color: #f7fafc;
1516
+ }
1517
+ .hallucination-positive {
1518
+ background-color: #553c39;
1519
+ color: #fed7d7;
1520
+ border-left-color: #fc8181;
1521
+ }
1522
+ .hallucination-positive h3 {
1523
+ color: #fc8181;
1524
+ }
1525
+ .hallucination-positive p {
1526
+ color: #fed7d7;
1527
+ }
1528
+ .hallucination-negative {
1529
+ background-color: #22543d;
1530
+ color: #c6f6d5;
1531
+ border-left-color: #68d391;
1532
+ }
1533
+ .hallucination-negative h3 {
1534
+ color: #68d391;
1535
+ }
1536
+ .hallucination-negative p {
1537
+ color: #c6f6d5;
1538
+ }
1539
+ .response-box {
1540
+ background-color: #1a202c;
1541
+ color: #e2e8f0;
1542
+ border-left-color: #4a5568;
1543
+ }
1544
+ .example-query {
1545
+ background-color: #2a4365;
1546
+ border-color: #2c5282;
1547
+ color: #bee3f8;
1548
+ }
1549
+ .example-query:hover {
1550
+ background-color: #3182ce;
1551
+ }
1552
+ .stats-section {
1553
+ background-color: #2a4365;
1554
+ border-color: #2c5282;
1555
+ }
1556
+ .stat-value {
1557
+ color: #bee3f8;
1558
+ }
1559
+ .stat-label {
1560
+ color: #90cdf4;
1561
+ }
1562
+ .feedback-section {
1563
+ border-top-color: #4a5568;
1564
+ }
1565
+ .footer {
1566
+ color: #a0aec0;
1567
+ }
1568
+ .processing-status {
1569
+ background-color: #2a4365;
1570
+ border-left-color: #90cdf4;
1571
+ color: #bee3f8;
1572
+ }
1573
+ .debug-panel {
1574
+ background-color: #1a202c;
1575
+ border-color: #4a5568;
1576
+ color: #e2e8f0;
1577
+ }
1578
+ .progress-container {
1579
+ background-color: #2d3748;
1580
+ border-color: #4a5568;
1581
+ }
1582
+ .progress-bar-container {
1583
+ background-color: #4a5568;
1584
+ }
1585
+ .progress-bar {
1586
+ background-image: linear-gradient(to right, #90cdf4, #63b3ed);
1587
+ }
1588
+ .query-display {
1589
+ color: #a0aec0;
1590
+ background-color: #1a202c;
1591
+ border-left-color: #90cdf4;
1592
+ }
1593
+ }
1594
+
1595
+ /* Gradio theme detection fallbacks */
1596
+ html[data-theme="dark"] .title,
1597
+ .dark .title {
1598
+ color: #63b3ed !important;
1599
+ }
1600
+ html[data-theme="dark"] .subtitle,
1601
+ .dark .subtitle {
1602
+ color: #a0aec0 !important;
1603
+ }
1604
+ html[data-theme="dark"] .section-title,
1605
+ .dark .section-title {
1606
+ color: #90cdf4 !important;
1607
+ }
1608
+ html[data-theme="dark"] .info-box,
1609
+ .dark .info-box {
1610
+ background-color: #2d3748 !important;
1611
+ color: #e2e8f0 !important;
1612
+ border-color: #4a5568 !important;
1613
+ border-left-color: #718096 !important;
1614
+ }
1615
+ html[data-theme="dark"] .info-box p strong,
1616
+ .dark .info-box p strong {
1617
+ color: #f7fafc !important;
1618
+ }
1619
+ html[data-theme="dark"] .response-box,
1620
+ .dark .response-box {
1621
+ background-color: #1a202c !important;
1622
+ color: #e2e8f0 !important;
1623
+ border-left-color: #4a5568 !important;
1624
+ }
1625
+ html[data-theme="dark"] .example-query,
1626
+ .dark .example-query {
1627
+ background-color: #2a4365 !important;
1628
+ border-color: #2c5282 !important;
1629
+ color: #bee3f8 !important;
1630
+ }
1631
+ html[data-theme="dark"] .stats-section,
1632
+ .dark .stats-section {
1633
+ background-color: #2a4365 !important;
1634
+ border-color: #2c5282 !important;
1635
+ }
1636
+ html[data-theme="dark"] .stat-value,
1637
+ .dark .stat-value {
1638
+ color: #bee3f8 !important;
1639
+ }
1640
+ html[data-theme="dark"] .stat-label,
1641
+ .dark .stat-label {
1642
+ color: #90cdf4 !important;
1643
+ }
1644
+ html[data-theme="dark"] .processing-status,
1645
+ .dark .processing-status {
1646
+ background-color: #2a4365 !important;
1647
+ border-left-color: #90cdf4 !important;
1648
+ color: #bee3f8 !important;
1649
+ }
1650
+ html[data-theme="dark"] .debug-panel,
1651
+ .dark .debug-panel {
1652
+ background-color: #1a202c !important;
1653
+ border-color: #4a5568 !important;
1654
+ color: #e2e8f0 !important;
1655
+ }
1656
+ html[data-theme="dark"] .progress-container,
1657
+ .dark .progress-container {
1658
+ background-color: #2d3748 !important;
1659
+ border-color: #4a5568 !important;
1660
+ }
1661
+ html[data-theme="dark"] .progress-bar-container,
1662
+ .dark .progress-bar-container {
1663
+ background-color: #4a5568 !important;
1664
+ }
1665
+ html[data-theme="dark"] .query-display,
1666
+ .dark .query-display {
1667
+ color: #a0aec0 !important;
1668
+ background-color: #1a202c !important;
1669
+ border-left-color: #90cdf4 !important;
1670
+ }
1671
+
1672
+ /* Additional theme-aware classes */
1673
+ .model-info-bar {
1674
+ background-color: #ebf8ff;
1675
+ padding: 10px 15px;
1676
+ border-radius: 8px;
1677
+ margin-bottom: 15px;
1678
+ display: flex;
1679
+ justify-content: space-between;
1680
+ border: 1px solid #bee3f8;
1681
+ }
1682
+ .model-info-section {
1683
+ flex: 1;
1684
+ text-align: center;
1685
+ padding-right: 10px;
1686
+ border-right: 1px solid #bee3f8;
1687
+ }
1688
+ .model-info-section:last-child {
1689
+ border-right: none;
1690
+ padding-right: 0;
1691
+ padding-left: 10px;
1692
+ }
1693
+ .model-label {
1694
+ font-weight: bold;
1695
+ color: #2c5282;
1696
+ }
1697
+ .model-name {
1698
+ font-size: 1.2em;
1699
+ color: #2b6cb0;
1700
+ }
1701
+ .app-title {
1702
+ font-size: 2.2em;
1703
+ font-weight: 600;
1704
+ color: #2c5282;
1705
+ margin-bottom: 0.2em;
1706
+ }
1707
+ .app-subtitle {
1708
+ font-size: 1.3em;
1709
+ color: #4a5568;
1710
+ margin-bottom: 0.8em;
1711
+ }
1712
+ .app-description {
1713
+ font-size: 1.1em;
1714
+ color: #718096;
1715
+ max-width: 800px;
1716
+ margin: 0 auto;
1717
+ }
1718
+ .section-meta {
1719
+ font-size: 0.8em;
1720
+ color: #718096;
1721
+ }
1722
+ .divider-line {
1723
+ margin-top: 20px;
1724
+ border-top: 1px dashed #e2e8f0;
1725
+ padding-top: 15px;
1726
+ font-size: 0.9em;
1727
+ color: #718096;
1728
+ text-align: center;
1729
+ }
1730
+ .info-message {
1731
+ padding: 20px;
1732
+ background-color: #ebf8ff;
1733
+ border-radius: 8px;
1734
+ text-align: center;
1735
+ margin: 20px 0;
1736
+ border: 1px solid #bee3f8;
1737
+ }
1738
+ .info-message h3 {
1739
+ margin-top: 0;
1740
+ color: #2c5282;
1741
+ }
1742
+ .error-message {
1743
+ padding: 20px;
1744
+ background-color: #ffeaea;
1745
+ border-radius: 8px;
1746
+ text-align: center;
1747
+ margin: 20px 0;
1748
+ border: 1px solid #fc8181;
1749
+ }
1750
+ .error-message h3 {
1751
+ margin-top: 0;
1752
+ color: #e53e3e;
1753
+ }
1754
+ .perf-metric {
1755
+ font-weight: 500;
1756
+ }
1757
+ .perf-generator {
1758
+ color: #38a169;
1759
+ }
1760
+ .perf-judge {
1761
+ color: #3182ce;
1762
+ }
1763
+ .perf-consistency {
1764
+ color: #805ad5;
1765
+ }
1766
+ .perf-distribution {
1767
+ color: #d69e2e;
1768
+ }
1769
+
1770
+ /* Dark theme versions */
1771
+ @media (prefers-color-scheme: dark) {
1772
+ .model-info-bar {
1773
+ background-color: #2a4365;
1774
+ border-color: #2c5282;
1775
+ }
1776
+ .model-info-section {
1777
+ border-right-color: #2c5282;
1778
+ }
1779
+ .model-label {
1780
+ color: #bee3f8;
1781
+ }
1782
+ .model-name {
1783
+ color: #90cdf4;
1784
+ }
1785
+ .app-title {
1786
+ color: #63b3ed;
1787
+ }
1788
+ .app-subtitle {
1789
+ color: #a0aec0;
1790
+ }
1791
+ .app-description {
1792
+ color: #cbd5e0;
1793
+ }
1794
+ .section-meta {
1795
+ color: #a0aec0;
1796
+ }
1797
+ .divider-line {
1798
+ border-top-color: #4a5568;
1799
+ color: #a0aec0;
1800
+ }
1801
+ .info-message {
1802
+ background-color: #2a4365;
1803
+ border-color: #2c5282;
1804
+ }
1805
+ .info-message h3 {
1806
+ color: #bee3f8;
1807
+ }
1808
+ .error-message {
1809
+ background-color: #553c39;
1810
+ border-color: #fc8181;
1811
+ }
1812
+ .error-message h3 {
1813
+ color: #fc8181;
1814
+ }
1815
+ .perf-generator {
1816
+ color: #68d391;
1817
+ }
1818
+ .perf-judge {
1819
+ color: #90cdf4;
1820
+ }
1821
+ .perf-consistency {
1822
+ color: #b794f6;
1823
+ }
1824
+ .perf-distribution {
1825
+ color: #f6e05e;
1826
+ }
1827
+ }
1828
+
1829
+ /* Gradio fallbacks for new classes */
1830
+ html[data-theme="dark"] .model-info-bar,
1831
+ .dark .model-info-bar {
1832
+ background-color: #2a4365 !important;
1833
+ border-color: #2c5282 !important;
1834
+ }
1835
+ html[data-theme="dark"] .model-label,
1836
+ .dark .model-label {
1837
+ color: #bee3f8 !important;
1838
+ }
1839
+ html[data-theme="dark"] .model-name,
1840
+ .dark .model-name {
1841
+ color: #90cdf4 !important;
1842
+ }
1843
+ html[data-theme="dark"] .app-title,
1844
+ .dark .app-title {
1845
+ color: #63b3ed !important;
1846
+ }
1847
+ html[data-theme="dark"] .app-subtitle,
1848
+ .dark .app-subtitle {
1849
+ color: #a0aec0 !important;
1850
+ }
1851
+ html[data-theme="dark"] .app-description,
1852
+ .dark .app-description {
1853
+ color: #cbd5e0 !important;
1854
+ }
1855
+ html[data-theme="dark"] .section-meta,
1856
+ .dark .section-meta {
1857
+ color: #a0aec0 !important;
1858
+ }
1859
+ html[data-theme="dark"] .divider-line,
1860
+ .dark .divider-line {
1861
+ border-top-color: #4a5568 !important;
1862
+ color: #a0aec0 !important;
1863
+ }
1864
+
1865
+ /* Progress model info styling */
1866
+ .progress-model-info {
1867
+ display: flex;
1868
+ justify-content: space-between;
1869
+ margin-top: 8px;
1870
+ font-size: 0.85em;
1871
+ color: #4a5568;
1872
+ background-color: #ebf8ff;
1873
+ padding: 5px 10px;
1874
+ border-radius: 4px;
1875
+ border: 1px solid #bee3f8;
1876
+ }
1877
+
1878
+ @media (prefers-color-scheme: dark) {
1879
+ .progress-model-info {
1880
+ color: #a0aec0;
1881
+ background-color: #2a4365;
1882
+ border-color: #2c5282;
1883
+ }
1884
+ }
1885
+
1886
+ html[data-theme="dark"] .progress-model-info,
1887
+ .dark .progress-model-info {
1888
+ color: #a0aec0 !important;
1889
+ background-color: #2a4365 !important;
1890
+ border-color: #2c5282 !important;
1891
+ }
1892
+
1893
+ /* Metrics explanation box styling */
1894
+ .metrics-explanation {
1895
+ margin-top: 15px;
1896
+ padding: 12px;
1897
+ background-color: #f7fafc;
1898
+ border-radius: 8px;
1899
+ font-size: 0.95em;
1900
+ color: #2d3748;
1901
+ box-shadow: 0 2px 5px rgba(0,0,0,0.1);
1902
+ border: 1px solid #e2e8f0;
1903
+ }
1904
+ .metrics-explanation p {
1905
+ margin-bottom: 8px;
1906
+ color: #2c5282;
1907
+ }
1908
+ .metrics-explanation ul {
1909
+ margin-top: 5px;
1910
+ padding-left: 20px;
1911
+ line-height: 1.4;
1912
+ }
1913
+ .metrics-explanation strong {
1914
+ color: #2b6cb0;
1915
+ }
1916
+
1917
+ @media (prefers-color-scheme: dark) {
1918
+ .metrics-explanation {
1919
+ background-color: #2d3748;
1920
+ color: #e2e8f0;
1921
+ border-color: #4a5568;
1922
+ }
1923
+ .metrics-explanation p {
1924
+ color: #90cdf4;
1925
+ }
1926
+ .metrics-explanation strong {
1927
+ color: #bee3f8;
1928
+ }
1929
+ }
1930
+
1931
+ html[data-theme="dark"] .metrics-explanation,
1932
+ .dark .metrics-explanation {
1933
+ background-color: #2d3748 !important;
1934
+ color: #e2e8f0 !important;
1935
+ border-color: #4a5568 !important;
1936
+ }
1937
+ html[data-theme="dark"] .metrics-explanation p,
1938
+ .dark .metrics-explanation p {
1939
+ color: #90cdf4 !important;
1940
+ }
1941
+ html[data-theme="dark"] .metrics-explanation strong,
1942
+ .dark .metrics-explanation strong {
1943
+ color: #bee3f8 !important;
1944
+ }
1945
+
1946
+ /* Leaderboard table styling */
1947
+ .leaderboard-container {
1948
+ margin: 15px 0;
1949
+ overflow-x: auto;
1950
+ }
1951
+ .leaderboard-table {
1952
+ width: 100%;
1953
+ border-collapse: collapse;
1954
+ font-size: 0.95em;
1955
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
1956
+ border-radius: 8px;
1957
+ overflow: hidden;
1958
+ border: 1px solid #e2e8f0;
1959
+ }
1960
+ .leaderboard-table thead {
1961
+ background-color: #3182ce;
1962
+ color: white;
1963
+ }
1964
+ .leaderboard-table th,
1965
+ .leaderboard-table td {
1966
+ padding: 12px 15px;
1967
+ text-align: left;
1968
+ border-bottom: 1px solid #e2e8f0;
1969
+ color: #2d3748;
1970
+ }
1971
+ .leaderboard-table thead th {
1972
+ color: white;
1973
+ border-bottom-color: #2c5282;
1974
+ }
1975
+ .leaderboard-table tbody tr {
1976
+ transition: background-color 0.3s;
1977
+ background-color: #ffffff;
1978
+ }
1979
+ .leaderboard-table tbody tr:nth-child(even) {
1980
+ background-color: #f7fafc;
1981
+ }
1982
+ .leaderboard-table tbody tr:hover {
1983
+ background-color: #ebf8ff;
1984
+ }
1985
+ .leaderboard-table tbody tr.top-rank-1 {
1986
+ background-color: #f0fff4;
1987
+ color: #22543d;
1988
+ font-weight: bold;
1989
+ }
1990
+ .leaderboard-table tbody tr.top-rank-2 {
1991
+ background-color: #fefcbf;
1992
+ color: #744210;
1993
+ font-weight: 500;
1994
+ }
1995
+ .leaderboard-table tbody tr.top-rank-3 {
1996
+ background-color: #fed7cc;
1997
+ color: #7c2d12;
1998
+ font-weight: 500;
1999
+ }
2000
+
2001
+ /* Dark theme leaderboard */
2002
+ @media (prefers-color-scheme: dark) {
2003
+ .leaderboard-table {
2004
+ border-color: #4a5568;
2005
+ box-shadow: 0 2px 10px rgba(0,0,0,0.3);
2006
+ }
2007
+ .leaderboard-table thead {
2008
+ background-color: #2c5282;
2009
+ }
2010
+ .leaderboard-table th,
2011
+ .leaderboard-table td {
2012
+ border-bottom-color: #4a5568;
2013
+ color: #e2e8f0;
2014
+ }
2015
+ .leaderboard-table thead th {
2016
+ border-bottom-color: #1a365d;
2017
+ }
2018
+ .leaderboard-table tbody tr {
2019
+ background-color: #2d3748;
2020
+ }
2021
+ .leaderboard-table tbody tr:nth-child(even) {
2022
+ background-color: #1a202c;
2023
+ }
2024
+ .leaderboard-table tbody tr:hover {
2025
+ background-color: #2a4365;
2026
+ }
2027
+ .leaderboard-table tbody tr.top-rank-1 {
2028
+ background-color: #22543d;
2029
+ color: #c6f6d5;
2030
+ }
2031
+ .leaderboard-table tbody tr.top-rank-2 {
2032
+ background-color: #744210;
2033
+ color: #fefcbf;
2034
+ }
2035
+ .leaderboard-table tbody tr.top-rank-3 {
2036
+ background-color: #7c2d12;
2037
+ color: #fed7cc;
2038
+ }
2039
+ }
2040
+
2041
+ /* Gradio fallbacks for leaderboard */
2042
+ html[data-theme="dark"] .leaderboard-table,
2043
+ .dark .leaderboard-table {
2044
+ border-color: #4a5568 !important;
2045
+ box-shadow: 0 2px 10px rgba(0,0,0,0.3) !important;
2046
+ }
2047
+ html[data-theme="dark"] .leaderboard-table thead,
2048
+ .dark .leaderboard-table thead {
2049
+ background-color: #2c5282 !important;
2050
+ }
2051
+ html[data-theme="dark"] .leaderboard-table th,
2052
+ html[data-theme="dark"] .leaderboard-table td,
2053
+ .dark .leaderboard-table th,
2054
+ .dark .leaderboard-table td {
2055
+ border-bottom-color: #4a5568 !important;
2056
+ color: #e2e8f0 !important;
2057
+ }
2058
+ html[data-theme="dark"] .leaderboard-table thead th,
2059
+ .dark .leaderboard-table thead th {
2060
+ border-bottom-color: #1a365d !important;
2061
+ color: white !important;
2062
+ }
2063
+ html[data-theme="dark"] .leaderboard-table tbody tr,
2064
+ .dark .leaderboard-table tbody tr {
2065
+ background-color: #2d3748 !important;
2066
+ }
2067
+ html[data-theme="dark"] .leaderboard-table tbody tr:nth-child(even),
2068
+ .dark .leaderboard-table tbody tr:nth-child(even) {
2069
+ background-color: #1a202c !important;
2070
+ }
2071
+ html[data-theme="dark"] .leaderboard-table tbody tr:hover,
2072
+ .dark .leaderboard-table tbody tr:hover {
2073
+ background-color: #2a4365 !important;
2074
  }
2075
  """
2076
 
 
2337
  <div class="container">
2338
  <h2 class="title">Hallucination Detection Results</h2>
2339
 
2340
+ <div class="model-info-bar">
2341
+ <div class="model-info-section">
2342
+ <div class="model-label">Generator Model</div>
2343
+ <div class="model-name">{generator_model}</div>
2344
  </div>
2345
+ <div class="model-info-section">
2346
+ <div class="model-label">Judge Model</div>
2347
+ <div class="model-name">{judge_model}</div>
2348
  </div>
2349
  </div>
2350
 
 
2377
  {original_query}
2378
  </div>
2379
 
2380
+ <div class="section-title">Original Response <span class="section-meta">(generated by {generator_model})</span></div>
2381
  <div class="response-box">
2382
  {original_response_safe}
2383
  </div>
 
2392
  {q}
2393
  </div>
2394
 
2395
+ <div class="section-title">Response {i} <span class="section-meta">(generated by {generator_model})</span></div>
2396
  <div class="response-box">
2397
  {r}
2398
  </div>
2399
  """
2400
 
2401
  html_output += f"""
2402
+ <div class="section-title">Detailed Analysis <span class="section-meta">(judged by {judge_model})</span></div>
2403
  <div class="info-box">
2404
  <p><strong>Reasoning:</strong></p>
2405
  <p>{reasoning_safe}</p>
 
2408
  <p>{conflicting_facts_text_safe}</p>
2409
  </div>
2410
 
2411
+ <div class="divider-line">
2412
  Models randomly selected for this analysis: <strong>{generator_model}</strong> (Generator) and <strong>{judge_model}</strong> (Judge)
2413
  </div>
2414
  </div>
 
2533
  gr.HTML(
2534
  """
2535
  <div style="text-align: center; margin-bottom: 1.5rem">
2536
+ <h1 class="app-title">PAS2 - Hallucination Detector</h1>
2537
+ <h3 class="app-subtitle">Advanced AI Response Verification Using Model-as-Judge</h3>
2538
+ <p class="app-description">
2539
  This tool detects hallucinations in AI responses by comparing answers to semantically equivalent questions and using a specialized judge model.
2540
  </p>
2541
  </div>
 
2663
 
2664
  if not pairs:
2665
  return (
2666
+ "<div class=\"info-message\">"
2667
+ "<h3>No Data Available Yet</h3>"
 
2668
  "<p>Try the detector with more queries to populate the leaderboard!</p>"
2669
  "</div>"
2670
  )
 
2693
  f"<td>{pair.get('judge', 'unknown')}</td>"
2694
  f"<td>{round(pair.get('elo_score', 0))}</td>"
2695
  f"<td>{pair.get('accuracy')}%</td>"
2696
+ f"<td class='perf-metric perf-generator'>{generator_perf}</td>"
2697
+ f"<td class='perf-metric perf-judge'>{judge_perf}</td>"
2698
+ f"<td class='perf-metric perf-consistency'>{consistency}</td>"
2699
  f"<td>{pair.get('total_samples', 0)}</td>"
2700
  f"</tr>"
2701
  )
 
2722
  f"</tbody>"
2723
  f"</table>"
2724
  f"</div>"
2725
+ f"<div class='metrics-explanation'>"
2726
+ f"<p><strong>Model Pair Performance Metrics:</strong></p>"
2727
+ f"<ul>"
2728
+ f"<li><strong>Accuracy</strong>: Percentage of correct hallucination judgments based on user feedback</li>"
2729
+ f"<li><strong>Generator Performance</strong>: How well the generator model avoids hallucinations</li>"
2730
+ f"<li><strong>Judge Performance</strong>: How accurately the judge model identifies hallucinations</li>"
2731
+ f"<li><strong>Consistency</strong>: Weighted measure of how well the pair works together</li>"
2732
  f"</ul>"
2733
  f"</div>"
2734
  )
 
2737
  except Exception as e:
2738
  logger.error("Error generating leaderboard HTML: %s", str(e), exc_info=True)
2739
  return (
2740
+ f"<div class=\"error-message\">"
2741
+ f"<h3>Error Loading Leaderboard</h3>"
 
2742
  f"<p>{str(e)}</p>"
2743
  f"</div>"
2744
  )
 
2778
  "<li>deepseek-reasoner</li><li>o4-mini</li><li>gemini-2.5-pro</li>" +
2779
  "</ul></div></div></div></div></div>"
2780
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2781
 
2782
  # Tab 3: Individual Models Leaderboard
2783
  with gr.TabItem("Individual Models", elem_id="user-feedback-tab"):
 
2792
 
2793
  if not models:
2794
  return (
2795
+ "<div class=\"info-message\">"
2796
+ "<h3>No Data Available Yet</h3>"
 
2797
  "<p>Try the detector with more queries to populate the model scores!</p>"
2798
  "</div>"
2799
  )
 
2830
  f"<td>{model.get('model_name', 'unknown')}</td>"
2831
  f"<td>{round(model.get('elo_score', 0))}</td>"
2832
  f"<td>{model.get('accuracy')}%</td>"
2833
+ f"<td class='perf-metric perf-generator'>{generator_acc}</td>"
2834
+ f"<td class='perf-metric perf-judge'>{judge_acc}</td>"
2835
  f"<td>{model.get('total_samples', 0)}</td>"
2836
+ f"<td class='perf-metric perf-distribution'>{role_distribution}</td>"
2837
  f"</tr>"
2838
  )
2839
 
 
2864
  except Exception as e:
2865
  logger.error("Error generating model leaderboard HTML: %s", str(e), exc_info=True)
2866
  return (
2867
+ f"<div class=\"error-message\">"
2868
+ f"<h3>Error Loading Model Leaderboard</h3>"
 
2869
  f"<p>{str(e)}</p>"
2870
  f"</div>"
2871
  )
 
2922
  accuracy_pct = f"{accuracy * 100:.1f}%"
2923
 
2924
  stats_html = f"""
2925
+ <div class="stats-section">
2926
  <div class="stat-item">
2927
+ <div class="stat-value">{total}</div>
2928
+ <div class="stat-label">Total Responses</div>
2929
  </div>
2930
  <div class="stat-item">
2931
+ <div class="stat-value">{accuracy_pct}</div>
2932
+ <div class="stat-label">Correct Predictions</div>
2933
  </div>
2934
  </div>
2935
+ <div class="section-meta" style="text-align: center; margin-top: 10px; font-style: italic;">
2936
  Based on user feedback: {correct} correct out of {total} total predictions
2937
  </div>
2938
  """