MaziyarPanahi committed on
Commit
09aab35
·
1 Parent(s): 59f16cb
Files changed (1) hide show
  1. app.py +176 -61
app.py CHANGED
@@ -3,20 +3,20 @@ import pandas as pd
3
  import numpy as np
4
 
5
  # Load data from TSV file
6
- df = pd.read_csv('FACTS.tsv', sep='\t')
7
 
8
  # Clean up the data
9
  df = df.dropna() # Remove any rows with missing values
10
  df.columns = df.columns.str.strip() # Remove any whitespace from column names
11
 
12
  # Rename columns to match our expected format
13
- df = df.rename(columns={
14
- 'model': 'Model Name',
15
- 'size': 'Size'
16
- })
17
 
18
  # Create size display format
19
- df["Size_Display"] = df["Size"].apply(lambda x: f"{int(x)}B" if x == int(x) else f"{x}B")
 
 
 
20
 
21
  # Add size category for filtering
22
  def get_size_category(size):
@@ -33,6 +33,7 @@ def get_size_category(size):
33
  else:
34
  return ">80B"
35
 
 
36
  df["Size_Category"] = df["Size"].apply(get_size_category)
37
 
38
 
@@ -77,11 +78,58 @@ def filter_and_search_models(search_query, size_ranges, sort_by):
77
  # Round numerical values for better display
78
  for col in ["Separate Grounding Score", "Separate Quality Score", "Combined Score"]:
79
  display_df = display_df.copy() # Create a copy to avoid SettingWithCopyWarning
80
- display_df[col] = display_df[col].round(6)
81
 
82
  return display_df
83
 
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  # Create the Gradio interface
86
  with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as app:
87
  gr.Markdown("# 🏆 FACTS Grounding Leaderboard")
@@ -127,33 +175,26 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
127
  total_models = gr.Markdown(f"**Showing {len(df)} models**")
128
 
129
  # Results table below filters
130
- results_table = gr.Dataframe(
131
- value=filter_and_search_models(
132
- "",
133
- ["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
134
- "Combined Score",
 
 
135
  ),
136
- headers=[
137
- "Rank",
138
- "Model Name",
139
- "Size",
140
- "Separate Grounding Score",
141
- "Separate Quality Score",
142
- "Combined Score",
143
- ],
144
- datatype=["number", "str", "str", "number", "number", "number"],
145
  elem_id="leaderboard-table",
146
- interactive=False,
147
- wrap=True,
148
  )
149
 
150
  # Metric explanations at the bottom
151
  with gr.Accordion("Metric Explanations", open=False):
152
- gr.Markdown("""
153
- - **Grounding Score**: Measures the model's ability to provide factually accurate responses based on given context
154
- - **Quality Score**: Evaluates the overall quality of the model's responses including coherence and relevance
155
- - **Combined Score**: A weighted combination of grounding and quality scores representing overall performance
156
- """)
 
 
157
 
158
  with gr.TabItem("About"):
159
  gr.Markdown(
@@ -206,7 +247,7 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
206
  def update_table(search, sizes, sort_by):
207
  filtered_df = filter_and_search_models(search, sizes, sort_by)
208
  model_count = f"**Showing {len(filtered_df)} models**"
209
- return filtered_df, model_count
210
 
211
  # Connect all inputs to the update function
212
  search_box.change(
@@ -229,14 +270,46 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
229
 
230
  # Add custom CSS for better styling
231
  app.css = """
232
- #leaderboard-table {
233
- font-size: 14px;
234
  margin-top: 20px;
235
  max-height: 600px;
236
  overflow-y: auto;
 
 
 
 
 
 
 
 
 
237
  }
238
 
239
- #leaderboard-table td:first-child {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  text-align: center;
241
  font-weight: 600;
242
  color: #444;
@@ -244,64 +317,106 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
244
  width: 60px;
245
  }
246
 
247
- #leaderboard-table td:nth-child(2) {
248
  font-weight: 500;
249
  max-width: 400px;
 
250
  }
251
 
252
- #leaderboard-table td:nth-child(3) {
253
  text-align: center;
254
  font-weight: 500;
255
  color: #666;
 
256
  }
257
 
258
- #leaderboard-table td:nth-child(n+4) {
259
  text-align: center;
 
 
260
  }
261
 
262
- .size-filter {
263
- display: flex;
264
- flex-wrap: wrap;
265
- gap: 15px;
266
- margin-top: 10px;
267
  }
268
 
269
- .size-filter label {
270
- display: flex;
271
- align-items: center;
272
- margin: 0;
273
  }
274
 
275
- .size-filter input[type="checkbox"] {
276
- margin-right: 5px;
277
  }
278
 
279
- /* Highlight rows based on model family */
280
- #leaderboard-table tr:has(td:contains("meta-llama")) {
281
- background-color: #fffbf0;
282
  }
283
 
284
- #leaderboard-table tr:has(td:contains("deepseek")) {
285
- background-color: #f0f8ff;
286
  }
287
 
288
- #leaderboard-table tr:has(td:contains("Qwen")) {
289
- background-color: #f5fff5;
290
  }
291
 
292
- #leaderboard-table tr:has(td:contains("google")) {
293
  background-color: #fff0f5;
294
  }
295
 
296
- /* Header styling */
297
- #leaderboard-table th {
298
- background-color: #f8f9fa;
299
- font-weight: 600;
300
  }
301
 
302
- #leaderboard-table th:first-child {
303
- width: 60px;
304
- text-align: center;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  }
306
  """
307
 
 
3
  import numpy as np
4
 
5
  # Load data from TSV file
6
+ df = pd.read_csv("FACTS.tsv", sep="\t")
7
 
8
  # Clean up the data
9
  df = df.dropna() # Remove any rows with missing values
10
  df.columns = df.columns.str.strip() # Remove any whitespace from column names
11
 
12
  # Rename columns to match our expected format
13
+ df = df.rename(columns={"model": "Model Name", "size": "Size"})
 
 
 
14
 
15
  # Create size display format
16
+ df["Size_Display"] = df["Size"].apply(
17
+ lambda x: f"{int(x)}B" if x == int(x) else f"{x}B"
18
+ )
19
+
20
 
21
  # Add size category for filtering
22
  def get_size_category(size):
 
33
  else:
34
  return ">80B"
35
 
36
+
37
  df["Size_Category"] = df["Size"].apply(get_size_category)
38
 
39
 
 
78
  # Round numerical values for better display
79
  for col in ["Separate Grounding Score", "Separate Quality Score", "Combined Score"]:
80
  display_df = display_df.copy() # Create a copy to avoid SettingWithCopyWarning
81
+ display_df[col] = display_df[col].round(3) # Reduced to 3 decimal places
82
 
83
  return display_df
84
 
85
 
86
def create_html_table(df):
    """Render the leaderboard dataframe as an HTML table string.

    The markup is a ``<div class="leaderboard-container">`` wrapping a
    ``<table class="leaderboard-table">`` with one ``<th>`` per column and
    one ``<tr>`` per dataframe row.

    Args:
        df: DataFrame to render. It must contain a ``"Model Name"`` column,
            which is used to pick the per-row CSS class.

    Returns:
        A single HTML string. Column names and cell values are HTML-escaped,
        so text such as ``<`` or ``&`` in the data is displayed literally
        instead of being interpreted as markup.

    Raises:
        KeyError: If ``df`` has rows but no ``"Model Name"`` column.
    """
    # Imported locally so the function does not depend on the file's
    # top-level import block.
    from html import escape

    # (substring of the model name, CSS class for the row); the first
    # match wins, in the same order as the original if/elif chain.
    family_classes = (
        ("meta-llama", "llama-row"),
        ("deepseek", "deepseek-row"),
        ("Qwen", "qwen-row"),
        ("google", "google-row"),
    )
    # CSS classes for the first three columns by position; every later
    # column is treated as a score column.
    leading_cell_classes = ("rank-cell", "model-cell", "size-cell")

    # Collect fragments and join once instead of growing a string with +=.
    parts = [
        '<div class="leaderboard-container">',
        '<table class="leaderboard-table">',
        "<thead><tr>",
    ]
    parts.extend(f"<th>{escape(str(col))}</th>" for col in df.columns)
    parts.append("</tr></thead>")

    parts.append("<tbody>")
    for _, row in df.iterrows():
        # str() keeps the substring test safe for non-string names.
        model_name = str(row["Model Name"])
        row_class = next(
            (css for marker, css in family_classes if marker in model_name),
            "",
        )
        parts.append(f'<tr class="{row_class}">')
        for position, col in enumerate(df.columns):
            if position < len(leading_cell_classes):
                cell_class = leading_cell_classes[position]
            else:
                cell_class = "score-cell"
            # Format exactly as the f-string interpolation did, then escape.
            cell_text = escape(f"{row[col]}")
            parts.append(f'<td class="{cell_class}">{cell_text}</td>')
        parts.append("</tr>")
    parts.append("</tbody>")
    parts.append("</table>")
    parts.append("</div>")

    return "".join(parts)
131
+
132
+
133
  # Create the Gradio interface
134
  with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as app:
135
  gr.Markdown("# 🏆 FACTS Grounding Leaderboard")
 
175
  total_models = gr.Markdown(f"**Showing {len(df)} models**")
176
 
177
  # Results table below filters
178
+ results_table = gr.HTML(
179
+ value=create_html_table(
180
+ filter_and_search_models(
181
+ "",
182
+ ["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
183
+ "Combined Score",
184
+ )
185
  ),
 
 
 
 
 
 
 
 
 
186
  elem_id="leaderboard-table",
 
 
187
  )
188
 
189
  # Metric explanations at the bottom
190
  with gr.Accordion("Metric Explanations", open=False):
191
+ gr.Markdown(
192
+ """
193
+ - **Grounding Score**: Percentage of responses where all claims are supported by the context
194
+ - **Quality Score**: Percentage of responses that adequately address the user's request
195
+ - **Combined Score**: Percentage of responses that pass both quality and grounding checks
196
+ """
197
+ )
198
 
199
  with gr.TabItem("About"):
200
  gr.Markdown(
 
247
  def update_table(search, sizes, sort_by):
248
  filtered_df = filter_and_search_models(search, sizes, sort_by)
249
  model_count = f"**Showing {len(filtered_df)} models**"
250
+ return create_html_table(filtered_df), model_count
251
 
252
  # Connect all inputs to the update function
253
  search_box.change(
 
270
 
271
  # Add custom CSS for better styling
272
  app.css = """
273
+ .leaderboard-container {
 
274
  margin-top: 20px;
275
  max-height: 600px;
276
  overflow-y: auto;
277
+ border-radius: 8px;
278
+ border: 1px solid #e9ecef;
279
+ }
280
+
281
+ .leaderboard-table {
282
+ width: 100%;
283
+ border-collapse: collapse;
284
+ font-size: 14px;
285
+ background: white;
286
  }
287
 
288
+ .leaderboard-table th {
289
+ background-color: #f8f9fa;
290
+ font-weight: 600;
291
+ padding: 12px 8px;
292
+ text-align: center;
293
+ border-bottom: 2px solid #dee2e6;
294
+ position: sticky;
295
+ top: 0;
296
+ z-index: 10;
297
+ }
298
+
299
+ .leaderboard-table th:first-child {
300
+ width: 60px;
301
+ }
302
+
303
+ .leaderboard-table td {
304
+ padding: 10px 8px;
305
+ border-bottom: 1px solid #f1f3f4;
306
+ }
307
+
308
+ .leaderboard-table tbody tr:hover {
309
+ background-color: #f8f9fa;
310
+ }
311
+
312
+ .rank-cell {
313
  text-align: center;
314
  font-weight: 600;
315
  color: #444;
 
317
  width: 60px;
318
  }
319
 
320
+ .model-cell {
321
  font-weight: 500;
322
  max-width: 400px;
323
+ word-wrap: break-word;
324
  }
325
 
326
+ .size-cell {
327
  text-align: center;
328
  font-weight: 500;
329
  color: #666;
330
+ min-width: 60px;
331
  }
332
 
333
+ .score-cell {
334
  text-align: center;
335
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
336
+ font-size: 13px;
337
  }
338
 
339
+ /* Model family row styling */
340
+ .llama-row {
341
+ background-color: #fffbf0;
 
 
342
  }
343
 
344
+ .llama-row:hover {
345
+ background-color: #fef7e0;
 
 
346
  }
347
 
348
+ .deepseek-row {
349
+ background-color: #f0f8ff;
350
  }
351
 
352
+ .deepseek-row:hover {
353
+ background-color: #e6f3ff;
 
354
  }
355
 
356
+ .qwen-row {
357
+ background-color: #f5fff5;
358
  }
359
 
360
+ .qwen-row:hover {
361
+ background-color: #eaffea;
362
  }
363
 
364
+ .google-row {
365
  background-color: #fff0f5;
366
  }
367
 
368
+ .google-row:hover {
369
+ background-color: #ffe6f0;
 
 
370
  }
371
 
372
+ .size-filter {
373
+ margin-top: 10px;
374
+ }
375
+
376
+ .size-filter > div {
377
+ display: flex !important;
378
+ flex-wrap: wrap !important;
379
+ gap: 8px !important;
380
+ align-items: center !important;
381
+ }
382
+
383
+ .size-filter label {
384
+ display: flex !important;
385
+ align-items: center !important;
386
+ background: #f8f9fa !important;
387
+ border: 2px solid #e9ecef !important;
388
+ border-radius: 8px !important;
389
+ padding: 8px 12px !important;
390
+ margin: 0 !important;
391
+ cursor: pointer !important;
392
+ transition: all 0.2s ease !important;
393
+ font-weight: 500 !important;
394
+ font-size: 14px !important;
395
+ color: #495057 !important;
396
+ min-width: 70px !important;
397
+ justify-content: center !important;
398
+ }
399
+
400
+ .size-filter label:hover {
401
+ background: #e9ecef !important;
402
+ border-color: #6c757d !important;
403
+ }
404
+
405
+ .size-filter input[type="checkbox"] {
406
+ display: none !important;
407
+ }
408
+
409
+ .size-filter input[type="checkbox"]:checked + span {
410
+ background: #0d6efd !important;
411
+ color: white !important;
412
+ border-color: #0d6efd !important;
413
+ }
414
+
415
+ .size-filter label:has(input[type="checkbox"]:checked) {
416
+ background: #0d6efd !important;
417
+ color: white !important;
418
+ border-color: #0d6efd !important;
419
+ box-shadow: 0 2px 4px rgba(13, 110, 253, 0.2) !important;
420
  }
421
  """
422