k-mktr committed on
Commit
f5e465c
·
verified ·
1 Parent(s): 7ddc2df

Update leaderboard.py

Browse files
Files changed (1) hide show
  1. leaderboard.py +58 -253
leaderboard.py CHANGED
@@ -6,7 +6,6 @@ from datetime import datetime
6
  import threading
7
  import config
8
  import math
9
- import plotly.graph_objects as go
10
 
11
  # Initialize Nextcloud client
12
  nc = Nextcloud(nextcloud_url=config.NEXTCLOUD_URL, nc_auth_user=config.NEXTCLOUD_USERNAME, nc_auth_pass=config.NEXTCLOUD_PASSWORD)
@@ -112,101 +111,39 @@ def get_human_readable_name(model_name: str) -> str:
112
  def get_leaderboard():
113
  leaderboard = load_leaderboard()
114
 
115
- # Calculate scores for each model
116
- for model, results in leaderboard.items():
117
- total_battles = results["wins"] + results["losses"]
118
- if total_battles > 0:
119
- win_rate = results["wins"] / total_battles
120
- results["score"] = win_rate * (1 - 1 / (total_battles + 1))
121
- else:
122
- results["score"] = 0
123
-
124
- # Sort results by score, then by total battles
125
- sorted_results = sorted(
126
- leaderboard.items(),
127
- key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
128
- reverse=True
129
- )
130
- # Explanation of the main leaderboard
131
- explanation = """
132
- <p style="font-size: 16px; margin-bottom: 20px;">
133
- This leaderboard uses a scoring system that balances win rate and total battles. The score is calculated using the formula:
134
- <br>
135
- <strong>Score = Win Rate * (1 - 1 / (Total Battles + 1))</strong>
136
- <br>
137
- This formula rewards models with higher win rates and more battles. As the number of battles increases, the score approaches the win rate.
138
- </p>
139
- """
140
-
141
- leaderboard_html = f"""
142
- {explanation}
143
- <style>
144
- .leaderboard-table {{
145
- width: 100%;
146
- border-collapse: collapse;
147
- font-family: Arial, sans-serif;
148
- }}
149
- .leaderboard-table th, .leaderboard-table td {{
150
- border: 1px solid #ddd;
151
- padding: 8px;
152
- text-align: left;
153
- }}
154
- .leaderboard-table th {{
155
- background-color: rgba(255, 255, 255, 0.1);
156
- font-weight: bold;
157
- }}
158
- .rank-column {{
159
- width: 60px;
160
- text-align: center;
161
- }}
162
- .opponent-details {{
163
- font-size: 0.9em;
164
- color: #888;
165
- }}
166
- </style>
167
- <table class='leaderboard-table'>
168
- <tr>
169
- <th class='rank-column'>Rank</th>
170
- <th>Model</th>
171
- <th>Score</th>
172
- <th>Wins</th>
173
- <th>Losses</th>
174
- <th>Win Rate</th>
175
- <th>Total Battles</th>
176
- <th>Top Rival</th>
177
- <th>Toughest Opponent</th>
178
- </tr>
179
- """
180
 
181
- for index, (model, results) in enumerate(sorted_results, start=1):
182
- total_battles = results["wins"] + results["losses"]
183
- win_rate = (results["wins"] / total_battles * 100) if total_battles > 0 else 0
 
184
 
185
- rank_display = {1: "🥇", 2: "🥈", 3: "🥉"}.get(index, f"{index}")
 
186
 
187
- top_rival = max(results["opponents"].items(), key=lambda x: x[1]["wins"], default=(None, {"wins": 0}))
188
- top_rival_name = get_human_readable_name(top_rival[0]) if top_rival[0] else "N/A"
189
- top_rival_wins = top_rival[1]["wins"]
190
 
191
- toughest_opponent = max(results["opponents"].items(), key=lambda x: x[1]["losses"], default=(None, {"losses": 0}))
192
- toughest_opponent_name = get_human_readable_name(toughest_opponent[0]) if toughest_opponent[0] else "N/A"
193
- toughest_opponent_losses = toughest_opponent[1]["losses"]
194
 
195
- leaderboard_html += f"""
196
- <tr>
197
- <td class='rank-column'>{rank_display}</td>
198
- <td>{get_human_readable_name(model)}</td>
199
- <td>{results['score']:.4f}</td>
200
- <td>{results['wins']}</td>
201
- <td>{results['losses']}</td>
202
- <td>{win_rate:.2f}%</td>
203
- <td>{total_battles}</td>
204
- <td class='opponent-details'>{top_rival_name} (W: {top_rival_wins})</td>
205
- <td class='opponent-details'>{toughest_opponent_name} (L: {toughest_opponent_losses})</td>
206
- </tr>
207
- """
208
- leaderboard_html += "</table>"
209
- return leaderboard_html
210
 
211
  def calculate_elo_impact(model):
212
  positive_impact = 0
@@ -232,101 +169,42 @@ def calculate_elo_impact(model):
232
 
233
  def get_elo_leaderboard():
234
  ensure_elo_ratings_initialized()
235
- leaderboard = load_leaderboard()
236
 
237
- # Create a list of all models, including those from APPROVED_MODELS that might not be in the leaderboard yet
 
 
 
 
238
  all_models = set(dict(config.get_approved_models()).keys()) | set(leaderboard.keys())
239
 
240
- elo_data = []
241
  for model in all_models:
242
- initial_rating = 1000 + (get_model_size(model) * 100)
243
- current_rating = elo_ratings.get(model, initial_rating)
244
 
245
- # Calculate battle data only if the model exists in the leaderboard
246
- if model in leaderboard:
247
- wins = leaderboard[model].get('wins', 0)
248
- losses = leaderboard[model].get('losses', 0)
249
- total_battles = wins + losses
250
- positive_impact, negative_impact, _ = calculate_elo_impact(model)
251
- else:
252
- wins = losses = total_battles = positive_impact = negative_impact = 0
253
 
254
- elo_data.append({
255
- 'model': model,
256
- 'current_rating': current_rating,
257
- 'initial_rating': initial_rating,
258
- 'total_battles': total_battles,
259
- 'positive_impact': positive_impact,
260
- 'negative_impact': negative_impact
261
- })
262
-
263
- # Sort the data by current rating
264
- sorted_elo_data = sorted(elo_data, key=lambda x: x['current_rating'], reverse=True)
265
-
266
- min_initial_rating = min(data['initial_rating'] for data in elo_data)
267
- max_initial_rating = max(data['initial_rating'] for data in elo_data)
268
-
269
- explanation_elo = f"""
270
- <p style="font-size: 16px; margin-bottom: 20px;">
271
- This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
272
- Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
273
- The "Positive Impact" score reflects the significance of wins, with higher scores for defeating larger models.
274
- The "Negative Impact" score indicates the significance of losses, with higher scores for losing against smaller models.
275
- The current ELO rating is calculated based on these impacts and the model's performance history.
276
- </p>
277
- """
278
-
279
- leaderboard_html = f"""
280
- {explanation_elo}
281
- <style>
282
- .elo-leaderboard-table {{
283
- width: 100%;
284
- border-collapse: collapse;
285
- font-family: Arial, sans-serif;
286
- }}
287
- .elo-leaderboard-table th, .elo-leaderboard-table td {{
288
- border: 1px solid #ddd;
289
- padding: 8px;
290
- text-align: left;
291
- }}
292
- .elo-leaderboard-table th {{
293
- background-color: rgba(255, 255, 255, 0.1);
294
- font-weight: bold;
295
- }}
296
- .rank-column {{
297
- width: 60px;
298
- text-align: center;
299
- }}
300
- </style>
301
- <table class='elo-leaderboard-table'>
302
- <tr>
303
- <th class='rank-column'>Rank</th>
304
- <th>Model</th>
305
- <th>Current ELO Rating</th>
306
- <th>Positive Impact</th>
307
- <th>Negative Impact</th>
308
- <th>Total Battles</th>
309
- <th>Initial Rating</th>
310
- </tr>
311
- """
312
-
313
- for index, data in enumerate(sorted_elo_data, start=1):
314
- rank_display = {1: "🥇", 2: "🥈", 3: "🥉"}.get(index, f"{index}")
315
 
316
- leaderboard_html += f"""
317
- <tr>
318
- <td class='rank-column'>{rank_display}</td>
319
- <td>{get_human_readable_name(data['model'])}</td>
320
- <td><strong>{round(data['current_rating'])}</strong></td>
321
- <td>{data['positive_impact']}</td>
322
- <td>{data['negative_impact']}</td>
323
- <td>{data['total_battles']}</td>
324
- <td>{round(data['initial_rating'])}</td>
325
- </tr>
326
- """
327
-
328
- leaderboard_html += "</table>"
329
- return leaderboard_html
 
330
 
331
  def create_backup():
332
  while True:
@@ -344,77 +222,4 @@ def create_backup():
344
 
345
  def start_backup_thread():
346
  backup_thread = threading.Thread(target=create_backup, daemon=True)
347
- backup_thread.start()
348
-
349
- def get_leaderboard_chart():
350
- battle_results = get_current_leaderboard()
351
-
352
- # Calculate scores and sort results
353
- for model, results in battle_results.items():
354
- total_battles = results["wins"] + results["losses"]
355
- if total_battles > 0:
356
- win_rate = results["wins"] / total_battles
357
- results["score"] = win_rate * (1 - 1 / (total_battles + 1))
358
- else:
359
- results["score"] = 0
360
-
361
- sorted_results = sorted(
362
- battle_results.items(),
363
- key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
364
- reverse=True
365
- )
366
-
367
- models = [get_human_readable_name(model) for model, _ in sorted_results]
368
- wins = [results["wins"] for _, results in sorted_results]
369
- losses = [results["losses"] for _, results in sorted_results]
370
- scores = [results["score"] for _, results in sorted_results]
371
-
372
- fig = go.Figure()
373
-
374
- # Stacked Bar chart for Wins and Losses
375
- fig.add_trace(go.Bar(
376
- x=models,
377
- y=wins,
378
- name='Wins',
379
- marker_color='#22577a'
380
- ))
381
- fig.add_trace(go.Bar(
382
- x=models,
383
- y=losses,
384
- name='Losses',
385
- marker_color='#38a3a5'
386
- ))
387
-
388
- # Line chart for Scores
389
- fig.add_trace(go.Scatter(
390
- x=models,
391
- y=scores,
392
- name='Score',
393
- yaxis='y2',
394
- line=dict(color='#ff7f0e', width=2)
395
- ))
396
-
397
- # Update layout for full-width, increased height, and secondary y-axis
398
- fig.update_layout(
399
- title='Model Performance',
400
- xaxis_title='Models',
401
- yaxis_title='Number of Battles',
402
- yaxis2=dict(
403
- title='Score',
404
- overlaying='y',
405
- side='right'
406
- ),
407
- barmode='stack',
408
- height=800,
409
- width=1450,
410
- autosize=True,
411
- legend=dict(
412
- orientation='h',
413
- yanchor='bottom',
414
- y=1.02,
415
- xanchor='right',
416
- x=1
417
- )
418
- )
419
-
420
- return fig
 
6
  import threading
7
  import config
8
  import math
 
9
 
10
  # Initialize Nextcloud client
11
  nc = Nextcloud(nextcloud_url=config.NEXTCLOUD_URL, nc_auth_user=config.NEXTCLOUD_USERNAME, nc_auth_pass=config.NEXTCLOUD_PASSWORD)
 
111
  def get_leaderboard():
112
  leaderboard = load_leaderboard()
113
 
114
+ # Prepare data for Gradio table
115
+ table_data = []
116
+ headers = ["Model", "Score", "Wins", "Losses", "Total Battles", "Win Rate"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
+ for model, results in leaderboard.items():
119
+ wins = results.get('wins', 0)
120
+ losses = results.get('losses', 0)
121
+ total_battles = wins + losses
122
 
123
+ # Calculate win rate
124
+ win_rate = wins / total_battles if total_battles > 0 else 0
125
 
126
+ # Calculate score using the formula: win_rate * (1 - 1/(total_battles + 1))
127
+ score = win_rate * (1 - 1/(total_battles + 1)) if total_battles > 0 else 0
 
128
 
129
+ # Get human readable name
130
+ human_readable = get_human_readable_name(model)
 
131
 
132
+ # Format the row
133
+ row = [
134
+ human_readable,
135
+ f"{score:.3f}",
136
+ str(wins),
137
+ str(losses),
138
+ str(total_battles),
139
+ f"{win_rate:.1%}"
140
+ ]
141
+ table_data.append(row)
142
+
143
+ # Sort by score (descending)
144
+ table_data.sort(key=lambda x: float(x[1]), reverse=True)
145
+
146
+ return table_data
147
 
148
  def calculate_elo_impact(model):
149
  positive_impact = 0
 
169
 
170
  def get_elo_leaderboard():
171
  ensure_elo_ratings_initialized()
 
172
 
173
+ # Prepare data for Gradio table
174
+ table_data = []
175
+ headers = ["Model", "ELO Rating", "Wins", "Losses", "Total Battles", "Win Rate"]
176
+
177
+ leaderboard = load_leaderboard()
178
  all_models = set(dict(config.get_approved_models()).keys()) | set(leaderboard.keys())
179
 
 
180
  for model in all_models:
181
+ # Get ELO rating
182
+ rating = elo_ratings.get(model, 1000 + (get_model_size(model) * 100))
183
 
184
+ # Get battle data
185
+ wins = leaderboard.get(model, {}).get('wins', 0)
186
+ losses = leaderboard.get(model, {}).get('losses', 0)
187
+ total_battles = wins + losses
188
+ win_rate = wins / total_battles if total_battles > 0 else 0
 
 
 
189
 
190
+ # Get human readable name
191
+ human_readable = get_human_readable_name(model)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
+ # Format the row
194
+ row = [
195
+ human_readable,
196
+ f"{rating:.1f}",
197
+ str(wins),
198
+ str(losses),
199
+ str(total_battles),
200
+ f"{win_rate:.1%}"
201
+ ]
202
+ table_data.append(row)
203
+
204
+ # Sort by ELO rating (descending)
205
+ table_data.sort(key=lambda x: float(x[1]), reverse=True)
206
+
207
+ return table_data
208
 
209
  def create_backup():
210
  while True:
 
222
 
223
  def start_backup_thread():
224
  backup_thread = threading.Thread(target=create_backup, daemon=True)
225
+ backup_thread.start()