Spaces:

k-mktr
/

gpu-poor-llm-arena

Running

App Files Community

k-mktr committed on Mar 13

Commit

f5e465c

verified ·

1 Parent(s): 7ddc2df

Update leaderboard.py

Browse files

Files changed (1) hide show

leaderboard.py +58 -253

leaderboard.py CHANGED Viewed

@@ -6,7 +6,6 @@ from datetime import datetime
 import threading
 import config
 import math
-import plotly.graph_objects as go
 # Initialize Nextcloud client
 nc = Nextcloud(nextcloud_url=config.NEXTCLOUD_URL, nc_auth_user=config.NEXTCLOUD_USERNAME, nc_auth_pass=config.NEXTCLOUD_PASSWORD)
@@ -112,101 +111,39 @@ def get_human_readable_name(model_name: str) -> str:
 def get_leaderboard():
     leaderboard = load_leaderboard()
-    # Calculate scores for each model
-    for model, results in leaderboard.items():
-        total_battles = results["wins"] + results["losses"]
-        if total_battles > 0:
-            win_rate = results["wins"] / total_battles
-            results["score"] = win_rate * (1 - 1 / (total_battles + 1))
-        else:
-            results["score"] = 0
-    # Sort results by score, then by total battles
-    sorted_results = sorted(
-        leaderboard.items(),
-        key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
-        reverse=True
-    )
-    # Explanation of the main leaderboard
-    explanation = """
-    <p style="font-size: 16px; margin-bottom: 20px;">
-    This leaderboard uses a scoring system that balances win rate and total battles. The score is calculated using the formula:
-    <br>
-    <strong>Score = Win Rate * (1 - 1 / (Total Battles + 1))</strong>
-    <br>
-    This formula rewards models with higher win rates and more battles. As the number of battles increases, the score approaches the win rate.
-    </p>
-    """
-    leaderboard_html = f"""
-    {explanation}
-    <style>
-        .leaderboard-table {{
-            width: 100%;
-            border-collapse: collapse;
-            font-family: Arial, sans-serif;
-        }}
-        .leaderboard-table th, .leaderboard-table td {{
-            border: 1px solid #ddd;
-            padding: 8px;
-            text-align: left;
-        }}
-        .leaderboard-table th {{
-            background-color: rgba(255, 255, 255, 0.1);
-            font-weight: bold;
-        }}
-        .rank-column {{
-            width: 60px;
-            text-align: center;
-        }}
-        .opponent-details {{
-            font-size: 0.9em;
-            color: #888;
-        }}
-    </style>
-    <table class='leaderboard-table'>
-    <tr>
-        <th class='rank-column'>Rank</th>
-        <th>Model</th>
-        <th>Score</th>
-        <th>Wins</th>
-        <th>Losses</th>
-        <th>Win Rate</th>
-        <th>Total Battles</th>
-        <th>Top Rival</th>
-        <th>Toughest Opponent</th>
-    </tr>
-    """
-    for index, (model, results) in enumerate(sorted_results, start=1):
-        total_battles = results["wins"] + results["losses"]
-        win_rate = (results["wins"] / total_battles * 100) if total_battles > 0 else 0
-        rank_display = {1: "🥇", 2: "🥈", 3: "🥉"}.get(index, f"{index}")
-        top_rival = max(results["opponents"].items(), key=lambda x: x[1]["wins"], default=(None, {"wins": 0}))
-        top_rival_name = get_human_readable_name(top_rival[0]) if top_rival[0] else "N/A"
-        top_rival_wins = top_rival[1]["wins"]
-        toughest_opponent = max(results["opponents"].items(), key=lambda x: x[1]["losses"], default=(None, {"losses": 0}))
-        toughest_opponent_name = get_human_readable_name(toughest_opponent[0]) if toughest_opponent[0] else "N/A"
-        toughest_opponent_losses = toughest_opponent[1]["losses"]
-        leaderboard_html += f"""
-        <tr>
-            <td class='rank-column'>{rank_display}</td>
-            <td>{get_human_readable_name(model)}</td>
-            <td>{results['score']:.4f}</td>
-            <td>{results['wins']}</td>
-            <td>{results['losses']}</td>
-            <td>{win_rate:.2f}%</td>
-            <td>{total_battles}</td>
-            <td class='opponent-details'>{top_rival_name} (W: {top_rival_wins})</td>
-            <td class='opponent-details'>{toughest_opponent_name} (L: {toughest_opponent_losses})</td>
-        </tr>
-        """
-    leaderboard_html += "</table>"
-    return leaderboard_html
 def calculate_elo_impact(model):
     positive_impact = 0
@@ -232,101 +169,42 @@ def calculate_elo_impact(model):
 def get_elo_leaderboard():
     ensure_elo_ratings_initialized()
-    leaderboard = load_leaderboard()
-    # Create a list of all models, including those from APPROVED_MODELS that might not be in the leaderboard yet
     all_models = set(dict(config.get_approved_models()).keys()) | set(leaderboard.keys())
-    elo_data = []
     for model in all_models:
-        initial_rating = 1000 + (get_model_size(model) * 100)
-        current_rating = elo_ratings.get(model, initial_rating)
-        # Calculate battle data only if the model exists in the leaderboard
-        if model in leaderboard:
-            wins = leaderboard[model].get('wins', 0)
-            losses = leaderboard[model].get('losses', 0)
-            total_battles = wins + losses
-            positive_impact, negative_impact, _ = calculate_elo_impact(model)
-        else:
-            wins = losses = total_battles = positive_impact = negative_impact = 0
-        elo_data.append({
-            'model': model,
-            'current_rating': current_rating,
-            'initial_rating': initial_rating,
-            'total_battles': total_battles,
-            'positive_impact': positive_impact,
-            'negative_impact': negative_impact
-        })
-    # Sort the data by current rating
-    sorted_elo_data = sorted(elo_data, key=lambda x: x['current_rating'], reverse=True)
-    min_initial_rating = min(data['initial_rating'] for data in elo_data)
-    max_initial_rating = max(data['initial_rating'] for data in elo_data)
-    explanation_elo = f"""
-    <p style="font-size: 16px; margin-bottom: 20px;">
-    This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
-    Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
-    The "Positive Impact" score reflects the significance of wins, with higher scores for defeating larger models.
-    The "Negative Impact" score indicates the significance of losses, with higher scores for losing against smaller models.
-    The current ELO rating is calculated based on these impacts and the model's performance history.
-    </p>
-    """
-    leaderboard_html = f"""
-    {explanation_elo}
-    <style>
-        .elo-leaderboard-table {{
-            width: 100%;
-            border-collapse: collapse;
-            font-family: Arial, sans-serif;
-        }}
-        .elo-leaderboard-table th, .elo-leaderboard-table td {{
-            border: 1px solid #ddd;
-            padding: 8px;
-            text-align: left;
-        }}
-        .elo-leaderboard-table th {{
-            background-color: rgba(255, 255, 255, 0.1);
-            font-weight: bold;
-        }}
-        .rank-column {{
-            width: 60px;
-            text-align: center;
-        }}
-    </style>
-    <table class='elo-leaderboard-table'>
-    <tr>
-        <th class='rank-column'>Rank</th>
-        <th>Model</th>
-        <th>Current ELO Rating</th>
-        <th>Positive Impact</th>
-        <th>Negative Impact</th>
-        <th>Total Battles</th>
-        <th>Initial Rating</th>
-    </tr>
-    """
-    for index, data in enumerate(sorted_elo_data, start=1):
-        rank_display = {1: "🥇", 2: "🥈", 3: "🥉"}.get(index, f"{index}")
-        leaderboard_html += f"""
-        <tr>
-            <td class='rank-column'>{rank_display}</td>
-            <td>{get_human_readable_name(data['model'])}</td>
-            <td><strong>{round(data['current_rating'])}</strong></td>
-            <td>{data['positive_impact']}</td>
-            <td>{data['negative_impact']}</td>
-            <td>{data['total_battles']}</td>
-            <td>{round(data['initial_rating'])}</td>
-        </tr>
-        """
-    leaderboard_html += "</table>"
-    return leaderboard_html
 def create_backup():
     while True:
@@ -344,77 +222,4 @@ def create_backup():
 def start_backup_thread():
     backup_thread = threading.Thread(target=create_backup, daemon=True)
-    backup_thread.start()
-def get_leaderboard_chart():
-    battle_results = get_current_leaderboard()
-    # Calculate scores and sort results
-    for model, results in battle_results.items():
-        total_battles = results["wins"] + results["losses"]
-        if total_battles > 0:
-            win_rate = results["wins"] / total_battles
-            results["score"] = win_rate * (1 - 1 / (total_battles + 1))
-        else:
-            results["score"] = 0
-    sorted_results = sorted(
-        battle_results.items(),
-        key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
-        reverse=True
-    )
-    models = [get_human_readable_name(model) for model, _ in sorted_results]
-    wins = [results["wins"] for _, results in sorted_results]
-    losses = [results["losses"] for _, results in sorted_results]
-    scores = [results["score"] for _, results in sorted_results]
-    fig = go.Figure()
-    # Stacked Bar chart for Wins and Losses
-    fig.add_trace(go.Bar(
-        x=models,
-        y=wins,
-        name='Wins',
-        marker_color='#22577a'
-    ))
-    fig.add_trace(go.Bar(
-        x=models,
-        y=losses,
-        name='Losses',
-        marker_color='#38a3a5'
-    ))
-    # Line chart for Scores
-    fig.add_trace(go.Scatter(
-        x=models,
-        y=scores,
-        name='Score',
-        yaxis='y2',
-        line=dict(color='#ff7f0e', width=2)
-    ))
-    # Update layout for full-width, increased height, and secondary y-axis
-    fig.update_layout(
-        title='Model Performance',
-        xaxis_title='Models',
-        yaxis_title='Number of Battles',
-        yaxis2=dict(
-            title='Score',
-            overlaying='y',
-            side='right'
-        ),
-        barmode='stack',
-        height=800,
-        width=1450,
-        autosize=True,
-        legend=dict(
-            orientation='h',
-            yanchor='bottom',
-            y=1.02,
-            xanchor='right',
-            x=1
-        )
-    )
-    return fig

 import threading
 import config
 import math
 # Initialize Nextcloud client
 nc = Nextcloud(nextcloud_url=config.NEXTCLOUD_URL, nc_auth_user=config.NEXTCLOUD_USERNAME, nc_auth_pass=config.NEXTCLOUD_PASSWORD)
 def get_leaderboard():
     leaderboard = load_leaderboard()
+    # Prepare data for Gradio table
+    table_data = []
+    headers = ["Model", "Score", "Wins", "Losses", "Total Battles", "Win Rate"]
+    for model, results in leaderboard.items():
+        wins = results.get('wins', 0)
+        losses = results.get('losses', 0)
+        total_battles = wins + losses
+        # Calculate win rate
+        win_rate = wins / total_battles if total_battles > 0 else 0
+        # Calculate score using the formula: win_rate * (1 - 1/(total_battles + 1))
+        score = win_rate * (1 - 1/(total_battles + 1)) if total_battles > 0 else 0
+        # Get human readable name
+        human_readable = get_human_readable_name(model)
+        # Format the row
+        row = [
+            human_readable,
+            f"{score:.3f}",
+            str(wins),
+            str(losses),
+            str(total_battles),
+            f"{win_rate:.1%}"
+        ]
+        table_data.append(row)
+    # Sort by score (descending)
+    table_data.sort(key=lambda x: float(x[1]), reverse=True)
+    return table_data
 def calculate_elo_impact(model):
     positive_impact = 0
 def get_elo_leaderboard():
     ensure_elo_ratings_initialized()
+    # Prepare data for Gradio table
+    table_data = []
+    headers = ["Model", "ELO Rating", "Wins", "Losses", "Total Battles", "Win Rate"]
+    leaderboard = load_leaderboard()
     all_models = set(dict(config.get_approved_models()).keys()) | set(leaderboard.keys())
     for model in all_models:
+        # Get ELO rating
+        rating = elo_ratings.get(model, 1000 + (get_model_size(model) * 100))
+        # Get battle data
+        wins = leaderboard.get(model, {}).get('wins', 0)
+        losses = leaderboard.get(model, {}).get('losses', 0)
+        total_battles = wins + losses
+        win_rate = wins / total_battles if total_battles > 0 else 0
+        # Get human readable name
+        human_readable = get_human_readable_name(model)
+        # Format the row
+        row = [
+            human_readable,
+            f"{rating:.1f}",
+            str(wins),
+            str(losses),
+            str(total_battles),
+            f"{win_rate:.1%}"
+        ]
+        table_data.append(row)
+    # Sort by ELO rating (descending)
+    table_data.sort(key=lambda x: float(x[1]), reverse=True)
+    return table_data
 def create_backup():
     while True:
 def start_backup_thread():
     backup_thread = threading.Thread(target=create_backup, daemon=True)
+    backup_thread.start()