Spaces:

HumeAI
/

expressive-tts-arena

Running

Zachary Greathouse twitchard committed on Mar 24

Commit

97b3bfd

unverified ·

1 Parent(s): 9ed181c

Zg/add head to head results (#19)

* Add OpenAI python SDK to dependencies

* Fix Anthropic clean API Error message.

* Update constants and custom types associated with TTS providers to include OpenAI

* Add OpenAI integration

* Update logic for selecting providers, add OpenAI tts to UI

* Fix typo in openai_api.py

* Update docstrings in openai_api.py

* Update leaderboard results query to include OpenAI results

* Add citation

* Adjust padding in UI components

* Adjust padding in UI components in citation

* Add transitive dependency override for sounddevice in pyproject.toml

* remove sounddevice

* Add warning toast for custom text inputs

* Improve leaderboard results query to account for zero records, and update to only include relevant comparison types for each provider.

* Fix database package imports and add head-to-head comparison queries

* Add utils for fetching head-to-head comparison data

* Updates UI to include head-to-head comparison tables, updates leaderboard data fetching in UI to include comparison data

---------

Co-authored-by: twitchard <[email protected]>

Files changed (7) hide show

src/constants.py +1 -9
src/database/__init__.py +6 -1
src/database/crud.py +164 -7
src/frontend.py +58 -16
src/scripts/init_db.py +1 -1
src/scripts/test_db.py +1 -1
src/utils.py +146 -30

src/constants.py CHANGED Viewed

@@ -10,7 +10,6 @@ from typing import Dict, List
 # Third-Party Library Imports
 from src.custom_types import (
     ComparisonType,
-    LeaderboardEntry,
     OptionKey,
     OptionLabel,
     TTSProviderName,
@@ -26,7 +25,7 @@ HUME_AI: TTSProviderName = "Hume AI"
 ELEVENLABS: TTSProviderName = "ElevenLabs"
 OPENAI: TTSProviderName = "OpenAI"
-TTS_PROVIDERS: List[TTSProviderName] = ["Hume AI", "ElevenLabs", "OpenAI"]
 TTS_PROVIDER_LINKS = {
     "Hume AI": {
         "provider_link": "https://hume.ai/",
@@ -169,10 +168,3 @@ META_TAGS: List[Dict[str, str]] = [
         'content': '/static/arena-opengraph-logo.png'
     }
 ]
-# Reflects and empty leaderboard state
-DEFAULT_LEADERBOARD: List[LeaderboardEntry] = [
-    LeaderboardEntry("1", "", "", "0%", "0"),
-    LeaderboardEntry("2", "", "", "0%", "0"),
-    LeaderboardEntry("3", "", "", "0%", "0"),
-]

 # Third-Party Library Imports
 from src.custom_types import (
     ComparisonType,
     OptionKey,
     OptionLabel,
     TTSProviderName,
 ELEVENLABS: TTSProviderName = "ElevenLabs"
 OPENAI: TTSProviderName = "OpenAI"
+TTS_PROVIDERS: List[TTSProviderName] = ["Hume AI", "OpenAI", "ElevenLabs"]
 TTS_PROVIDER_LINKS = {
     "Hume AI": {
         "provider_link": "https://hume.ai/",
         'content': '/static/arena-opengraph-logo.png'
     }
 ]

src/database/__init__.py CHANGED Viewed

@@ -1,10 +1,15 @@
-from .crud import create_vote
 from .database import AsyncDBSessionMaker, Base, engine, init_db
 __all__ = [
     "AsyncDBSessionMaker",
     "Base",
     "create_vote",
     "engine",
     "init_db",
 ]

+from .crud import create_vote, get_head_to_head_battle_stats, get_head_to_head_win_rate_stats, get_leaderboard_stats
 from .database import AsyncDBSessionMaker, Base, engine, init_db
+from .models import VoteResult
 __all__ = [
     "AsyncDBSessionMaker",
     "Base",
+    "VoteResult",
     "create_vote",
     "engine",
+    "get_head_to_head_battle_stats",
+    "get_head_to_head_win_rate_stats",
+    "get_leaderboard_stats",
     "init_db",
 ]

src/database/crud.py CHANGED Viewed

@@ -5,6 +5,9 @@ This module defines the operations for the Expressive TTS Arena project's databa
 Since vote records are never updated or deleted, only functions to create and read votes are provided.
 """
 # Third-Party Library Imports
 from sqlalchemy import text
 from sqlalchemy.exc import SQLAlchemyError
@@ -12,7 +15,6 @@ from sqlalchemy.ext.asyncio import AsyncSession
 # Local Application Imports
 from src.config import logger
-from src.constants import DEFAULT_LEADERBOARD
 from src.custom_types import LeaderboardEntry, LeaderboardTableEntries, VotingResults
 from src.database.models import VoteResult
@@ -83,6 +85,12 @@ async def get_leaderboard_stats(db: AsyncSession) -> LeaderboardTableEntries:
         LeaderboardTableEntries: A list of LeaderboardEntry objects containing rank,
                                 provider name, model name, win rate, and total votes.
     """
     try:
         query = text(
             """
@@ -137,6 +145,10 @@ async def get_leaderboard_stats(db: AsyncSession) -> LeaderboardTableEntries:
         result = await db.execute(query)
         rows = result.fetchall()
         # Format the data for the leaderboard
         leaderboard_data = []
         for i, row in enumerate(rows, 1):
@@ -150,16 +162,161 @@ async def get_leaderboard_stats(db: AsyncSession) -> LeaderboardTableEntries:
             )
             leaderboard_data.append(leaderboard_entry)
-        # If no data was found, return default entries
-        if not leaderboard_data:
-            return DEFAULT_LEADERBOARD
         return leaderboard_data
     except SQLAlchemyError as e:
         logger.error(f"Database error while fetching leaderboard stats: {e}")
-        return DEFAULT_LEADERBOARD
     except Exception as e:
         logger.error(f"Unexpected error while fetching leaderboard stats: {e}")
-        return DEFAULT_LEADERBOARD

 Since vote records are never updated or deleted, only functions to create and read votes are provided.
 """
+# Standard Library Imports
+from typing import List
 # Third-Party Library Imports
 from sqlalchemy import text
 from sqlalchemy.exc import SQLAlchemyError
 # Local Application Imports
 from src.config import logger
 from src.custom_types import LeaderboardEntry, LeaderboardTableEntries, VotingResults
 from src.database.models import VoteResult
         LeaderboardTableEntries: A list of LeaderboardEntry objects containing rank,
                                 provider name, model name, win rate, and total votes.
     """
+    default_leaderboard = [
+        LeaderboardEntry("1", "", "", "0%", "0"),
+        LeaderboardEntry("2", "", "", "0%", "0"),
+        LeaderboardEntry("3", "", "", "0%", "0"),
+    ]
     try:
         query = text(
             """
         result = await db.execute(query)
         rows = result.fetchall()
+        # If no rows, return default
+        if not rows:
+            return default_leaderboard
         # Format the data for the leaderboard
         leaderboard_data = []
         for i, row in enumerate(rows, 1):
             )
             leaderboard_data.append(leaderboard_entry)
         return leaderboard_data
     except SQLAlchemyError as e:
         logger.error(f"Database error while fetching leaderboard stats: {e}")
+        return default_leaderboard
     except Exception as e:
         logger.error(f"Unexpected error while fetching leaderboard stats: {e}")
+        return default_leaderboard
+async def get_head_to_head_battle_stats(db: AsyncSession) -> List[List[str]]:
+    """
+    Count the stored votes for each cross-provider comparison type.
+    The query skips votes whose comparison type is "Hume AI - Hume AI". Any of the three
+    expected comparison types that has no votes is reported with a count of "0", and the
+    final list is ordered by comparison type.
+    Args:
+        db (AsyncSession): The SQLAlchemy async database session.
+    Returns:
+        List[List[str]]: One [comparison_type, count] pair per comparison type, with the
+            count rendered as a string. A zeroed placeholder list is returned when the
+            query yields no rows or raises.
+    """
+    expected_types = ("Hume AI - OpenAI", "Hume AI - ElevenLabs", "OpenAI - ElevenLabs")
+    # Placeholder returned when there is nothing to report (no rows, or any failure)
+    zero_counts = [[name, "0"] for name in expected_types]
+    try:
+        statement = text(
+            """
+            SELECT
+                comparison_type,
+                COUNT(*) as total
+            FROM vote_results
+            WHERE comparison_type != 'Hume AI - Hume AI'
+            GROUP BY comparison_type
+            ORDER BY comparison_type;
+            """
+        )
+        rows = (await db.execute(statement)).fetchall()
+        if not rows:
+            return zero_counts
+        # Stringify the counts so every cell has the same type
+        counts = [[comparison_type, str(total)] for comparison_type, total in rows]
+        # Pad with zero counts for expected comparison types that have no votes yet
+        seen = {entry[0] for entry in counts}
+        counts.extend([name, "0"] for name in expected_types if name not in seen)
+        return sorted(counts, key=lambda entry: entry[0])
+    except SQLAlchemyError as e:
+        logger.error(f"Database error while fetching comparison counts: {e}")
+        return zero_counts
+    except Exception as e:
+        logger.error(f"Unexpected error while fetching comparison counts: {e}")
+        return zero_counts
+async def get_head_to_head_win_rate_stats(db: AsyncSession) -> List[List[str]]:
+    """
+    Compute, per comparison type, how often each of the two providers won.
+    The query skips votes whose comparison type is "Hume AI - Hume AI" and rounds each
+    percentage to two decimals. Any of the three expected comparison types that has no
+    votes is reported as "0%" for both providers, and the final list is ordered by
+    comparison type.
+    Args:
+        db (AsyncSession): The SQLAlchemy async database session.
+    Returns:
+        List[List[str]]: One [comparison_type, first_rate, second_rate] triple per
+            comparison type, where first_rate / second_rate are the win rates of the
+            provider named first / second in the comparison type, each suffixed with "%".
+            A zeroed placeholder list is returned when the query yields no rows or raises.
+    """
+    expected_types = ("Hume AI - OpenAI", "Hume AI - ElevenLabs", "OpenAI - ElevenLabs")
+    # Placeholder returned when there is nothing to report (no rows, or any failure)
+    zero_rates = [[name, "0%", "0%"] for name in expected_types]
+    try:
+        statement = text(
+            """
+            SELECT
+                comparison_type,
+                CASE WHEN COUNT(*) > 0
+                    THEN ROUND(SUM(CASE
+                        WHEN comparison_type = 'Hume AI - OpenAI' AND winning_provider = 'Hume AI' THEN 1
+                        WHEN comparison_type = 'Hume AI - ElevenLabs' AND winning_provider = 'Hume AI' THEN 1
+                        WHEN comparison_type = 'OpenAI - ElevenLabs' AND winning_provider = 'OpenAI' THEN 1
+                        ELSE 0
+                    END) * 100.0 / COUNT(*), 2)
+                    ELSE 0
+                END as first_provider_win_rate,
+                CASE WHEN COUNT(*) > 0
+                    THEN ROUND(SUM(CASE
+                        WHEN comparison_type = 'Hume AI - OpenAI' AND winning_provider = 'OpenAI' THEN 1
+                        WHEN comparison_type = 'Hume AI - ElevenLabs' AND winning_provider = 'ElevenLabs' THEN 1
+                        WHEN comparison_type = 'OpenAI - ElevenLabs' AND winning_provider = 'ElevenLabs' THEN 1
+                        ELSE 0
+                    END) * 100.0 / COUNT(*), 2)
+                    ELSE 0
+                END as second_provider_win_rate
+            FROM vote_results
+            WHERE comparison_type != 'Hume AI - Hume AI'
+            GROUP BY comparison_type
+            ORDER BY comparison_type;
+            """
+        )
+        rows = (await db.execute(statement)).fetchall()
+        if not rows:
+            return zero_rates
+        # Render both percentages as display strings
+        rates = [
+            [comparison_type, f"{first_rate}%", f"{second_rate}%"]
+            for comparison_type, first_rate, second_rate in rows
+        ]
+        # Pad with zero rates for expected comparison types that have no votes yet
+        seen = {entry[0] for entry in rates}
+        rates.extend([name, "0%", "0%"] for name in expected_types if name not in seen)
+        return sorted(rates, key=lambda entry: entry[0])
+    except SQLAlchemyError as e:
+        logger.error(f"Database error while fetching provider win rates: {e}")
+        return zero_rates
+    except Exception as e:
+        logger.error(f"Unexpected error while fetching provider win rates: {e}")
+        return zero_rates

src/frontend.py CHANGED Viewed

@@ -22,7 +22,7 @@ import gradio as gr
 from src import constants
 from src.config import Config, logger
 from src.custom_types import Option, OptionMap
-from src.database.database import AsyncDBSessionMaker
 from src.integrations import (
     AnthropicError,
     ElevenLabsError,
@@ -54,6 +54,8 @@ class Frontend:
         # leaderboard update state
         self._leaderboard_data: List[List[str]] = [[]]
         self._leaderboard_cache_hash: Optional[str] = None
         self._last_leaderboard_update_time: float = 0.0
         self._min_refresh_interval = 30
@@ -77,7 +79,11 @@ class Frontend:
             return False
         # Fetch the latest data
-        latest_leaderboard_data = await get_leaderboard_data(self.db_session_maker)
         # Generate a hash of the new data to check if it's changed
         data_str = json.dumps(str(latest_leaderboard_data))
@@ -90,6 +96,8 @@ class Frontend:
         # Update the cache and timestamp
         self._leaderboard_data = latest_leaderboard_data
         self._leaderboard_cache_hash = data_hash
         self._last_leaderboard_update_time = current_time
         logger.info("Leaderboard data updated successfully.")
@@ -330,7 +338,7 @@ class Frontend:
             gr.update(value=character_description), # Update character description
         )
-    async def _refresh_leaderboard(self, force: bool = False) -> gr.DataFrame:
         """
         Asynchronously fetches and formats the latest leaderboard data.
@@ -338,17 +346,20 @@ class Frontend:
             force (bool): If True, bypass time-based throttling.
         Returns:
-            gr.DataFrame: Updated DataFrame or gr.skip() if no update needed
         """
         data_updated = await self._update_leaderboard_data(force=force)
         if not self._leaderboard_data:
             raise gr.Error("Unable to retrieve leaderboard data. Please refresh the page or try again shortly.")
-        # Only return an update if the data changed or force=True
-        if data_updated:
-            return gr.update(value=self._leaderboard_data)
-        return gr.skip()
     async def _handle_tab_select(self, evt: gr.SelectData):
         """
@@ -358,12 +369,11 @@ class Frontend:
             evt (gr.SelectData): Event data containing information about the selected tab
         Returns:
-            gr.update or gr.skip: Update for the leaderboard table if data changed, otherwise skip
         """
-        # Check if the selected tab is "Leaderboard" by name
         if evt.value == "Leaderboard":
             return await self._refresh_leaderboard(force=False)
-        return gr.skip()
     def _disable_ui(self) -> Tuple[
         gr.Button,
@@ -909,6 +919,37 @@ class Frontend:
                 elem_id="leaderboard-table"
             )
         with gr.Accordion(label="Citation", open=False):
             with gr.Column(variant="panel"):
                 with gr.Column(variant="panel"):
@@ -965,7 +1006,8 @@ class Frontend:
         # Wrapper for the async refresh function
         async def async_refresh_handler():
-            return await self._refresh_leaderboard(force=True)
         # Handler to re-enable the button after a refresh
         def reenable_button():
@@ -980,14 +1022,14 @@ class Frontend:
         ).then(
             fn=async_refresh_handler,
             inputs=[],
-            outputs=[leaderboard_table]
         ).then(
             fn=reenable_button,
             inputs=[],
             outputs=[refresh_button]
         )
-        return leaderboard_table
     async def build_gradio_interface(self) -> gr.Blocks:
         """
@@ -1004,12 +1046,12 @@ class Frontend:
                 with gr.TabItem("Arena"):
                     self._build_arena_section()
                 with gr.TabItem("Leaderboard"):
-                    leaderboard_table = self._build_leaderboard_section()
             tabs.select(
                 fn=self._handle_tab_select,
                 inputs=[],
-                outputs=[leaderboard_table],
             )
         logger.debug("Gradio interface built successfully")

 from src import constants
 from src.config import Config, logger
 from src.custom_types import Option, OptionMap
+from src.database import AsyncDBSessionMaker
 from src.integrations import (
     AnthropicError,
     ElevenLabsError,
         # leaderboard update state
         self._leaderboard_data: List[List[str]] = [[]]
+        self._battle_counts_data: List[List[str]] = [[]]
+        self._win_rates_data: List[List[str]] = [[]]
         self._leaderboard_cache_hash: Optional[str] = None
         self._last_leaderboard_update_time: float = 0.0
         self._min_refresh_interval = 30
             return False
         # Fetch the latest data
+        (
+            latest_leaderboard_data,
+            latest_battle_counts_data,
+            latest_win_rates_data
+        ) = await get_leaderboard_data(self.db_session_maker)
         # Generate a hash of the new data to check if it's changed
         data_str = json.dumps(str(latest_leaderboard_data))
         # Update the cache and timestamp
         self._leaderboard_data = latest_leaderboard_data
+        self._battle_counts_data = latest_battle_counts_data
+        self._win_rates_data = latest_win_rates_data
         self._leaderboard_cache_hash = data_hash
         self._last_leaderboard_update_time = current_time
         logger.info("Leaderboard data updated successfully.")
             gr.update(value=character_description), # Update character description
         )
+    async def _refresh_leaderboard(self, force: bool = False) -> Tuple[gr.DataFrame, gr.DataFrame, gr.DataFrame]:
         """
         Asynchronously fetches and formats the latest leaderboard data.
             force (bool): If True, bypass time-based throttling.
         Returns:
+            tuple: Updated DataFrames or gr.skip() if no update needed
         """
         data_updated = await self._update_leaderboard_data(force=force)
         if not self._leaderboard_data:
             raise gr.Error("Unable to retrieve leaderboard data. Please refresh the page or try again shortly.")
+        if data_updated or force:
+            return (
+                gr.update(value=self._leaderboard_data),
+                gr.update(value=self._battle_counts_data),
+                gr.update(value=self._win_rates_data)
+            )
+        return gr.skip(), gr.skip(), gr.skip()
     async def _handle_tab_select(self, evt: gr.SelectData):
         """
             evt (gr.SelectData): Event data containing information about the selected tab
         Returns:
+            tuple: Updates for the three tables if data changed, otherwise skip
         """
         if evt.value == "Leaderboard":
             return await self._refresh_leaderboard(force=False)
+        return gr.skip(), gr.skip(), gr.skip()
     def _disable_ui(self) -> Tuple[
         gr.Button,
                 elem_id="leaderboard-table"
             )
+        with gr.Column():
+            gr.HTML(
+                value="""
+                <h2 style="padding-top: 12px;" class="tab-header">📊 Head-to-Head Matchups</h2>
+                <p style="padding-left: 8px; width: 80%;">
+                    These tables show how each provider performs against others in direct comparisons.
+                    The first table shows the total number of comparisons between each pair of providers.
+                    The second table shows the win rate (percentage) of the row provider against the column provider.
+                </p>
+                """,
+                padding=False
+            )
+        with gr.Row(equal_height=True):
+            with gr.Column(min_width=420):
+                battle_counts_table = gr.DataFrame(
+                    headers=["", "Hume AI", "OpenAI", "ElevenLabs"],
+                    datatype=["html", "html", "html", "html"],
+                    column_widths=[132, 132, 132, 132],
+                    value=self._battle_counts_data,
+                    interactive=False,
+                )
+            with gr.Column(min_width=420):
+                win_rates_table = gr.DataFrame(
+                    headers=["", "Hume AI", "OpenAI", "ElevenLabs"],
+                    datatype=["html", "html", "html", "html"],
+                    column_widths=[132, 132, 132, 132],
+                    value=self._win_rates_data,
+                    interactive=False,
+                )
         with gr.Accordion(label="Citation", open=False):
             with gr.Column(variant="panel"):
                 with gr.Column(variant="panel"):
         # Wrapper for the async refresh function
         async def async_refresh_handler():
+            leaderboard_update, battle_counts_update, win_rates_update = await self._refresh_leaderboard(force=True)
+            return leaderboard_update, battle_counts_update, win_rates_update
         # Handler to re-enable the button after a refresh
         def reenable_button():
         ).then(
             fn=async_refresh_handler,
             inputs=[],
+            outputs=[leaderboard_table, battle_counts_table, win_rates_table]  # Update all three tables
         ).then(
             fn=reenable_button,
             inputs=[],
             outputs=[refresh_button]
         )
+        return leaderboard_table, battle_counts_table, win_rates_table
     async def build_gradio_interface(self) -> gr.Blocks:
         """
                 with gr.TabItem("Arena"):
                     self._build_arena_section()
                 with gr.TabItem("Leaderboard"):
+                    leaderboard_table, battle_counts_table, win_rates_table = self._build_leaderboard_section()
             tabs.select(
                 fn=self._handle_tab_select,
                 inputs=[],
+                outputs=[leaderboard_table, battle_counts_table, win_rates_table],
             )
         logger.debug("Gradio interface built successfully")

src/scripts/init_db.py CHANGED Viewed

@@ -13,7 +13,7 @@ from sqlalchemy.ext.asyncio import create_async_engine
 # Local Application Imports
 from src.config import Config, logger
-from src.database.models import Base
 async def init_tables():

 # Local Application Imports
 from src.config import Config, logger
+from src.database import Base
 async def init_tables():

src/scripts/test_db.py CHANGED Viewed

@@ -34,7 +34,7 @@ from sqlalchemy import text
 # Local Application Imports
 from src.config import Config, logger
-from src.database.database import engine, init_db
 async def test_connection_async():

 # Local Application Imports
 from src.config import Config, logger
+from src.database import engine, init_db
 async def test_connection_async():

src/utils.py CHANGED Viewed

@@ -23,14 +23,20 @@ from src import constants
 from src.config import Config, logger
 from src.custom_types import (
     ComparisonType,
     Option,
     OptionKey,
     OptionMap,
     TTSProviderName,
     VotingResults,
 )
-from src.database import crud
-from src.database.database import AsyncDBSessionMaker
 def truncate_text(text: str, max_length: int = 50) -> str:
@@ -374,7 +380,7 @@ async def _persist_vote(db_session_maker: AsyncDBSessionMaker, voting_results: V
     session = await _create_db_session(db_session_maker)
     _log_voting_results(voting_results)
     try:
-        await crud.create_vote(cast(AsyncSession, session), voting_results)
     except Exception as e:
         # Log the error with traceback
         logger.error(f"Failed to create vote record: {e}", exc_info=True)
@@ -434,49 +440,159 @@ async def submit_voting_results(
         logger.error(f"Background task error in submit_voting_results: {e}", exc_info=True)
-async def get_leaderboard_data(db_session_maker: AsyncDBSessionMaker) -> List[List[str]]:
     """
-    Fetches leaderboard data from voting results database
     Returns:
-        LeaderboardTableEntries: A list of LeaderboardEntry objects containing rank, provider name anchor tag, model
-                                name anchor tag, win rate, and total votes.
     """
     # Create session
     session = await _create_db_session(db_session_maker)
     try:
-        leaderboard_data = await crud.get_leaderboard_stats(cast(AsyncSession, session))
         logger.info("Fetched leaderboard data successfully.")
-        # return data formatted for the UI (adds links and styling)
-        return [
-            [
-                f'<p style="text-align: center;">{row[0]}</p>',
-                f"""
-                <a
-                    href="{constants.TTS_PROVIDER_LINKS[row[1]]["provider_link"]}"
-                    target="_blank"
-                    class="provider-link"
-                >{row[1]}</a>
-                """,
-                f"""<a
-                    href="{constants.TTS_PROVIDER_LINKS[row[1]]["model_link"]}"
-                    target="_blank"
-                    class="provider-link"
-                >{row[2]}</a>
-                """,
-                f'<p style="text-align: center;">{row[3]}</p>',
-                f'<p style="text-align: center;">{row[4]}</p>',
-            ] for row in leaderboard_data
-        ]
     except Exception as e:
         # Log the error with traceback
         logger.error(f"Failed to fetch leaderboard data: {e}", exc_info=True)
-        return []
     finally:
         # Always ensure the session is closed
         if session is not None:
             await session.close()
 def validate_env_var(var_name: str) -> str:
     """

 from src.config import Config, logger
 from src.custom_types import (
     ComparisonType,
+    LeaderboardEntry,
     Option,
     OptionKey,
     OptionMap,
     TTSProviderName,
     VotingResults,
 )
+from src.database import (
+    AsyncDBSessionMaker,
+    create_vote,
+    get_head_to_head_battle_stats,
+    get_head_to_head_win_rate_stats,
+    get_leaderboard_stats,
+)
 def truncate_text(text: str, max_length: int = 50) -> str:
     session = await _create_db_session(db_session_maker)
     _log_voting_results(voting_results)
     try:
+        await create_vote(cast(AsyncSession, session), voting_results)
     except Exception as e:
         # Log the error with traceback
         logger.error(f"Failed to create vote record: {e}", exc_info=True)
         logger.error(f"Background task error in submit_voting_results: {e}", exc_info=True)
+async def get_leaderboard_data(
+    db_session_maker: AsyncDBSessionMaker
+) -> Tuple[List[List[str]], List[List[str]], List[List[str]]]:
+    """
+    Load every dataset shown on the leaderboard tab and format it for the UI.
+    Three queries are run one after another on a single session:
+    1. Provider rankings (get_leaderboard_stats)
+    2. Head-to-head battle counts (get_head_to_head_battle_stats)
+    3. Head-to-head win rates (get_head_to_head_win_rate_stats)
+    Each raw result is then passed through its matching _format_* helper.
+    Args:
+        db_session_maker (AsyncDBSessionMaker): Factory function for creating async database sessions.
+    Returns:
+        Tuple of three List[List[str]] tables, in this order:
+            - formatted provider rankings
+            - formatted battle-count matrix
+            - formatted win-rate matrix
+        If anything raises while fetching or formatting, the error is logged and
+        ([[]], [[]], [[]]) is returned instead.
+    """
+    session = await _create_db_session(db_session_maker)
+    try:
+        db = cast(AsyncSession, session)
+        rankings_raw = await get_leaderboard_stats(db)
+        battle_counts_raw = await get_head_to_head_battle_stats(db)
+        win_rates_raw = await get_head_to_head_win_rate_stats(db)
         logger.info("Fetched leaderboard data successfully.")
+        return (
+            _format_leaderboard_data(rankings_raw),
+            _format_battle_counts_data(battle_counts_raw),
+            _format_win_rate_data(win_rates_raw),
+        )
     except Exception as e:
+        # Keep the traceback in the log; callers only ever see the empty tables
         logger.error(f"Failed to fetch leaderboard data: {e}", exc_info=True)
+        return [[]], [[]], [[]]
     finally:
+        # Close the session on every path, success or failure
         if session is not None:
             await session.close()
+def _format_leaderboard_data(leaderboard_data_raw: List[LeaderboardEntry]) -> List[List[str]]:
+    """
+    Formats raw leaderboard data for display in the UI.
+    Each entry becomes five HTML cells: centered rank, provider name, model name,
+    centered win rate, and centered vote count. Provider and model names are wrapped in
+    anchors built from constants.TTS_PROVIDER_LINKS when the provider has link metadata.
+    When it does not (for example the placeholder rows with an empty provider name that
+    get_leaderboard_stats returns for an empty table), the bare name is emitted instead
+    of raising KeyError, so one unknown provider no longer discards the whole table.
+    Args:
+        leaderboard_data_raw (List[LeaderboardEntry]): Raw leaderboard data from the database.
+            Entries are read positionally: rank, provider, model, win rate, votes.
+    Returns:
+        List[List[str]]: Formatted HTML strings for each cell in the leaderboard table.
+    """
+    def _linked(label, links, link_key):
+        # No link metadata for this provider (or this link kind): show the plain label
+        if not links or link_key not in links:
+            return label
+        return f"""<a href="{links[link_key]}"
+                target="_blank"
+                class="provider-link"
+            >{label}</a>
+            """
+    formatted_rows = []
+    for row in leaderboard_data_raw:
+        # .get() rather than [] so unknown or empty provider names do not raise
+        links = constants.TTS_PROVIDER_LINKS.get(row[1])
+        formatted_rows.append([
+            f'<p style="text-align: center;">{row[0]}</p>',
+            _linked(row[1], links, "provider_link"),
+            _linked(row[2], links, "model_link"),
+            f'<p style="text-align: center;">{row[3]}</p>',
+            f'<p style="text-align: center;">{row[4]}</p>',
+        ])
+    return formatted_rows
+def _format_battle_counts_data(battle_counts_data_raw: List[List[str]]) -> List[List[str]]:
+    """
+    Builds the provider-by-provider battle count matrix for the UI.
+    Rows and columns both follow constants.TTS_PROVIDERS. Each row starts with a bold
+    provider label, followed by one centered cell per column provider: a dash on the
+    diagonal, otherwise the count recorded for that pair ("0" when none is present).
+    The pair is looked up in either order, so the matrix is symmetric.
+    Args:
+        battle_counts_data_raw (List[List[str]]): Raw battle count data from the database,
+            where each inner list contains [comparison_type, count].
+    Returns:
+        List[List[str]]: HTML-formatted matrix of battle counts between providers.
+    """
+    count_by_comparison = {}
+    for entry in battle_counts_data_raw:
+        count_by_comparison[entry[0]] = entry[1]
+    # Map each provider pair, in both orientations, to the comparison label it is stored under
+    pair_labels = {}
+    for first, second in (("Hume AI", "OpenAI"), ("Hume AI", "ElevenLabs"), ("OpenAI", "ElevenLabs")):
+        label = f"{first} - {second}"
+        pair_labels[(first, second)] = label
+        pair_labels[(second, first)] = label
+    def _cell_text(row_provider, col_provider):
+        # A provider is never compared against itself
+        if row_provider == col_provider:
+            return "-"
+        label = pair_labels.get((row_provider, col_provider), "unknown")
+        return count_by_comparison.get(label, "0")
+    matrix = []
+    for row_provider in constants.TTS_PROVIDERS:
+        cells = [f'<p style="padding-left: 8px;"><strong>{row_provider}</strong></p>']
+        for col_provider in constants.TTS_PROVIDERS:
+            cells.append(f"""
+            <p style="text-align: center;">
+                {_cell_text(row_provider, col_provider)}
+            </p>
+            """)
+        matrix.append(cells)
+    return matrix
+def _format_win_rate_data(win_rate_data_raw: List[List[str]]) -> List[List[str]]:
+    """
+    Builds the provider-by-provider win rate matrix for the UI.
+    Rows and columns both follow constants.TTS_PROVIDERS. Each row starts with a bold
+    provider label, followed by one centered cell per column provider: a dash on the
+    diagonal, otherwise the row provider's win rate against the column provider
+    ("0%" when no rate is available for that pair).
+    Args:
+        win_rate_data_raw (List[List[str]]): Raw win rate data from the database,
+            where each inner list contains [comparison_type, first_win_rate, second_win_rate].
+            comparison_type is split on " - " into exactly two provider names.
+    Returns:
+        List[List[str]]: HTML-formatted matrix of win rates between providers.
+    """
+    # (winner, opponent) -> win rate of `winner` in that matchup
+    rate_lookup = {}
+    for entry in win_rate_data_raw:
+        comparison_type, first_win_rate, second_win_rate = entry
+        first, second = comparison_type.split(" - ")
+        rate_lookup.update({
+            (first, second): first_win_rate,
+            (second, first): second_win_rate,
+        })
+    matrix = []
+    for row_provider in constants.TTS_PROVIDERS:
+        cells = [f'<p style="padding-left: 8px;"><strong>{row_provider}</strong></p>']
+        for col_provider in constants.TTS_PROVIDERS:
+            if row_provider == col_provider:
+                shown = "-"
+            else:
+                shown = rate_lookup.get((row_provider, col_provider), "0%")
+            cells.append(f"""
+                <p style="text-align: center;">
+                    {shown}
+                </p>
+            """)
+        matrix.append(cells)
+    return matrix
 def validate_env_var(var_name: str) -> str:
     """