Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -33,7 +33,7 @@ def extract_agent_info(filename: str):
|
|
33 |
return agent_type, model_name
|
34 |
|
35 |
def get_available_games() -> List[str]:
|
36 |
-
"""Extracts all unique game names from all SQLite databases and includes '
|
37 |
db_files = find_or_download_db()
|
38 |
game_names = set()
|
39 |
|
@@ -49,7 +49,7 @@ def get_available_games() -> List[str]:
|
|
49 |
conn.close()
|
50 |
|
51 |
game_list = sorted(game_names) if game_names else ["No Games Found"]
|
52 |
-
game_list.insert(0, "
|
53 |
return game_list
|
54 |
|
55 |
def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
|
@@ -66,19 +66,25 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
|
|
66 |
conn.close()
|
67 |
continue
|
68 |
|
69 |
-
if game_name == "
|
70 |
-
query = "SELECT
|
71 |
"SUM(reward) AS total_rewards " \
|
72 |
-
"FROM game_results
|
73 |
-
df = pd.read_sql_query(query, conn
|
74 |
else:
|
75 |
query = "SELECT COUNT(DISTINCT episode) AS games_played, " \
|
76 |
"SUM(reward) AS total_rewards " \
|
77 |
"FROM game_results WHERE game_name = ?"
|
78 |
df = pd.read_sql_query(query, conn, params=(game_name,))
|
79 |
|
80 |
-
#
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
# Fetch average generation time from moves table
|
84 |
gen_time_query = """
|
@@ -90,15 +96,15 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
|
|
90 |
vs_random_query = """
|
91 |
SELECT COUNT(DISTINCT gr.episode) FROM game_results gr
|
92 |
JOIN moves m ON gr.game_name = m.game_name AND gr.episode = m.episode
|
93 |
-
WHERE
|
94 |
"""
|
95 |
total_vs_random_query = """
|
96 |
SELECT COUNT(DISTINCT gr.episode) FROM game_results gr
|
97 |
JOIN moves m ON gr.game_name = m.game_name AND gr.episode = m.episode
|
98 |
-
WHERE
|
99 |
"""
|
100 |
-
wins_vs_random = conn.execute(vs_random_query
|
101 |
-
total_vs_random = conn.execute(total_vs_random_query
|
102 |
vs_random_rate = (wins_vs_random / total_vs_random * 100) if total_vs_random > 0 else 0
|
103 |
|
104 |
df.insert(0, "agent_name", model_name) # Ensure agent_name is the first column
|
@@ -119,7 +125,7 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
|
|
119 |
def generate_leaderboard_json():
|
120 |
"""Generate a JSON file containing leaderboard stats."""
|
121 |
available_games = get_available_games()
|
122 |
-
leaderboard = extract_leaderboard_stats("
|
123 |
json_file = "results/leaderboard_stats.json"
|
124 |
with open(json_file, "w", encoding="utf-8") as f:
|
125 |
json.dump({"timestamp": datetime.utcnow().isoformat(), "leaderboard": leaderboard}, f, indent=4)
|
@@ -129,7 +135,7 @@ with gr.Blocks() as interface:
|
|
129 |
with gr.Tab("Leaderboard"):
|
130 |
gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
|
131 |
available_games = get_available_games()
|
132 |
-
leaderboard_game_dropdown = gr.Dropdown(available_games, label="Select Game", value="
|
133 |
leaderboard_table = gr.Dataframe(headers=["agent_name", "# games", "total rewards", "avg_generation_time (sec)", "win-rate", "vs_random"])
|
134 |
generate_button = gr.Button("Generate Leaderboard JSON")
|
135 |
download_component = gr.File(label="Download Leaderboard JSON")
|
|
|
33 |
return agent_type, model_name
|
34 |
|
35 |
def get_available_games() -> List[str]:
|
36 |
+
"""Extracts all unique game names from all SQLite databases and includes 'Aggregated Performance'."""
|
37 |
db_files = find_or_download_db()
|
38 |
game_names = set()
|
39 |
|
|
|
49 |
conn.close()
|
50 |
|
51 |
game_list = sorted(game_names) if game_names else ["No Games Found"]
|
52 |
+
game_list.insert(0, "Aggregated Performance") # Ensure 'Aggregated Performance' is always first
|
53 |
return game_list
|
54 |
|
55 |
def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
|
|
|
66 |
conn.close()
|
67 |
continue
|
68 |
|
69 |
+
if game_name == "Aggregated Performance":
|
70 |
+
query = "SELECT COUNT(DISTINCT episode) AS games_played, " \
|
71 |
"SUM(reward) AS total_rewards " \
|
72 |
+
"FROM game_results"
|
73 |
+
df = pd.read_sql_query(query, conn)
|
74 |
else:
|
75 |
query = "SELECT COUNT(DISTINCT episode) AS games_played, " \
|
76 |
"SUM(reward) AS total_rewards " \
|
77 |
"FROM game_results WHERE game_name = ?"
|
78 |
df = pd.read_sql_query(query, conn, params=(game_name,))
|
79 |
|
80 |
+
# Detect duplicate reward entries by counting unique episodes per agent
|
81 |
+
unique_episodes_query = """
|
82 |
+
SELECT COUNT(DISTINCT episode) FROM game_results WHERE game_name = ?
|
83 |
+
"""
|
84 |
+
unique_episodes = conn.execute(unique_episodes_query, (game_name,)).fetchone()[0] or 1
|
85 |
+
|
86 |
+
# Divide total_rewards by the unique-episode count unconditionally (the divisor falls back to 1 when no episodes match game_name)
|
87 |
+
df["total_rewards"] = df["total_rewards"].fillna(0).astype(float) / unique_episodes
|
88 |
|
89 |
# Fetch average generation time from moves table
|
90 |
gen_time_query = """
|
|
|
96 |
vs_random_query = """
|
97 |
SELECT COUNT(DISTINCT gr.episode) FROM game_results gr
|
98 |
JOIN moves m ON gr.game_name = m.game_name AND gr.episode = m.episode
|
99 |
+
WHERE m.opponent = 'random_None' AND gr.reward > 0
|
100 |
"""
|
101 |
total_vs_random_query = """
|
102 |
SELECT COUNT(DISTINCT gr.episode) FROM game_results gr
|
103 |
JOIN moves m ON gr.game_name = m.game_name AND gr.episode = m.episode
|
104 |
+
WHERE m.opponent = 'random_None'
|
105 |
"""
|
106 |
+
wins_vs_random = conn.execute(vs_random_query).fetchone()[0] or 0
|
107 |
+
total_vs_random = conn.execute(total_vs_random_query).fetchone()[0] or 0
|
108 |
vs_random_rate = (wins_vs_random / total_vs_random * 100) if total_vs_random > 0 else 0
|
109 |
|
110 |
df.insert(0, "agent_name", model_name) # Ensure agent_name is the first column
|
|
|
125 |
def generate_leaderboard_json():
|
126 |
"""Generate a JSON file containing leaderboard stats."""
|
127 |
available_games = get_available_games()
|
128 |
+
leaderboard = extract_leaderboard_stats("Aggregated Performance").to_dict(orient="records")
|
129 |
json_file = "results/leaderboard_stats.json"
|
130 |
with open(json_file, "w", encoding="utf-8") as f:
|
131 |
json.dump({"timestamp": datetime.utcnow().isoformat(), "leaderboard": leaderboard}, f, indent=4)
|
|
|
135 |
with gr.Tab("Leaderboard"):
|
136 |
gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
|
137 |
available_games = get_available_games()
|
138 |
+
leaderboard_game_dropdown = gr.Dropdown(available_games, label="Select Game", value="Aggregated Performance")
|
139 |
leaderboard_table = gr.Dataframe(headers=["agent_name", "# games", "total rewards", "avg_generation_time (sec)", "win-rate", "vs_random"])
|
140 |
generate_button = gr.Button("Generate Leaderboard JSON")
|
141 |
download_component = gr.File(label="Download Leaderboard JSON")
|