lcipolina committed on
Commit
9449bbc
·
verified ·
1 Parent(s): ccd246f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -14
app.py CHANGED
@@ -33,7 +33,7 @@ def extract_agent_info(filename: str):
33
  return agent_type, model_name
34
 
35
  def get_available_games() -> List[str]:
36
- """Extracts all unique game names from all SQLite databases and includes 'Total Performance'."""
37
  db_files = find_or_download_db()
38
  game_names = set()
39
 
@@ -49,7 +49,7 @@ def get_available_games() -> List[str]:
49
  conn.close()
50
 
51
  game_list = sorted(game_names) if game_names else ["No Games Found"]
52
- game_list.insert(0, "Total Performance") # Ensure 'Total Performance' is always first
53
  return game_list
54
 
55
  def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
@@ -66,19 +66,25 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
66
  conn.close()
67
  continue
68
 
69
- if game_name == "Total Performance":
70
- query = "SELECT game_name, COUNT(DISTINCT episode) AS games_played, " \
71
  "SUM(reward) AS total_rewards " \
72
- "FROM game_results WHERE game_name = ? GROUP BY game_name"
73
- df = pd.read_sql_query(query, conn, params=(game_name,))
74
  else:
75
  query = "SELECT COUNT(DISTINCT episode) AS games_played, " \
76
  "SUM(reward) AS total_rewards " \
77
  "FROM game_results WHERE game_name = ?"
78
  df = pd.read_sql_query(query, conn, params=(game_name,))
79
 
80
- # Ensure rewards are correctly summed per agent, not duplicated
81
- df["total_rewards"] = df["total_rewards"].fillna(0).astype(float) / 2
 
 
 
 
 
 
82
 
83
  # Fetch average generation time from moves table
84
  gen_time_query = """
@@ -90,15 +96,15 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
90
  vs_random_query = """
91
  SELECT COUNT(DISTINCT gr.episode) FROM game_results gr
92
  JOIN moves m ON gr.game_name = m.game_name AND gr.episode = m.episode
93
- WHERE gr.game_name = ? AND m.opponent = 'random_None' AND gr.reward > 0
94
  """
95
  total_vs_random_query = """
96
  SELECT COUNT(DISTINCT gr.episode) FROM game_results gr
97
  JOIN moves m ON gr.game_name = m.game_name AND gr.episode = m.episode
98
- WHERE gr.game_name = ? AND m.opponent = 'random_None'
99
  """
100
- wins_vs_random = conn.execute(vs_random_query, (game_name,)).fetchone()[0] or 0
101
- total_vs_random = conn.execute(total_vs_random_query, (game_name,)).fetchone()[0] or 0
102
  vs_random_rate = (wins_vs_random / total_vs_random * 100) if total_vs_random > 0 else 0
103
 
104
  df.insert(0, "agent_name", model_name) # Ensure agent_name is the first column
@@ -119,7 +125,7 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
119
  def generate_leaderboard_json():
120
  """Generate a JSON file containing leaderboard stats."""
121
  available_games = get_available_games()
122
- leaderboard = extract_leaderboard_stats("Total Performance").to_dict(orient="records")
123
  json_file = "results/leaderboard_stats.json"
124
  with open(json_file, "w", encoding="utf-8") as f:
125
  json.dump({"timestamp": datetime.utcnow().isoformat(), "leaderboard": leaderboard}, f, indent=4)
@@ -129,7 +135,7 @@ with gr.Blocks() as interface:
129
  with gr.Tab("Leaderboard"):
130
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
131
  available_games = get_available_games()
132
- leaderboard_game_dropdown = gr.Dropdown(available_games, label="Select Game", value="Total Performance")
133
  leaderboard_table = gr.Dataframe(headers=["agent_name", "# games", "total rewards", "avg_generation_time (sec)", "win-rate", "vs_random"])
134
  generate_button = gr.Button("Generate Leaderboard JSON")
135
  download_component = gr.File(label="Download Leaderboard JSON")
 
33
  return agent_type, model_name
34
 
35
  def get_available_games() -> List[str]:
36
+ """Extracts all unique game names from all SQLite databases and includes 'Aggregated Performance'."""
37
  db_files = find_or_download_db()
38
  game_names = set()
39
 
 
49
  conn.close()
50
 
51
  game_list = sorted(game_names) if game_names else ["No Games Found"]
52
+ game_list.insert(0, "Aggregated Performance") # Ensure 'Aggregated Performance' is always first
53
  return game_list
54
 
55
  def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
 
66
  conn.close()
67
  continue
68
 
69
+ if game_name == "Aggregated Performance":
70
+ query = "SELECT COUNT(DISTINCT episode) AS games_played, " \
71
  "SUM(reward) AS total_rewards " \
72
+ "FROM game_results"
73
+ df = pd.read_sql_query(query, conn)
74
  else:
75
  query = "SELECT COUNT(DISTINCT episode) AS games_played, " \
76
  "SUM(reward) AS total_rewards " \
77
  "FROM game_results WHERE game_name = ?"
78
  df = pd.read_sql_query(query, conn, params=(game_name,))
79
 
80
+ # Detect duplicate reward entries by counting unique episodes per agent
81
+ unique_episodes_query = """
82
+ SELECT COUNT(DISTINCT episode) FROM game_results WHERE game_name = ?
83
+ """
84
+ unique_episodes = conn.execute(unique_episodes_query, (game_name,)).fetchone()[0] or 1
85
+
86
+ # Normalize total_rewards by the unique-episode count (the division below is applied unconditionally)
87
+ df["total_rewards"] = df["total_rewards"].fillna(0).astype(float) / unique_episodes
88
 
89
  # Fetch average generation time from moves table
90
  gen_time_query = """
 
96
  vs_random_query = """
97
  SELECT COUNT(DISTINCT gr.episode) FROM game_results gr
98
  JOIN moves m ON gr.game_name = m.game_name AND gr.episode = m.episode
99
+ WHERE m.opponent = 'random_None' AND gr.reward > 0
100
  """
101
  total_vs_random_query = """
102
  SELECT COUNT(DISTINCT gr.episode) FROM game_results gr
103
  JOIN moves m ON gr.game_name = m.game_name AND gr.episode = m.episode
104
+ WHERE m.opponent = 'random_None'
105
  """
106
+ wins_vs_random = conn.execute(vs_random_query).fetchone()[0] or 0
107
+ total_vs_random = conn.execute(total_vs_random_query).fetchone()[0] or 0
108
  vs_random_rate = (wins_vs_random / total_vs_random * 100) if total_vs_random > 0 else 0
109
 
110
  df.insert(0, "agent_name", model_name) # Ensure agent_name is the first column
 
125
  def generate_leaderboard_json():
126
  """Generate a JSON file containing leaderboard stats."""
127
  available_games = get_available_games()
128
+ leaderboard = extract_leaderboard_stats("Aggregated Performance").to_dict(orient="records")
129
  json_file = "results/leaderboard_stats.json"
130
  with open(json_file, "w", encoding="utf-8") as f:
131
  json.dump({"timestamp": datetime.utcnow().isoformat(), "leaderboard": leaderboard}, f, indent=4)
 
135
  with gr.Tab("Leaderboard"):
136
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
137
  available_games = get_available_games()
138
+ leaderboard_game_dropdown = gr.Dropdown(available_games, label="Select Game", value="Aggregated Performance")
139
  leaderboard_table = gr.Dataframe(headers=["agent_name", "# games", "total rewards", "avg_generation_time (sec)", "win-rate", "vs_random"])
140
  generate_button = gr.Button("Generate Leaderboard JSON")
141
  download_component = gr.File(label="Download Leaderboard JSON")