lcipolina committed on
Commit
d2fa748
·
verified ·
1 Parent(s): 3d99af2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -10
app.py CHANGED
@@ -33,7 +33,7 @@ def extract_agent_info(filename: str):
33
  return agent_type, model_name
34
 
35
  def get_available_games() -> List[str]:
36
- """Extracts all unique game names from all SQLite databases."""
37
  db_files = find_or_download_db()
38
  game_names = set()
39
 
@@ -48,7 +48,9 @@ def get_available_games() -> List[str]:
48
  finally:
49
  conn.close()
50
 
51
- return sorted(game_names) if game_names else ["No Games Found"]
 
 
52
 
53
  def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
54
  """Extract and aggregate leaderboard stats from all SQLite databases."""
@@ -59,23 +61,48 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
59
  conn = sqlite3.connect(db_file)
60
  agent_type, model_name = extract_agent_info(db_file)
61
 
62
- query = "SELECT COUNT(DISTINCT episode) AS games_played, " \
63
- "AVG(generation_time) AS avg_gen_time, SUM(reward) AS total_rewards " \
64
- "FROM game_results WHERE game_name = ?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- df = pd.read_sql_query(query, conn, params=(game_name,))
67
  df["agent_name"] = model_name
68
  df["agent_type"] = agent_type
 
 
69
  all_stats.append(df)
70
  conn.close()
71
 
72
  leaderboard_df = pd.concat(all_stats, ignore_index=True) if all_stats else pd.DataFrame()
 
 
 
 
73
  return leaderboard_df
74
 
75
  def generate_leaderboard_json():
76
  """Generate a JSON file containing leaderboard stats."""
77
  available_games = get_available_games()
78
- leaderboard = extract_leaderboard_stats(available_games[0]).to_dict(orient="records")
79
  json_file = "results/leaderboard_stats.json"
80
  with open(json_file, "w", encoding="utf-8") as f:
81
  json.dump({"timestamp": datetime.utcnow().isoformat(), "leaderboard": leaderboard}, f, indent=4)
@@ -83,10 +110,10 @@ def generate_leaderboard_json():
83
 
84
  with gr.Blocks() as interface:
85
  with gr.Tab("Leaderboard"):
86
- gr.Markdown("# Leaderboard")
87
  available_games = get_available_games()
88
- leaderboard_game_dropdown = gr.Dropdown(available_games, label="Select Game", value=available_games[0])
89
- leaderboard_table = gr.Dataframe()
90
  generate_button = gr.Button("Generate Leaderboard JSON")
91
  download_component = gr.File(label="Download Leaderboard JSON")
92
  refresh_button = gr.Button("Refresh Leaderboard")
 
33
  return agent_type, model_name
34
 
35
  def get_available_games() -> List[str]:
36
+ """Extracts all unique game names from all SQLite databases and includes 'Total Performance'."""
37
  db_files = find_or_download_db()
38
  game_names = set()
39
 
 
48
  finally:
49
  conn.close()
50
 
51
+ game_list = sorted(game_names) if game_names else ["No Games Found"]
52
+ game_list.insert(0, "Total Performance") # Ensure 'Total Performance' is always first
53
+ return game_list
54
 
55
  def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
56
  """Extract and aggregate leaderboard stats from all SQLite databases."""
 
61
  conn = sqlite3.connect(db_file)
62
  agent_type, model_name = extract_agent_info(db_file)
63
 
64
+ if game_name == "Total Performance":
65
+ query = "SELECT game_name, COUNT(DISTINCT episode) AS games_played, " \
66
+ "AVG(generation_time) AS avg_gen_time, SUM(reward) AS total_rewards " \
67
+ "FROM game_results GROUP BY game_name"
68
+ df = pd.read_sql_query(query, conn)
69
+ else:
70
+ query = "SELECT COUNT(DISTINCT episode) AS games_played, " \
71
+ "AVG(generation_time) AS avg_gen_time, SUM(reward) AS total_rewards " \
72
+ "FROM game_results WHERE game_name = ?"
73
+ df = pd.read_sql_query(query, conn, params=(game_name,))
74
+
75
+ # Calculate win rate against random bot
76
+ vs_random_query = """
77
+ SELECT COUNT(*) FROM game_results
78
+ WHERE game_name = ? AND opponent = 'random_None' AND reward > 0
79
+ """
80
+ total_vs_random_query = """
81
+ SELECT COUNT(*) FROM game_results
82
+ WHERE game_name = ? AND opponent = 'random_None'
83
+ """
84
+ wins_vs_random = conn.execute(vs_random_query, (game_name,)).fetchone()[0] or 0
85
+ total_vs_random = conn.execute(total_vs_random_query, (game_name,)).fetchone()[0] or 0
86
+ vs_random_rate = (wins_vs_random / total_vs_random * 100) if total_vs_random > 0 else 0
87
 
 
88
  df["agent_name"] = model_name
89
  df["agent_type"] = agent_type
90
+ df["vs_random"] = round(vs_random_rate, 2)
91
+
92
  all_stats.append(df)
93
  conn.close()
94
 
95
  leaderboard_df = pd.concat(all_stats, ignore_index=True) if all_stats else pd.DataFrame()
96
+
97
+ if leaderboard_df.empty:
98
+ leaderboard_df = pd.DataFrame(columns=["LLM Model", "# games", "moves/game", "illegal-moves", "win-rate", "vs Random"])
99
+
100
  return leaderboard_df
101
 
102
  def generate_leaderboard_json():
103
  """Generate a JSON file containing leaderboard stats."""
104
  available_games = get_available_games()
105
+ leaderboard = extract_leaderboard_stats("Total Performance").to_dict(orient="records")
106
  json_file = "results/leaderboard_stats.json"
107
  with open(json_file, "w", encoding="utf-8") as f:
108
  json.dump({"timestamp": datetime.utcnow().isoformat(), "leaderboard": leaderboard}, f, indent=4)
 
110
 
111
  with gr.Blocks() as interface:
112
  with gr.Tab("Leaderboard"):
113
+ gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
114
  available_games = get_available_games()
115
+ leaderboard_game_dropdown = gr.Dropdown(available_games, label="Select Game", value="Total Performance")
116
+ leaderboard_table = gr.Dataframe(headers=["LLM Model", "# games", "moves/game", "illegal-moves", "win-rate", "vs Random"])
117
  generate_button = gr.Button("Generate Leaderboard JSON")
118
  download_component = gr.File(label="Download Leaderboard JSON")
119
  refresh_button = gr.Button("Refresh Leaderboard")