lcipolina committed on
Commit
7e15c9c
·
verified ·
1 Parent(s): ee43f7c

Updated after name change

Browse files
Files changed (3) hide show
  1. ui/README.md +42 -0
  2. ui/__init__.py +7 -0
  3. ui/gradio_config_generator.py +445 -0
ui/README.md CHANGED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gradio Interface Components
2
+
3
+ This directory contains the Gradio interface components for the Board Game Arena.
4
+
5
+ ## Files
6
+
7
+ - `gradio_config_generator.py` - Configuration generator that bridges Gradio UI with the game infrastructure
8
+ - `__init__.py` - Package initialization
9
+
10
+ ## Main App
11
+
12
+ The main Gradio app (`app.py`) is located in the root directory for HuggingFace Spaces compatibility.
13
+
14
+ ## Running the App
15
+
16
+ From the project root directory:
17
+
18
+ ```bash
19
+ python app.py
20
+ ```
21
+
22
+ ## Architecture
23
+
24
+ ```
25
+ app.py (Gradio UI - in root directory for HF Spaces)
26
+ ↓
27
+ ui/gradio_config_generator.py (Game configuration bridge)
28
+ ↓
29
+ src/game_reasoning_arena/ (Core game library)
30
+ ```
31
+
32
+ The Gradio app provides:
33
+ - Interactive game interface
34
+ - Performance leaderboards
35
+ - Metrics dashboards
36
+ - LLM reasoning analysis
37
+
38
+
39
+ ## Uploading Results
40
+
41
+ - Go to **Leaderboard** tab → **Upload .db**
42
+ - Files are stored in `scripts/results/` inside the Space
ui/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio interface components for Board Game Arena.
3
+ """
4
+
5
+ from .gradio_config_generator import run_game_with_existing_infrastructure
6
+
7
+ __all__ = ['run_game_with_existing_infrastructure']
ui/gradio_config_generator.py ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Gradio Configuration Generator
4
+
5
+ This module creates configurations compatible with the existing runner.py and
6
+ simulate.py infrastructure, eliminating code duplication in the Gradio app.
7
+ """
8
+
9
+ import tempfile
10
+ import yaml
11
+ from typing import Dict, Any, Tuple
12
+ import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def create_config_for_gradio_game(
    game_name: str,
    player1_type: str,
    player2_type: str,
    player1_model: str = None,
    player2_model: str = None,
    rounds: int = 1,
    seed: int = 42,
    use_ray: bool = False
) -> Dict[str, Any]:
    """
    Create a configuration dictionary compatible with the existing
    runner.py and simulate.py infrastructure.

    Args:
        game_name: Name of the game to play
        player1_type: Type of player 1 (e.g. human, random, llm, llm_<model>)
        player2_type: Type of player 2 (same formats as player1_type)
        player1_model: LLM model for player 1 (if applicable)
        player2_model: LLM model for player 2 (if applicable)
        rounds: Number of episodes to play (stored as "num_episodes")
        seed: Random seed for reproducibility
        use_ray: Whether to use Ray for parallel processing

    Returns:
        Configuration dictionary with the keys "env_config", "num_episodes",
        "seed", "use_ray", "mode", "agents", "llm_backend" and "log_level".
    """
    # Base configuration structure (matches default_simulation_config)
    config = {
        "env_config": {
            "game_name": game_name,
            "max_game_rounds": None,
        },
        "num_episodes": rounds,
        "seed": seed,
        "use_ray": use_ray,
        "mode": f"{player1_type}_vs_{player2_type}",
        "agents": {},
        "llm_backend": {
            "max_tokens": 250,
            "temperature": 0.1,
            "default_model": "litellm_groq/gemma-7b-it",
        },
        "log_level": "INFO",
    }

    # Configure player agents
    config["agents"]["player_0"] = _create_agent_config(
        player1_type, player1_model)
    config["agents"]["player_1"] = _create_agent_config(
        player2_type, player2_model)

    # Debug: Print the agent configurations
    print("📋 CONFIG DEBUG: Agent configurations created:")
    print(f" Player 0 config: {config['agents']['player_0']}")
    print(f" Player 1 config: {config['agents']['player_1']}")

    # Update the backend default model from the first player that uses an
    # LLM. Player 1 takes precedence; player 2 is only consulted when
    # player 1 does not qualify. An explicit model wins over a model name
    # embedded in the player type (e.g. "llm_gpt2" -> "gpt2").
    candidates = (
        (player1_type, player1_model),
        (player2_type, player2_model),
    )
    for player_type, player_model in candidates:
        model_in_type = player_type.startswith("llm_")
        if (player_type == "llm" and player_model) or model_in_type:
            config["llm_backend"]["default_model"] = (
                player_model or player_type[4:]
            )
            break

    return config
92
+
93
+
94
+ def _create_agent_config(player_type: str,
95
+ model: str = None) -> Dict[str, Any]:
96
+ """
97
+ Create agent configuration based on player type and model.
98
+
99
+ Handles both Gradio-specific formats (e.g., "hf_gpt2", "random_bot")
100
+ and standard formats (e.g., "llm", "random").
101
+
102
+ Args:
103
+ player_type: Type of player (human, random, random_bot, hf_*, etc.)
104
+ model: Model name for LLM agents
105
+
106
+ Returns:
107
+ Agent configuration dictionary
108
+ """
109
+ print("๐Ÿ”ง AGENT CONFIG DEBUG: Creating agent config for:")
110
+ print(f" player_type: {player_type}")
111
+ print(f" model: {model}")
112
+
113
+ # Handle Gradio-specific formats
114
+ if player_type == "random_bot":
115
+ config = {"type": "random"}
116
+ elif player_type.startswith("hf_"):
117
+ # Extract model from player type (e.g., "hf_gpt2" -> "gpt2")
118
+ model_from_type = player_type[3:] # Remove "hf_" prefix
119
+
120
+ # Use the hf_prefixed model name for LLM registry lookup
121
+ model_name = f"hf_{model_from_type}"
122
+
123
+ config = {
124
+ "type": "llm", # Use standard LLM agent type
125
+ "model": model_name # This will be looked up in LLM_REGISTRY
126
+ }
127
+ elif player_type.startswith("llm_"):
128
+ # For backwards compatibility with LiteLLM models
129
+ model_from_type = player_type[4:] # Remove "llm_" prefix
130
+
131
+ # Map display model names to actual model names with prefixes
132
+ model_name = model or model_from_type
133
+ if not model_name.startswith(("litellm_", "vllm_")):
134
+ # Add litellm_ prefix for LiteLLM models
135
+ model_name = f"litellm_{model_name}"
136
+
137
+ config = {
138
+ "type": "llm",
139
+ "model": model_name
140
+ }
141
+ elif player_type == "llm":
142
+ model_name = model or "litellm_groq/gemma-7b-it"
143
+ if not model_name.startswith(("litellm_", "vllm_")):
144
+ model_name = f"litellm_{model_name}"
145
+ config = {
146
+ "type": "llm",
147
+ "model": model_name
148
+ }
149
+ elif player_type == "random":
150
+ config = {"type": "random"}
151
+ elif player_type == "human":
152
+ config = {"type": "human"} # This might need additional handling
153
+ else:
154
+ # Default to random for unknown types
155
+ config = {"type": "random"}
156
+
157
+ print(f" โ†’ Created config: {config}")
158
+ return config
159
+
160
+
161
def create_temporary_config_file(config: Dict[str, Any]) -> str:
    """
    Create a temporary YAML config file that can be used with runner.py.

    The file is created with ``delete=False``, so it stays on disk after
    this function returns; removing it is the caller's responsibility.
    If serialisation fails, the partially written file is removed before
    the exception propagates.

    Args:
        config: Configuration dictionary

    Returns:
        Path to the temporary config file
    """
    import os  # local import: only needed for cleanup on failure

    temp_file = tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.yaml',
        delete=False,
        encoding='utf-8'
    )
    try:
        # The context manager flushes and closes the handle on exit.
        with temp_file:
            yaml.dump(config, temp_file, default_flow_style=False)
    except BaseException:
        # delete=False means nothing else cleans this file up.
        os.unlink(temp_file.name)
        raise
    return temp_file.name
184
+
185
+
186
def run_game_with_existing_infrastructure(
    game_name: str,
    player1_type: str,
    player2_type: str,
    player1_model: str = None,
    player2_model: str = None,
    rounds: int = 1,
    seed: int = 42
) -> str:
    """
    Run a game using the existing runner.py and simulate.py infrastructure,
    but capture detailed game logs for Gradio display.

    This function never raises: any failure is logged (with traceback) and
    reported back as an error string so it can be shown in the UI.

    Args:
        game_name: Name of the game to play
        player1_type: Type of player 1
        player2_type: Type of player 2
        player1_model: LLM model for player 1 (if applicable)
        player2_model: LLM model for player 2 (if applicable)
        rounds: Number of episodes to play
        seed: Random seed

    Returns:
        Detailed game simulation results as a string, or a message starting
        with "Error" when the infrastructure cannot be imported or the
        simulation fails.
    """
    try:
        # Import the existing infrastructure. The registry and policy
        # manager are imported here, although unused in this function, so
        # that a missing dependency is reported before any seeding or
        # registry initialisation happens.
        from src.game_reasoning_arena.arena.utils.seeding import set_seed
        from src.game_reasoning_arena.backends import initialize_llm_registry
        from src.game_reasoning_arena.arena.games.registry import (  # noqa: F401
            registry
        )
        from src.game_reasoning_arena.arena.agents.policy_manager import (  # noqa: F401
            initialize_policies, policy_mapping_fn
        )

        # Create configuration
        config = create_config_for_gradio_game(
            game_name=game_name,
            player1_type=player1_type,
            player2_type=player2_type,
            player1_model=player1_model,
            player2_model=player2_model,
            rounds=rounds,
            seed=seed
        )

        # Set seed
        set_seed(seed)

        # Initialize LLM registry (required for simulate_game)
        initialize_llm_registry(config)

        # Use existing infrastructure but capture detailed logs
        return _run_game_with_detailed_logging(game_name, config, seed)

    except ImportError as e:
        # logger.exception keeps the traceback, which the returned string
        # alone would lose.
        logger.exception(
            "Failed to import simulation infrastructure: %s", e)
        return f"Error: Simulation infrastructure not available. {e}"
    except Exception as e:
        logger.exception("Game simulation failed: %s", e)
        return f"Error during game simulation: {e}"
249
+
250
+
251
def _run_game_with_detailed_logging(
    game_name: str,
    config: Dict[str, Any],
    seed: int
) -> str:
    """
    Run game simulation with detailed logging for Gradio display.

    This reuses the existing infrastructure components but captures
    detailed game state information for user display.

    Args:
        game_name: Name of the game, passed to the policy initialiser and
            the environment registry.
        config: Configuration dictionary; "num_episodes" and the
            "player_0"/"player_1" entries of "agents" are read here.
        seed: Base seed; episode ``k`` (0-based) is reset with ``seed + k``.

    Returns:
        The complete game log as a single newline-joined string.
    """
    from src.game_reasoning_arena.arena.games.registry import registry
    from src.game_reasoning_arena.arena.agents.policy_manager import (
        initialize_policies
    )

    # Initialize using existing infrastructure
    policies_dict = initialize_policies(config, game_name, seed)
    env = registry.make_env(game_name, config)

    # Player ids are the positions of the policies in policies_dict.
    player_to_agent = dict(enumerate(policies_dict.values()))

    game_log = []

    # Add header
    game_log.append("🎮 GAME SIMULATION RESULTS")
    game_log.append("=" * 50)
    game_log.append(f"Game: {game_name.replace('_', ' ').title()}")
    game_log.append(f"Episodes: {config['num_episodes']}")
    game_log.append("")

    # Player information
    game_log.append("👥 PLAYERS:")
    player1 = config["agents"]["player_0"]
    player2 = config["agents"]["player_1"]
    game_log.append(f" Player 0: {_format_player_info(player1)}")
    game_log.append(f" Player 1: {_format_player_info(player2)}")
    game_log.append("")

    # Run episodes (reusing compute_actions logic from simulate.py)
    for episode in range(config["num_episodes"]):
        episode_seed = seed + episode
        game_log.append(f"🎯 Episode {episode + 1}")
        game_log.append("-" * 30)

        observation_dict, _ = env.reset(seed=episode_seed)
        terminated = truncated = False
        step_count = 0
        episode_rewards = {0: 0, 1: 0}

        while not (terminated or truncated):
            step_count += 1
            game_log.append(f"\n📋 Step {step_count}")

            # Show board state; rendering is best-effort and must not
            # abort the episode.
            try:
                board = env.render_board(0)
                game_log.append("Current board:")
                game_log.append(board)
            except Exception:
                game_log.append("Board state not available")

            # Use the existing compute_actions logic from simulate.py
            try:
                action_dict = _compute_actions_for_gradio(
                    env, player_to_agent, observation_dict, game_log
                )
            except Exception as e:
                # An agent failure ends this episode early.
                game_log.append(f"❌ Error computing actions: {e}")
                truncated = True
                break

            # Step forward (reusing existing environment logic)
            observation_dict, rewards, terminated, truncated, _ = (
                env.step(action_dict)
            )
            for player_id, reward in rewards.items():
                # .get() tolerates ids outside the pre-seeded 0/1 keys.
                episode_rewards[player_id] = (
                    episode_rewards.get(player_id, 0) + reward
                )

        # Episode results
        game_log.append(f"\n🏁 Episode {episode + 1} Complete!")
        try:
            game_log.append("Final board:")
            game_log.append(env.render_board(0))
        except Exception:
            game_log.append("Final board state not available")

        if episode_rewards[0] > episode_rewards[1]:
            winner = "Player 0"
        elif episode_rewards[1] > episode_rewards[0]:
            winner = "Player 1"
        else:
            winner = "Draw"

        game_log.append(f"🏆 Winner: {winner}")
        game_log.append(
            f"📊 Scores: Player 0={episode_rewards[0]}, "
            f"Player 1={episode_rewards[1]}"
        )
        game_log.append("")

    game_log.append("✅ Simulation completed successfully!")
    game_log.append("Check the database logs for detailed move analysis.")

    return "\n".join(game_log)
355
+
356
+
357
def _compute_actions_for_gradio(env, player_to_agent, observations, game_log):
    """
    Compute actions and log details for Gradio display.
    This reuses the compute_actions logic from simulate.py.

    Args:
        env: Environment exposing ``state.is_simultaneous_node()`` and
            ``state.current_player()``.
        player_to_agent: Mapping from player id to a callable agent that
            takes that player's observation.
        observations: Mapping from player id to observation.
        game_log: List of log lines; appended to in place.

    Returns:
        Dict mapping player id to chosen action: one entry per agent on a
        simultaneous node, otherwise a single entry for the current player.
    """
    if env.state.is_simultaneous_node():
        # Simultaneous-move game: every agent acts on its own observation.
        actions = {}
        for player, agent in player_to_agent.items():
            actions[player] = _log_agent_choice(
                player, agent(observations[player]), game_log
            )
        return actions

    # Turn-based game: only the current player acts.
    current_player = env.state.current_player()
    game_log.append(f"Player {current_player}'s turn")
    agent_response = player_to_agent[current_player](
        observations[current_player]
    )
    return {
        current_player: _log_agent_choice(
            current_player, agent_response, game_log
        )
    }


def _log_agent_choice(player, agent_response, game_log):
    """Log one player's action and a reasoning preview; return the action."""
    action, reasoning = _extract_action_and_reasoning(agent_response)
    game_log.append(f" Player {player} chooses action {action}")
    if reasoning and reasoning != "None":
        # Coerce to text so a non-string reasoning value cannot break the
        # slice/concatenation below.
        reasoning = str(reasoning)
        reasoning_preview = (
            reasoning[:100] + ("..." if len(reasoning) > 100 else "")
        )
        game_log.append(f" Reasoning: {reasoning_preview}")
    return action
389
+
390
+
391
+ def _extract_action_and_reasoning(agent_response):
392
+ """Extract action and reasoning from agent response."""
393
+ if isinstance(agent_response, dict) and "action" in agent_response:
394
+ action = agent_response.get("action", -1)
395
+ reasoning = agent_response.get("reasoning", "None")
396
+ return action, reasoning
397
+ else:
398
+ return agent_response, "None"
399
+
400
+
401
+ def _format_player_info(player_config: Dict[str, Any]) -> str:
402
+ """Format player information for display."""
403
+ player_type = player_config["type"]
404
+ if player_type == "llm":
405
+ model = player_config.get("model", "unknown")
406
+ return f"LLM ({model})"
407
+ else:
408
+ return player_type.replace("_", " ").title()
409
+
410
+
411
+ # For backward compatibility and easy integration
412
def create_gradio_compatible_config(
    game_name: str,
    player1_type: str,
    player2_type: str,
    player1_model: str = None,
    player2_model: str = None,
    rounds: int = 1,
    seed: int = 42
) -> Tuple[Dict[str, Any], str]:
    """
    Create both a config dict and a temp file for maximum compatibility.

    Args:
        game_name: Name of the game to play
        player1_type: Type of player 1
        player2_type: Type of player 2
        player1_model: LLM model for player 1 (if applicable)
        player2_model: LLM model for player 2 (if applicable)
        rounds: Number of episodes to play
        seed: Random seed stored in the configuration. Defaults to 42,
            the value used when no seed is supplied.

    Returns:
        Tuple of (config_dict, temp_file_path)
    """
    # Keyword arguments keep this call correct if the generator's
    # parameter order ever changes.
    config = create_config_for_gradio_game(
        game_name=game_name,
        player1_type=player1_type,
        player2_type=player2_type,
        player1_model=player1_model,
        player2_model=player2_model,
        rounds=rounds,
        seed=seed
    )
    temp_file = create_temporary_config_file(config)
    return config, temp_file
432
+
433
+
434
if __name__ == "__main__":
    # Demo: build a three-episode tic-tac-toe config (LLM vs. random)
    # and print it as YAML.
    example_settings = {
        "game_name": "tic_tac_toe",
        "player1_type": "llm",
        "player2_type": "random",
        "player1_model": "litellm_groq/llama-3.1-8b-instant",
        "rounds": 3,
    }
    config = create_config_for_gradio_game(**example_settings)

    print("Generated configuration:")
    print(yaml.dump(config, default_flow_style=False))