A2C playing MountainCarContinuous-v0 from https://github.com/sgoodfriend/rl-algo-impls/tree/2067e21d62fff5db60168687e7d9e89019a8bfc0
200e488
| CartPole-v1: &cartpole-defaults | |
| n_timesteps: !!float 4e5 | |
| algo_hyperparams: | |
| n_steps: 4096 | |
| pi_lr: 0.01 | |
| gamma: 0.99 | |
| gae_lambda: 1 | |
| val_lr: 0.01 | |
| train_v_iters: 80 | |
| eval_params: | |
| step_freq: !!float 2.5e4 | |
| CartPole-v0: | |
| <<: *cartpole-defaults | |
| n_timesteps: !!float 1e5 | |
| algo_hyperparams: | |
| n_steps: 1024 | |
| pi_lr: 0.01 | |
| gamma: 0.99 | |
| gae_lambda: 1 | |
| val_lr: 0.01 | |
| train_v_iters: 80 | |
| MountainCar-v0: | |
| n_timesteps: !!float 1e6 | |
| env_hyperparams: | |
| normalize: true | |
| n_envs: 16 | |
| algo_hyperparams: | |
| n_steps: 200 | |
| pi_lr: 0.005 | |
| gamma: 0.99 | |
| gae_lambda: 0.97 | |
| val_lr: 0.01 | |
| train_v_iters: 80 | |
| max_grad_norm: 0.5 | |
| MountainCarContinuous-v0: | |
| n_timesteps: !!float 3e5 | |
| env_hyperparams: | |
| normalize: true | |
| n_envs: 4 | |
| # policy_hyperparams: | |
| # init_layers_orthogonal: false | |
| # log_std_init: -3.29 | |
| # use_sde: true | |
| algo_hyperparams: | |
| n_steps: 1000 | |
| pi_lr: !!float 5e-4 | |
| gamma: 0.99 | |
| gae_lambda: 0.9 | |
| val_lr: !!float 1e-3 | |
| train_v_iters: 80 | |
| max_grad_norm: 5 | |
| eval_params: | |
| step_freq: 5000 | |
| Acrobot-v1: | |
| n_timesteps: !!float 2e5 | |
| algo_hyperparams: | |
| n_steps: 2048 | |
| pi_lr: 0.005 | |
| gamma: 0.99 | |
| gae_lambda: 0.97 | |
| val_lr: 0.01 | |
| train_v_iters: 80 | |
| max_grad_norm: 0.5 | |
| LunarLander-v2: | |
| n_timesteps: !!float 4e6 | |
| policy_hyperparams: | |
| hidden_sizes: [256, 256] | |
| algo_hyperparams: | |
| n_steps: 2048 | |
| pi_lr: 0.0001 | |
| gamma: 0.999 | |
| gae_lambda: 0.97 | |
| val_lr: 0.0001 | |
| train_v_iters: 80 | |
| max_grad_norm: 0.5 | |
| eval_params: | |
| deterministic: false | |
| BipedalWalker-v3: | |
| n_timesteps: !!float 10e6 | |
| env_hyperparams: | |
| n_envs: 16 | |
| normalize: true | |
| policy_hyperparams: | |
| hidden_sizes: [256, 256] | |
| algo_hyperparams: | |
| n_steps: 1600 | |
| gae_lambda: 0.95 | |
| gamma: 0.99 | |
| pi_lr: !!float 1e-4 | |
| val_lr: !!float 1e-4 | |
| train_v_iters: 80 | |
| max_grad_norm: 0.5 | |
| eval_params: | |
| deterministic: false | |
| CarRacing-v0: | |
| n_timesteps: !!float 4e6 | |
| env_hyperparams: | |
| frame_stack: 4 | |
| n_envs: 4 | |
| vec_env_class: sync | |
| policy_hyperparams: | |
| use_sde: true | |
| log_std_init: -2 | |
| init_layers_orthogonal: false | |
| activation_fn: relu | |
| cnn_feature_dim: 256 | |
| hidden_sizes: [256] | |
| algo_hyperparams: | |
| n_steps: 1000 | |
| pi_lr: !!float 5e-5 | |
| gamma: 0.99 | |
| gae_lambda: 0.95 | |
| val_lr: !!float 1e-4 | |
| train_v_iters: 40 | |
| max_grad_norm: 0.5 | |
| sde_sample_freq: 4 | |
| HalfCheetahBulletEnv-v0: &pybullet-defaults | |
| n_timesteps: !!float 2e6 | |
| env_hyperparams: &pybullet-env-defaults | |
| normalize: true | |
| policy_hyperparams: &pybullet-policy-defaults | |
| hidden_sizes: [256, 256] | |
| algo_hyperparams: &pybullet-algo-defaults | |
| n_steps: 4000 | |
| pi_lr: !!float 3e-4 | |
| gamma: 0.99 | |
| gae_lambda: 0.97 | |
| val_lr: !!float 1e-3 | |
| train_v_iters: 80 | |
| max_grad_norm: 0.5 | |
| AntBulletEnv-v0: | |
| <<: *pybullet-defaults | |
| policy_hyperparams: | |
| <<: *pybullet-policy-defaults | |
| hidden_sizes: [400, 300] | |
| algo_hyperparams: | |
| <<: *pybullet-algo-defaults | |
| pi_lr: !!float 7e-4 | |
| val_lr: !!float 7e-3 | |
| HopperBulletEnv-v0: | |
| <<: *pybullet-defaults | |
| Walker2DBulletEnv-v0: | |
| <<: *pybullet-defaults | |
| FrozenLake-v1: | |
| n_timesteps: !!float 8e5 | |
| env_params: | |
| make_kwargs: | |
| map_name: 8x8 | |
| is_slippery: true | |
| policy_hyperparams: | |
| hidden_sizes: [64] | |
| algo_hyperparams: | |
| n_steps: 2048 | |
| pi_lr: 0.01 | |
| gamma: 0.99 | |
| gae_lambda: 0.98 | |
| val_lr: 0.01 | |
| train_v_iters: 80 | |
| max_grad_norm: 0.5 | |
| eval_params: | |
| step_freq: !!float 5e4 | |
| n_episodes: 10 | |
| save_best: true | |
| _atari: &atari-defaults | |
| n_timesteps: !!float 25e6 | |
| env_hyperparams: | |
| n_envs: 4 | |
| frame_stack: 4 | |
| no_reward_timeout_steps: 1000 | |
| no_reward_fire_steps: 500 | |
| vec_env_class: async | |
| policy_hyperparams: | |
| activation_fn: relu | |
| algo_hyperparams: | |
| n_steps: 2048 | |
| pi_lr: !!float 5e-5 | |
| gamma: 0.99 | |
| gae_lambda: 0.95 | |
| val_lr: !!float 1e-4 | |
| train_v_iters: 80 | |
| max_grad_norm: 0.5 | |
| ent_coef: 0.01 | |
| eval_params: | |
| deterministic: false | |