c-bone committed
Commit 1e610e6 · verified · 1 Parent(s): 047d8c4

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
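The added rule routes `replay.mp4` through Git LFS, just like the archive and TensorBoard patterns above it. This is the line that `git lfs track` writes; a minimal sketch (assuming `git-lfs` is installed and initialized in the repo):

```
git lfs track "replay.mp4"   # appends the filter=lfs rule to .gitattributes
git add .gitattributes replay.mp4
```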
.summary/0/events.out.tfevents.1744901470.Cyprien-Bone-UCL ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcf232213ef535583a6ed8e1e0e2287069aff9c97a9aa2aa8b7fc653261c5a7b
+size 40
.summary/0/events.out.tfevents.1744901887.Cyprien-Bone-UCL ADDED
File without changes
.summary/0/events.out.tfevents.1744901901.Cyprien-Bone-UCL ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f19c3acdd07b3b2a42954ee9ba686d82a4a1409b8af806ff47ccb690e066da8a
+size 7582
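Each of these three-line stubs is a Git LFS pointer, not the binary itself: a spec `version`, the SHA-256 `oid` of the stored blob, and its `size` in bytes. The same pointer text can be reproduced locally for any tracked file (a sketch, assuming `git-lfs` is installed):

```
git lfs pointer --file=.summary/0/events.out.tfevents.1744901901.Cyprien-Bone-UCL
```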
README.md ADDED
@@ -0,0 +1,56 @@
+---
+library_name: sample-factory
+tags:
+- deep-reinforcement-learning
+- reinforcement-learning
+- sample-factory
+model-index:
+- name: APPO
+  results:
+  - task:
+      type: reinforcement-learning
+      name: reinforcement-learning
+    dataset:
+      name: doom_health_gathering_supreme
+      type: doom_health_gathering_supreme
+    metrics:
+    - type: mean_reward
+      value: 3.94 +/- 0.57
+      name: mean_reward
+      verified: false
+---
+
+An **APPO** model trained on the **doom_health_gathering_supreme** environment.
+
+This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+Documentation on how to use Sample-Factory can be found at https://www.samplefactory.dev/.
+
+## Downloading the model
+
+After installing Sample-Factory, download the model with:
+```
+python -m sample_factory.huggingface.load_from_hub -r c-bone/rl_course_vizdoom_health_gathering_supreme
+```
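+
+Alternatively, the raw repository files can be fetched with the plain Hugging Face CLI (a minimal sketch, assuming the `--local-dir` target matches the `./train_dir` layout that the scripts below expect):
+```
+huggingface-cli download c-bone/rl_course_vizdoom_health_gathering_supreme --local-dir ./train_dir/rl_course_vizdoom_health_gathering_supreme
+```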
+
+## Using the model
+
+To run the model after download, use the `enjoy` script corresponding to this environment:
+```
+python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
+```
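+
+For VizDoom environments the enjoy module is presumably `sf_examples.vizdoom.enjoy_vizdoom` from the standard Sample-Factory examples layout (an assumption; substitute your own module if it differs), e.g.:
+```
+python -m sf_examples.vizdoom.enjoy_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
+```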
+
+You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+
+## Training with this model
+
+To continue training with this model, use the `train` script corresponding to this environment:
+```
+python -m <path.to.train.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme --restart_behavior=resume --train_for_env_steps=10000000000
+```
+
+Note that you may have to adjust `--train_for_env_steps` to a suitably high number, as the experiment will resume from the step count at which it previously concluded.
+
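+Again, the concrete module for VizDoom is presumably `sf_examples.vizdoom.train_vizdoom` (the same assumption as above), so a resumed run would look like:
+```
+python -m sf_examples.vizdoom.train_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme --restart_behavior=resume --train_for_env_steps=10000000000
+```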
checkpoint_p0/checkpoint_000000000_0.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d00fb1a2bb3abae2a4e3257a57e24bbcdcadd1ae852097add4e09c4f6be78ade
+size 11939285
checkpoint_p0/checkpoint_000000001_4096.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25bd7d6dca2576306bf7b451fb8263f4f3722b27e0840e4a5dcb4913aac874d7
+size 34929349
config.json ADDED
@@ -0,0 +1,142 @@
+{
+  "help": false,
+  "algo": "APPO",
+  "env": "doom_health_gathering_supreme",
+  "experiment": "default_experiment",
+  "train_dir": "/home/uccacbo/Deep-RL-HF/train_dir",
+  "restart_behavior": "resume",
+  "device": "gpu",
+  "seed": null,
+  "num_policies": 1,
+  "async_rl": true,
+  "serial_mode": false,
+  "batched_sampling": false,
+  "num_batches_to_accumulate": 2,
+  "worker_num_splits": 2,
+  "policy_workers_per_policy": 1,
+  "max_policy_lag": 1000,
+  "num_workers": 1,
+  "num_envs_per_worker": 2,
+  "batch_size": 1024,
+  "num_batches_per_epoch": 1,
+  "num_epochs": 1,
+  "rollout": 32,
+  "recurrence": 32,
+  "shuffle_minibatches": false,
+  "gamma": 0.99,
+  "reward_scale": 1.0,
+  "reward_clip": 1000.0,
+  "value_bootstrap": false,
+  "normalize_returns": true,
+  "exploration_loss_coeff": 0.001,
+  "value_loss_coeff": 0.5,
+  "kl_loss_coeff": 0.0,
+  "exploration_loss": "symmetric_kl",
+  "gae_lambda": 0.95,
+  "ppo_clip_ratio": 0.1,
+  "ppo_clip_value": 0.2,
+  "with_vtrace": false,
+  "vtrace_rho": 1.0,
+  "vtrace_c": 1.0,
+  "optimizer": "adam",
+  "adam_eps": 1e-06,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.999,
+  "max_grad_norm": 4.0,
+  "learning_rate": 0.0001,
+  "lr_schedule": "constant",
+  "lr_schedule_kl_threshold": 0.008,
+  "lr_adaptive_min": 1e-06,
+  "lr_adaptive_max": 0.01,
+  "obs_subtract_mean": 0.0,
+  "obs_scale": 255.0,
+  "normalize_input": true,
+  "normalize_input_keys": null,
+  "decorrelate_experience_max_seconds": 0,
+  "decorrelate_envs_on_one_worker": true,
+  "actor_worker_gpus": [],
+  "set_workers_cpu_affinity": true,
+  "force_envs_single_thread": false,
+  "default_niceness": 0,
+  "log_to_file": true,
+  "experiment_summaries_interval": 10,
+  "flush_summaries_interval": 30,
+  "stats_avg": 100,
+  "summaries_use_frameskip": true,
+  "heartbeat_interval": 20,
+  "heartbeat_reporting_interval": 600,
+  "train_for_env_steps": 4000,
+  "train_for_seconds": 10000000000,
+  "save_every_sec": 120,
+  "keep_checkpoints": 2,
+  "load_checkpoint_kind": "latest",
+  "save_milestones_sec": -1,
+  "save_best_every_sec": 5,
+  "save_best_metric": "reward",
+  "save_best_after": 100000,
+  "benchmark": false,
+  "encoder_mlp_layers": [
+    512,
+    512
+  ],
+  "encoder_conv_architecture": "convnet_simple",
+  "encoder_conv_mlp_layers": [
+    512
+  ],
+  "use_rnn": true,
+  "rnn_size": 512,
+  "rnn_type": "gru",
+  "rnn_num_layers": 1,
+  "decoder_mlp_layers": [],
+  "nonlinearity": "elu",
+  "policy_initialization": "orthogonal",
+  "policy_init_gain": 1.0,
+  "actor_critic_share_weights": true,
+  "adaptive_stddev": true,
+  "continuous_tanh_scale": 0.0,
+  "initial_stddev": 1.0,
+  "use_env_info_cache": false,
+  "env_gpu_actions": false,
+  "env_gpu_observations": true,
+  "env_frameskip": 4,
+  "env_framestack": 1,
+  "pixel_format": "CHW",
+  "use_record_episode_statistics": false,
+  "with_wandb": false,
+  "wandb_user": null,
+  "wandb_project": "sample_factory",
+  "wandb_group": null,
+  "wandb_job_type": "SF",
+  "wandb_tags": [],
+  "with_pbt": false,
+  "pbt_mix_policies_in_one_env": true,
+  "pbt_period_env_steps": 5000000,
+  "pbt_start_mutation": 20000000,
+  "pbt_replace_fraction": 0.3,
+  "pbt_mutation_rate": 0.15,
+  "pbt_replace_reward_gap": 0.1,
+  "pbt_replace_reward_gap_absolute": 1e-06,
+  "pbt_optimize_gamma": false,
+  "pbt_target_objective": "true_objective",
+  "pbt_perturb_min": 1.1,
+  "pbt_perturb_max": 1.5,
+  "num_agents": -1,
+  "num_humans": 0,
+  "num_bots": -1,
+  "start_bot_difficulty": null,
+  "timelimit": null,
+  "res_w": 128,
+  "res_h": 72,
+  "wide_aspect_ratio": false,
+  "eval_env_frameskip": 1,
+  "fps": 35,
+  "command_line": "--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=400000",
+  "cli_args": {
+    "env": "doom_health_gathering_supreme",
+    "num_workers": 8,
+    "num_envs_per_worker": 4,
+    "train_for_env_steps": 400000
+  },
+  "git_hash": "unknown",
+  "git_repo_name": "not a git repository"
+}
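The config captures the complete APPO hyperparameter set for this run, including the command-line overrides that produced it. A quick way to pull out the key training knobs (a sketch using `jq`, assuming it is installed):

```
jq '{algo, env, learning_rate, gamma, batch_size, rollout, num_workers, train_for_env_steps}' config.json
```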
replay.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ccefb1f584447e4c49efdc2d81038a36c6797a4a6b48a5831e78d7b85ae7580
+size 5945675
sf_log.txt ADDED
@@ -0,0 +1,658 @@
+[2025-04-17 15:51:14,150][38462] Saving configuration to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json...
+[2025-04-17 15:51:14,151][38462] Rollout worker 0 uses device cpu
+[2025-04-17 15:51:14,152][38462] Rollout worker 1 uses device cpu
+[2025-04-17 15:51:14,152][38462] Rollout worker 2 uses device cpu
+[2025-04-17 15:51:14,153][38462] Rollout worker 3 uses device cpu
+[2025-04-17 15:51:14,154][38462] Rollout worker 4 uses device cpu
+[2025-04-17 15:51:14,155][38462] Rollout worker 5 uses device cpu
+[2025-04-17 15:51:14,156][38462] Rollout worker 6 uses device cpu
+[2025-04-17 15:51:14,157][38462] Rollout worker 7 uses device cpu
+[2025-04-17 15:51:14,281][38462] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:51:14,281][38462] InferenceWorker_p0-w0: min num requests: 2
+[2025-04-17 15:51:14,301][38462] Starting all processes...
+[2025-04-17 15:51:14,302][38462] Starting process learner_proc0
+[2025-04-17 15:51:14,355][38462] Starting all processes...
+[2025-04-17 15:51:14,362][38462] Starting process inference_proc0-0
+[2025-04-17 15:51:14,362][38462] Starting process rollout_proc0
+[2025-04-17 15:51:14,363][38462] Starting process rollout_proc1
+[2025-04-17 15:51:14,364][38462] Starting process rollout_proc2
+[2025-04-17 15:51:14,364][38462] Starting process rollout_proc3
+[2025-04-17 15:51:14,365][38462] Starting process rollout_proc4
+[2025-04-17 15:51:14,366][38462] Starting process rollout_proc5
+[2025-04-17 15:51:14,366][38462] Starting process rollout_proc6
+[2025-04-17 15:51:14,369][38462] Starting process rollout_proc7
+[2025-04-17 15:51:20,105][48477] Worker 4 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48473] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48479] Worker 6 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48474] Worker 1 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48480] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48475] Worker 2 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48478] Worker 5 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48476] Worker 3 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,106][48472] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:51:20,106][48459] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:51:20,106][48472] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2025-04-17 15:51:20,106][48459] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2025-04-17 15:51:20,204][48472] Num visible devices: 1
+[2025-04-17 15:51:20,205][48459] Num visible devices: 1
+[2025-04-17 15:51:20,206][48459] Starting seed is not provided
+[2025-04-17 15:51:20,207][48459] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:51:20,207][48459] Initializing actor-critic model on device cuda:0
+[2025-04-17 15:51:20,210][48459] RunningMeanStd input shape: (3, 72, 128)
+[2025-04-17 15:51:20,226][48459] RunningMeanStd input shape: (1,)
+[2025-04-17 15:51:20,270][48459] ConvEncoder: input_channels=3
+[2025-04-17 15:51:20,552][48459] Conv encoder output size: 512
+[2025-04-17 15:51:20,554][48459] Policy head output size: 512
+[2025-04-17 15:51:20,632][48459] Created Actor Critic model with architecture:
+[2025-04-17 15:51:20,636][48459] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2025-04-17 15:51:34,270][38462] Heartbeat connected on Batcher_0
+[2025-04-17 15:51:34,463][38462] Heartbeat connected on RolloutWorker_w2
+[2025-04-17 15:51:34,917][38462] Heartbeat connected on RolloutWorker_w1
+[2025-04-17 15:51:35,512][38462] Heartbeat connected on RolloutWorker_w4
+[2025-04-17 15:51:35,962][38462] Heartbeat connected on RolloutWorker_w3
+[2025-04-17 15:51:36,514][38462] Heartbeat connected on RolloutWorker_w5
+[2025-04-17 15:51:37,019][38462] Heartbeat connected on RolloutWorker_w0
+[2025-04-17 15:51:37,729][38462] Heartbeat connected on InferenceWorker_p0-w0
+[2025-04-17 15:51:38,109][38462] Heartbeat connected on RolloutWorker_w6
+[2025-04-17 15:51:38,468][38462] Heartbeat connected on RolloutWorker_w7
+[2025-04-17 15:52:29,514][38462] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 38462], exiting...
+[2025-04-17 15:52:29,567][48479] Stopping RolloutWorker_w6...
+[2025-04-17 15:52:29,567][48475] Stopping RolloutWorker_w2...
+[2025-04-17 15:52:29,566][48478] Stopping RolloutWorker_w5...
+[2025-04-17 15:52:29,567][48474] Stopping RolloutWorker_w1...
+[2025-04-17 15:52:29,567][48476] Stopping RolloutWorker_w3...
+[2025-04-17 15:52:29,566][48477] Stopping RolloutWorker_w4...
+[2025-04-17 15:52:29,567][48475] Loop rollout_proc2_evt_loop terminating...
+[2025-04-17 15:52:29,567][48479] Loop rollout_proc6_evt_loop terminating...
+[2025-04-17 15:52:29,568][48478] Loop rollout_proc5_evt_loop terminating...
+[2025-04-17 15:52:29,568][48476] Loop rollout_proc3_evt_loop terminating...
+[2025-04-17 15:52:29,568][48474] Loop rollout_proc1_evt_loop terminating...
+[2025-04-17 15:52:29,567][48480] Stopping RolloutWorker_w7...
+[2025-04-17 15:52:29,568][48477] Loop rollout_proc4_evt_loop terminating...
+[2025-04-17 15:52:29,567][48472] Stopping InferenceWorker_p0-w0...
+[2025-04-17 15:52:29,568][48473] Stopping RolloutWorker_w0...
+[2025-04-17 15:52:29,568][48480] Loop rollout_proc7_evt_loop terminating...
+[2025-04-17 15:52:29,569][48472] Loop inference_proc0-0_evt_loop terminating...
+[2025-04-17 15:52:29,569][48473] Loop rollout_proc0_evt_loop terminating...
+[2025-04-17 15:52:29,568][48459] Stopping Batcher_0...
+[2025-04-17 15:52:29,570][48459] Loop batcher_evt_loop terminating...
+[2025-04-17 15:52:29,566][38462] Runner profile tree view:
+main_loop: 75.2660
+[2025-04-17 15:52:29,576][38462] Collected {}, FPS: 0.0
+[2025-04-17 15:52:31,947][48459] Using optimizer <class 'torch.optim.adam.Adam'>
+[2025-04-17 15:52:33,028][48459] No checkpoints found
+[2025-04-17 15:52:33,028][48459] Did not load from checkpoint, starting from scratch!
+[2025-04-17 15:52:33,029][48459] Initialized policy 0 weights for model version 0
+[2025-04-17 15:52:33,037][48459] LearnerWorker_p0 finished initialization!
+[2025-04-17 15:52:33,037][48459] Saving /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2025-04-17 15:52:33,058][48459] Stopping LearnerWorker_p0...
+[2025-04-17 15:52:33,058][48459] Loop learner_proc0_evt_loop terminating...
+[2025-04-17 15:58:07,874][38462] Environment doom_basic already registered, overwriting...
+[2025-04-17 15:58:07,878][38462] Environment doom_two_colors_easy already registered, overwriting...
+[2025-04-17 15:58:07,878][38462] Environment doom_two_colors_hard already registered, overwriting...
+[2025-04-17 15:58:07,879][38462] Environment doom_dm already registered, overwriting...
+[2025-04-17 15:58:07,880][38462] Environment doom_dwango5 already registered, overwriting...
+[2025-04-17 15:58:07,881][38462] Environment doom_my_way_home_flat_actions already registered, overwriting...
+[2025-04-17 15:58:07,882][38462] Environment doom_defend_the_center_flat_actions already registered, overwriting...
+[2025-04-17 15:58:07,883][38462] Environment doom_my_way_home already registered, overwriting...
+[2025-04-17 15:58:07,885][38462] Environment doom_deadly_corridor already registered, overwriting...
+[2025-04-17 15:58:07,886][38462] Environment doom_defend_the_center already registered, overwriting...
+[2025-04-17 15:58:07,887][38462] Environment doom_defend_the_line already registered, overwriting...
+[2025-04-17 15:58:07,888][38462] Environment doom_health_gathering already registered, overwriting...
+[2025-04-17 15:58:07,889][38462] Environment doom_health_gathering_supreme already registered, overwriting...
+[2025-04-17 15:58:07,890][38462] Environment doom_battle already registered, overwriting...
+[2025-04-17 15:58:07,891][38462] Environment doom_battle2 already registered, overwriting...
+[2025-04-17 15:58:07,892][38462] Environment doom_duel_bots already registered, overwriting...
+[2025-04-17 15:58:07,892][38462] Environment doom_deathmatch_bots already registered, overwriting...
+[2025-04-17 15:58:07,894][38462] Environment doom_duel already registered, overwriting...
+[2025-04-17 15:58:07,894][38462] Environment doom_deathmatch_full already registered, overwriting...
+[2025-04-17 15:58:07,895][38462] Environment doom_benchmark already registered, overwriting...
+[2025-04-17 15:58:07,896][38462] register_encoder_factory: <function make_vizdoom_encoder at 0x7fd13a31b250>
+[2025-04-17 15:58:07,909][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
+[2025-04-17 15:58:07,911][38462] Overriding arg 'num_workers' with value 1 passed from command line
+[2025-04-17 15:58:07,913][38462] Overriding arg 'num_envs_per_worker' with value 1 passed from command line
+[2025-04-17 15:58:07,913][38462] Overriding arg 'train_for_env_steps' with value 4000 passed from command line
+[2025-04-17 15:58:07,921][38462] Experiment dir /home/uccacbo/Deep-RL-HF/train_dir/default_experiment already exists!
+[2025-04-17 15:58:07,922][38462] Resuming existing experiment from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment...
+[2025-04-17 15:58:07,924][38462] Weights and Biases integration disabled
+[2025-04-17 15:58:07,933][38462] Environment var CUDA_VISIBLE_DEVICES is 0
+
+[2025-04-17 15:58:10,470][38462] cfg.num_envs_per_worker=1 must be a multiple of cfg.worker_num_splits=2 (for double-buffered sampling you need to use even number of envs per worker)
+[2025-04-17 15:58:21,350][38462] Environment doom_basic already registered, overwriting...
+[2025-04-17 15:58:21,351][38462] Environment doom_two_colors_easy already registered, overwriting...
+[2025-04-17 15:58:21,352][38462] Environment doom_two_colors_hard already registered, overwriting...
+[2025-04-17 15:58:21,353][38462] Environment doom_dm already registered, overwriting...
+[2025-04-17 15:58:21,354][38462] Environment doom_dwango5 already registered, overwriting...
+[2025-04-17 15:58:21,354][38462] Environment doom_my_way_home_flat_actions already registered, overwriting...
+[2025-04-17 15:58:21,355][38462] Environment doom_defend_the_center_flat_actions already registered, overwriting...
+[2025-04-17 15:58:21,355][38462] Environment doom_my_way_home already registered, overwriting...
+[2025-04-17 15:58:21,356][38462] Environment doom_deadly_corridor already registered, overwriting...
+[2025-04-17 15:58:21,356][38462] Environment doom_defend_the_center already registered, overwriting...
+[2025-04-17 15:58:21,357][38462] Environment doom_defend_the_line already registered, overwriting...
+[2025-04-17 15:58:21,358][38462] Environment doom_health_gathering already registered, overwriting...
+[2025-04-17 15:58:21,359][38462] Environment doom_health_gathering_supreme already registered, overwriting...
+[2025-04-17 15:58:21,359][38462] Environment doom_battle already registered, overwriting...
+[2025-04-17 15:58:21,360][38462] Environment doom_battle2 already registered, overwriting...
+[2025-04-17 15:58:21,361][38462] Environment doom_duel_bots already registered, overwriting...
+[2025-04-17 15:58:21,361][38462] Environment doom_deathmatch_bots already registered, overwriting...
+[2025-04-17 15:58:21,362][38462] Environment doom_duel already registered, overwriting...
+[2025-04-17 15:58:21,363][38462] Environment doom_deathmatch_full already registered, overwriting...
+[2025-04-17 15:58:21,364][38462] Environment doom_benchmark already registered, overwriting...
+[2025-04-17 15:58:21,365][38462] register_encoder_factory: <function make_vizdoom_encoder at 0x7fd13a31b250>
+[2025-04-17 15:58:21,371][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
+[2025-04-17 15:58:21,372][38462] Overriding arg 'num_workers' with value 1 passed from command line
+[2025-04-17 15:58:21,373][38462] Overriding arg 'num_envs_per_worker' with value 2 passed from command line
+[2025-04-17 15:58:21,373][38462] Overriding arg 'train_for_env_steps' with value 4000 passed from command line
+[2025-04-17 15:58:21,378][38462] Experiment dir /home/uccacbo/Deep-RL-HF/train_dir/default_experiment already exists!
+[2025-04-17 15:58:21,379][38462] Resuming existing experiment from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment...
+[2025-04-17 15:58:21,380][38462] Weights and Biases integration disabled
+[2025-04-17 15:58:21,382][38462] Environment var CUDA_VISIBLE_DEVICES is 0
+
+[2025-04-17 15:58:22,980][38462] Starting experiment with the following configuration:
+help=False
+algo=APPO
+env=doom_health_gathering_supreme
+experiment=default_experiment
+train_dir=/home/uccacbo/Deep-RL-HF/train_dir
+restart_behavior=resume
+device=gpu
+seed=None
+num_policies=1
+async_rl=True
+serial_mode=False
+batched_sampling=False
+num_batches_to_accumulate=2
+worker_num_splits=2
+policy_workers_per_policy=1
+max_policy_lag=1000
+num_workers=1
+num_envs_per_worker=2
+batch_size=1024
+num_batches_per_epoch=1
+num_epochs=1
+rollout=32
+recurrence=32
+shuffle_minibatches=False
+gamma=0.99
+reward_scale=1.0
+reward_clip=1000.0
+value_bootstrap=False
+normalize_returns=True
+exploration_loss_coeff=0.001
+value_loss_coeff=0.5
+kl_loss_coeff=0.0
+exploration_loss=symmetric_kl
+gae_lambda=0.95
+ppo_clip_ratio=0.1
+ppo_clip_value=0.2
+with_vtrace=False
+vtrace_rho=1.0
+vtrace_c=1.0
+optimizer=adam
+adam_eps=1e-06
+adam_beta1=0.9
+adam_beta2=0.999
+max_grad_norm=4.0
+learning_rate=0.0001
+lr_schedule=constant
+lr_schedule_kl_threshold=0.008
+lr_adaptive_min=1e-06
+lr_adaptive_max=0.01
+obs_subtract_mean=0.0
+obs_scale=255.0
+normalize_input=True
+normalize_input_keys=None
+decorrelate_experience_max_seconds=0
+decorrelate_envs_on_one_worker=True
+actor_worker_gpus=[]
+set_workers_cpu_affinity=True
+force_envs_single_thread=False
+default_niceness=0
+log_to_file=True
+experiment_summaries_interval=10
+flush_summaries_interval=30
+stats_avg=100
+summaries_use_frameskip=True
+heartbeat_interval=20
+heartbeat_reporting_interval=600
+train_for_env_steps=4000
+train_for_seconds=10000000000
+save_every_sec=120
+keep_checkpoints=2
+load_checkpoint_kind=latest
+save_milestones_sec=-1
+save_best_every_sec=5
+save_best_metric=reward
+save_best_after=100000
+benchmark=False
+encoder_mlp_layers=[512, 512]
+encoder_conv_architecture=convnet_simple
+encoder_conv_mlp_layers=[512]
+use_rnn=True
+rnn_size=512
+rnn_type=gru
+rnn_num_layers=1
+decoder_mlp_layers=[]
+nonlinearity=elu
+policy_initialization=orthogonal
+policy_init_gain=1.0
+actor_critic_share_weights=True
+adaptive_stddev=True
+continuous_tanh_scale=0.0
+initial_stddev=1.0
+use_env_info_cache=False
+env_gpu_actions=False
+env_gpu_observations=True
+env_frameskip=4
+env_framestack=1
+pixel_format=CHW
+use_record_episode_statistics=False
+with_wandb=False
+wandb_user=None
+wandb_project=sample_factory
+wandb_group=None
+wandb_job_type=SF
+wandb_tags=[]
+with_pbt=False
+pbt_mix_policies_in_one_env=True
+pbt_period_env_steps=5000000
+pbt_start_mutation=20000000
+pbt_replace_fraction=0.3
+pbt_mutation_rate=0.15
+pbt_replace_reward_gap=0.1
+pbt_replace_reward_gap_absolute=1e-06
+pbt_optimize_gamma=False
+pbt_target_objective=true_objective
+pbt_perturb_min=1.1
+pbt_perturb_max=1.5
+num_agents=-1
+num_humans=0
+num_bots=-1
+start_bot_difficulty=None
+timelimit=None
+res_w=128
+res_h=72
+wide_aspect_ratio=False
+eval_env_frameskip=1
+fps=35
+command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=400000
+cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 400000}
+git_hash=unknown
+git_repo_name=not a git repository
+[2025-04-17 15:58:22,981][38462] Saving configuration to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json...
+[2025-04-17 15:58:22,982][38462] Rollout worker 0 uses device cpu
+[2025-04-17 15:58:23,030][38462] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:58:23,031][38462] InferenceWorker_p0-w0: min num requests: 1
+[2025-04-17 15:58:23,035][38462] Starting all processes...
+[2025-04-17 15:58:23,035][38462] Starting process learner_proc0
+[2025-04-17 15:58:23,085][38462] Starting all processes...
+[2025-04-17 15:58:23,088][38462] Starting process inference_proc0-0
+[2025-04-17 15:58:23,089][38462] Starting process rollout_proc0
+[2025-04-17 15:58:24,649][51423] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:58:24,649][51423] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2025-04-17 15:58:24,656][51429] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:58:24,671][51430] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:58:24,671][51430] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2025-04-17 15:58:24,718][51430] Num visible devices: 1
+[2025-04-17 15:58:24,718][51423] Num visible devices: 1
+[2025-04-17 15:58:24,719][51423] Starting seed is not provided
+[2025-04-17 15:58:24,720][51423] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:58:24,720][51423] Initializing actor-critic model on device cuda:0
+[2025-04-17 15:58:24,720][51423] RunningMeanStd input shape: (3, 72, 128)
+[2025-04-17 15:58:24,721][51423] RunningMeanStd input shape: (1,)
+[2025-04-17 15:58:24,728][51423] ConvEncoder: input_channels=3
+[2025-04-17 15:58:24,838][51423] Conv encoder output size: 512
+[2025-04-17 15:58:24,839][51423] Policy head output size: 512
+[2025-04-17 15:58:24,856][51423] Created Actor Critic model with architecture:
+[2025-04-17 15:58:24,856][51423] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2025-04-17 15:58:25,270][51423] Using optimizer <class 'torch.optim.adam.Adam'>
+[2025-04-17 15:58:26,272][51423] Loading state from checkpoint /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2025-04-17 15:58:26,309][51423] Loading model from checkpoint
+[2025-04-17 15:58:26,310][51423] Loaded experiment state at self.train_step=0, self.env_steps=0
+[2025-04-17 15:58:26,310][51423] Initialized policy 0 weights for model version 0
+[2025-04-17 15:58:26,315][51423] LearnerWorker_p0 finished initialization!
+[2025-04-17 15:58:26,316][51423] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:58:26,382][38462] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:26,480][51430] RunningMeanStd input shape: (3, 72, 128)
+[2025-04-17 15:58:26,481][51430] RunningMeanStd input shape: (1,)
+[2025-04-17 15:58:26,488][51430] ConvEncoder: input_channels=3
+[2025-04-17 15:58:26,556][51430] Conv encoder output size: 512
+[2025-04-17 15:58:26,556][51430] Policy head output size: 512
+[2025-04-17 15:58:26,595][38462] Inference worker 0-0 is ready!
+[2025-04-17 15:58:26,596][38462] All inference workers are ready! Signal rollout workers to start!
+[2025-04-17 15:58:26,692][51429] Doom resolution: 160x120, resize resolution: (128, 72)
+[2025-04-17 15:58:26,978][51429] Decorrelating experience for 0 frames...
+[2025-04-17 15:58:27,129][51429] Decorrelating experience for 32 frames...
+[2025-04-17 15:58:31,382][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 72.2. Samples: 361. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:31,385][38462] Avg episode reward: [(0, '4.080')]
+[2025-04-17 15:58:36,385][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 167.4. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:36,420][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:58:41,389][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 111.6. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:41,405][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:58:43,078][38462] Heartbeat connected on Batcher_0
+[2025-04-17 15:58:43,145][38462] Heartbeat connected on RolloutWorker_w0
+[2025-04-17 15:58:46,385][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 83.7. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:46,453][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:58:51,549][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 66.6. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:52,038][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:58:56,514][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 55.6. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:56,762][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:01,432][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 47.8. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:01,678][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:06,401][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 41.8. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:06,881][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:11,636][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 37.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:12,123][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:16,672][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 29.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:17,010][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:23,230][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:23,341][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:26,435][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:26,561][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:31,394][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:31,468][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:36,412][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:36,464][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:41,432][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:41,842][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:46,467][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:46,605][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:51,396][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:51,426][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:57,740][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:58,246][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:01,482][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:01,776][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:06,505][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:06,724][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:11,616][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:11,940][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:16,522][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:16,973][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:21,574][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:21,990][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:26,518][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:26,894][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:32,477][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:33,016][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:36,512][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:36,813][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:41,673][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:42,174][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:46,558][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:46,922][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:51,544][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:51,982][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:56,573][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:56,913][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:01,408][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:01,494][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:06,768][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:07,035][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:11,481][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:11,860][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:16,526][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:17,049][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:21,568][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:22,025][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:26,577][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:27,041][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:32,480][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:37,774][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:41,829][38462] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 38462], exiting...
+[2025-04-17 16:01:41,835][51423] Stopping Batcher_0...
+[2025-04-17 16:01:41,836][51423] Loop batcher_evt_loop terminating...
+[2025-04-17 16:01:41,835][38462] Runner profile tree view:
+main_loop: 198.8007
+[2025-04-17 16:01:41,838][38462] Collected {0: 0}, FPS: 0.0
+[2025-04-17 16:01:41,897][51429] Stopping RolloutWorker_w0...
+[2025-04-17 16:01:41,900][51429] Loop rollout_proc0_evt_loop terminating...
+[2025-04-17 16:01:42,285][51430] Weights refcount: 2 0
+[2025-04-17 16:01:42,292][51430] Stopping InferenceWorker_p0-w0...
+[2025-04-17 16:01:42,293][51430] Loop inference_proc0-0_evt_loop terminating...
+[2025-04-17 16:01:42,343][51423] Saving /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
+[2025-04-17 16:01:42,607][51423] Saving /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
+[2025-04-17 16:01:42,772][51423] Stopping LearnerWorker_p0...
+[2025-04-17 16:01:42,772][51423] Loop learner_proc0_evt_loop terminating...
+[2025-04-17 16:02:52,360][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
+[2025-04-17 16:02:52,360][38462] Adding new argument 'no_render'=True that is not in the saved config file!
+[2025-04-17 16:02:52,361][38462] Adding new argument 'save_video'=True that is not in the saved config file!
+[2025-04-17 16:02:52,362][38462] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2025-04-17 16:02:52,362][38462] Adding new argument 'video_name'=None that is not in the saved config file!
+[2025-04-17 16:02:52,362][38462] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2025-04-17 16:02:52,363][38462] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2025-04-17 16:02:52,364][38462] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2025-04-17 16:02:52,365][38462] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2025-04-17 16:02:52,365][38462] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2025-04-17 16:02:52,366][38462] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2025-04-17 16:02:52,367][38462] Adding new argument 'train_script'=None that is not in the saved config file!
+[2025-04-17 16:02:52,368][38462] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2025-04-17 16:02:52,369][38462] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2025-04-17 16:02:52,401][38462] Doom resolution: 160x120, resize resolution: (128, 72)
+[2025-04-17 16:02:52,406][38462] RunningMeanStd input shape: (3, 72, 128)
+[2025-04-17 16:02:52,409][38462] RunningMeanStd input shape: (1,)
+[2025-04-17 16:02:52,439][38462] ConvEncoder: input_channels=3
+[2025-04-17 16:02:52,562][38462] Conv encoder output size: 512
+[2025-04-17 16:02:52,562][38462] Policy head output size: 512
+[2025-04-17 16:02:53,101][38462] Loading state from checkpoint /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
+[2025-04-17 16:02:53,978][38462] Num frames 100...
+[2025-04-17 16:02:54,078][38462] Num frames 200...
+[2025-04-17 16:02:54,181][38462] Num frames 300...
+[2025-04-17 16:02:54,318][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-04-17 16:02:54,319][38462] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-04-17 16:02:54,342][38462] Num frames 400...
+[2025-04-17 16:02:54,452][38462] Num frames 500...
+[2025-04-17 16:02:54,546][38462] Num frames 600...
+[2025-04-17 16:02:54,647][38462] Num frames 700...
+[2025-04-17 16:02:54,754][38462] Num frames 800...
+[2025-04-17 16:02:54,846][38462] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+[2025-04-17 16:02:54,847][38462] Avg episode reward: 4.660, avg true_objective: 4.160
+[2025-04-17 16:02:54,921][38462] Num frames 900...
+[2025-04-17 16:02:55,019][38462] Num frames 1000...
+[2025-04-17 16:02:55,120][38462] Num frames 1100...
+[2025-04-17 16:02:55,219][38462] Num frames 1200...
+[2025-04-17 16:02:55,293][38462] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
+[2025-04-17 16:02:55,295][38462] Avg episode reward: 4.387, avg true_objective: 4.053
+[2025-04-17 16:02:55,393][38462] Num frames 1300...
+[2025-04-17 16:02:55,488][38462] Num frames 1400...
+[2025-04-17 16:02:55,594][38462] Num frames 1500...
+[2025-04-17 16:02:55,695][38462] Num frames 1600...
+[2025-04-17 16:02:55,780][38462] Avg episode rewards: #0: 4.580, true rewards: #0: 4.080
+[2025-04-17 16:02:55,781][38462] Avg episode reward: 4.580, avg true_objective: 4.080
+[2025-04-17 16:02:55,847][38462] Num frames 1700...
+[2025-04-17 16:02:55,945][38462] Num frames 1800...
+[2025-04-17 16:02:56,038][38462] Num frames 1900...
+[2025-04-17 16:02:56,132][38462] Num frames 2000...
+[2025-04-17 16:02:56,203][38462] Avg episode rewards: #0: 4.432, true rewards: #0: 4.032
+[2025-04-17 16:02:56,204][38462] Avg episode reward: 4.432, avg true_objective: 4.032
+[2025-04-17 16:02:56,288][38462] Num frames 2100...
+[2025-04-17 16:02:56,378][38462] Num frames 2200...
+[2025-04-17 16:02:56,480][38462] Num frames 2300...
+[2025-04-17 16:02:56,581][38462] Num frames 2400...
+[2025-04-17 16:02:56,703][38462] Avg episode rewards: #0: 4.607, true rewards: #0: 4.107
+[2025-04-17 16:02:56,704][38462] Avg episode reward: 4.607, avg true_objective: 4.107
+[2025-04-17 16:02:56,756][38462] Num frames 2500...
+[2025-04-17 16:02:56,866][38462] Num frames 2600...
+[2025-04-17 16:02:56,971][38462] Num frames 2700...
+[2025-04-17 16:02:57,079][38462] Num frames 2800...
+[2025-04-17 16:02:57,183][38462] Avg episode rewards: #0: 4.497, true rewards: #0: 4.069
+[2025-04-17 16:02:57,184][38462] Avg episode reward: 4.497, avg true_objective: 4.069
+[2025-04-17 16:02:57,247][38462] Num frames 2900...
+[2025-04-17 16:02:57,379][38462] Num frames 3000...
+[2025-04-17 16:02:57,473][38462] Num frames 3100...
+[2025-04-17 16:02:57,573][38462] Num frames 3200...
+[2025-04-17 16:02:57,667][38462] Avg episode rewards: #0: 4.415, true rewards: #0: 4.040
+[2025-04-17 16:02:57,668][38462] Avg episode reward: 4.415, avg true_objective: 4.040
+[2025-04-17 16:02:57,740][38462] Num frames 3300...
+[2025-04-17 16:02:57,834][38462] Num frames 3400...
+[2025-04-17 16:02:57,924][38462] Num frames 3500...
+[2025-04-17 16:02:58,021][38462] Num frames 3600...
+[2025-04-17 16:02:58,089][38462] Avg episode rewards: #0: 4.351, true rewards: #0: 4.018
+[2025-04-17 16:02:58,090][38462] Avg episode reward: 4.351, avg true_objective: 4.018
+[2025-04-17 16:02:58,178][38462] Num frames 3700...
+[2025-04-17 16:02:58,280][38462] Num frames 3800...
+[2025-04-17 16:02:58,385][38462] Num frames 3900...
+[2025-04-17 16:02:58,479][38462] Num frames 4000...
+[2025-04-17 16:02:58,531][38462] Avg episode rewards: #0: 4.300, true rewards: #0: 4.000
+[2025-04-17 16:02:58,532][38462] Avg episode reward: 4.300, avg true_objective: 4.000
+[2025-04-17 16:03:03,627][38462] Replay video saved to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/replay.mp4!
+[2025-04-17 16:04:25,402][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
+[2025-04-17 16:04:25,403][38462] Adding new argument 'no_render'=True that is not in the saved config file!
+[2025-04-17 16:04:25,404][38462] Adding new argument 'save_video'=True that is not in the saved config file!
+[2025-04-17 16:04:25,405][38462] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2025-04-17 16:04:25,406][38462] Adding new argument 'video_name'=None that is not in the saved config file!
+[2025-04-17 16:04:25,406][38462] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2025-04-17 16:04:25,407][38462] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2025-04-17 16:04:25,408][38462] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2025-04-17 16:04:25,409][38462] Adding new argument 'hf_repository'='c-bone/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2025-04-17 16:04:25,410][38462] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2025-04-17 16:04:25,410][38462] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2025-04-17 16:04:25,411][38462] Adding new argument 'train_script'=None that is not in the saved config file!
+[2025-04-17 16:04:25,412][38462] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2025-04-17 16:04:25,413][38462] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2025-04-17 16:04:25,462][38462] RunningMeanStd input shape: (3, 72, 128)
+[2025-04-17 16:04:25,467][38462] RunningMeanStd input shape: (1,)
+[2025-04-17 16:04:25,484][38462] ConvEncoder: input_channels=3
+[2025-04-17 16:04:25,521][38462] Conv encoder output size: 512
+[2025-04-17 16:04:25,522][38462] Policy head output size: 512
+[2025-04-17 16:04:25,546][38462] Loading state from checkpoint /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
+[2025-04-17 16:04:26,051][38462] Num frames 100...
+[2025-04-17 16:04:26,217][38462] Num frames 200...
+[2025-04-17 16:04:26,369][38462] Num frames 300...
+[2025-04-17 16:04:26,574][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-04-17 16:04:26,575][38462] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-04-17 16:04:26,605][38462] Num frames 400...
+[2025-04-17 16:04:26,774][38462] Num frames 500...
+[2025-04-17 16:04:26,945][38462] Num frames 600...
+[2025-04-17 16:04:27,102][38462] Num frames 700...
+[2025-04-17 16:04:27,229][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-04-17 16:04:27,230][38462] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-04-17 16:04:27,286][38462] Num frames 800...
+[2025-04-17 16:04:27,450][38462] Num frames 900...
+[2025-04-17 16:04:27,620][38462] Num frames 1000...
+[2025-04-17 16:04:27,798][38462] Num frames 1100...
+[2025-04-17 16:04:27,952][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-04-17 16:04:27,953][38462] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-04-17 16:04:28,021][38462] Num frames 1200...
+[2025-04-17 16:04:28,161][38462] Num frames 1300...
+[2025-04-17 16:04:28,333][38462] Num frames 1400...
+[2025-04-17 16:04:28,505][38462] Num frames 1500...
+[2025-04-17 16:04:28,620][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-04-17 16:04:28,621][38462] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-04-17 16:04:28,729][38462] Num frames 1600...
+[2025-04-17 16:04:28,895][38462] Num frames 1700...
+[2025-04-17 16:04:29,022][38462] Num frames 1800...
+[2025-04-17 16:04:29,187][38462] Num frames 1900...
+[2025-04-17 16:04:29,376][38462] Avg episode rewards: #0: 4.168, true rewards: #0: 3.968
+[2025-04-17 16:04:29,378][38462] Avg episode reward: 4.168, avg true_objective: 3.968
+[2025-04-17 16:04:29,407][38462] Num frames 2000...
+[2025-04-17 16:04:29,574][38462] Num frames 2100...
+[2025-04-17 16:04:29,755][38462] Num frames 2200...
+[2025-04-17 16:04:29,866][38462] Avg episode rewards: #0: 3.900, true rewards: #0: 3.733
+[2025-04-17 16:04:29,867][38462] Avg episode reward: 3.900, avg true_objective: 3.733
+[2025-04-17 16:04:29,984][38462] Num frames 2300...
+[2025-04-17 16:04:30,150][38462] Num frames 2400...
+[2025-04-17 16:04:30,311][38462] Num frames 2500...
+[2025-04-17 16:04:30,476][38462] Num frames 2600...
+[2025-04-17 16:04:30,565][38462] Avg episode rewards: #0: 3.891, true rewards: #0: 3.749
+[2025-04-17 16:04:30,566][38462] Avg episode reward: 3.891, avg true_objective: 3.749
+[2025-04-17 16:04:30,690][38462] Num frames 2700...
+[2025-04-17 16:04:30,816][38462] Num frames 2800...
+[2025-04-17 16:04:30,980][38462] Num frames 2900...
+[2025-04-17 16:04:31,143][38462] Num frames 3000...
+[2025-04-17 16:04:31,311][38462] Avg episode rewards: #0: 4.090, true rewards: #0: 3.840
+[2025-04-17 16:04:31,312][38462] Avg episode reward: 4.090, avg true_objective: 3.840
+[2025-04-17 16:04:31,360][38462] Num frames 3100...
+[2025-04-17 16:04:33,597][38462] Num frames 3200...
+[2025-04-17 16:04:33,721][38462] Num frames 3300...
+[2025-04-17 16:04:33,886][38462] Num frames 3400...
+[2025-04-17 16:04:34,033][38462] Avg episode rewards: #0: 4.062, true rewards: #0: 3.840
+[2025-04-17 16:04:34,034][38462] Avg episode reward: 4.062, avg true_objective: 3.840
+[2025-04-17 16:04:34,108][38462] Num frames 3500...
+[2025-04-17 16:04:34,275][38462] Num frames 3600...
+[2025-04-17 16:04:34,440][38462] Num frames 3700...
+[2025-04-17 16:04:34,579][38462] Num frames 3800...
+[2025-04-17 16:04:34,732][38462] Num frames 3900...
+[2025-04-17 16:04:34,846][38462] Avg episode rewards: #0: 4.336, true rewards: #0: 3.936
+[2025-04-17 16:04:34,847][38462] Avg episode reward: 4.336, avg true_objective: 3.936
+[2025-04-17 16:04:40,292][38462] Replay video saved to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/replay.mp4!
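
Taken end to end, the log records the workflow behind this upload: a first launch aborted by keyboard interrupt, a config error (`num_envs_per_worker=1` is rejected because double-buffered sampling needs a multiple of `worker_num_splits=2`), a short resumed run to 4096 env steps, a local evaluation that wrote `replay.mp4`, and a final evaluation with `push_to_hub` enabled. A condensed sketch of that sequence, using only flags that appear in the log (the `sf_examples.vizdoom.*` module paths are an assumption based on the standard Sample-Factory examples layout):

```
python -m sf_examples.vizdoom.train_vizdoom --env=doom_health_gathering_supreme --num_workers=1 --num_envs_per_worker=2 --train_for_env_steps=4000
python -m sf_examples.vizdoom.enjoy_vizdoom --env=doom_health_gathering_supreme --no_render --save_video --max_num_episodes=10
python -m sf_examples.vizdoom.enjoy_vizdoom --env=doom_health_gathering_supreme --no_render --max_num_episodes=10 --push_to_hub --hf_repository=c-bone/rl_course_vizdoom_health_gathering_supreme
```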