c-bone committed
Commit 1e610e6 · verified · 1 Parent(s): 047d8c4

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
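The added rule routes `replay.mp4` through Git LFS, just like the archive and TensorBoard patterns above it. This is the line that `git lfs track` writes; a minimal sketch (assuming `git-lfs` is installed and initialized in the repo):

```
git lfs track "replay.mp4"   # appends the filter=lfs rule to .gitattributes
git add .gitattributes replay.mp4
```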
.summary/0/events.out.tfevents.1744901470.Cyprien-Bone-UCL ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcf232213ef535583a6ed8e1e0e2287069aff9c97a9aa2aa8b7fc653261c5a7b
+size 40
.summary/0/events.out.tfevents.1744901887.Cyprien-Bone-UCL ADDED
File without changes
.summary/0/events.out.tfevents.1744901901.Cyprien-Bone-UCL ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f19c3acdd07b3b2a42954ee9ba686d82a4a1409b8af806ff47ccb690e066da8a
+size 7582
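Each of these three-line stubs is a Git LFS pointer, not the binary itself: a spec `version`, the SHA-256 `oid` of the stored blob, and its `size` in bytes. The same pointer text can be reproduced locally for any tracked file (a sketch, assuming `git-lfs` is installed):

```
git lfs pointer --file=.summary/0/events.out.tfevents.1744901901.Cyprien-Bone-UCL
```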
README.md ADDED
@@ -0,0 +1,56 @@
+---
+library_name: sample-factory
+tags:
+- deep-reinforcement-learning
+- reinforcement-learning
+- sample-factory
+model-index:
+- name: APPO
+  results:
+  - task:
+      type: reinforcement-learning
+      name: reinforcement-learning
+    dataset:
+      name: doom_health_gathering_supreme
+      type: doom_health_gathering_supreme
+    metrics:
+    - type: mean_reward
+      value: 3.94 +/- 0.57
+      name: mean_reward
+      verified: false
+---
+
+An **APPO** model trained on the **doom_health_gathering_supreme** environment.
+
+This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+Documentation on how to use Sample-Factory can be found at https://www.samplefactory.dev/.
+
+## Downloading the model
+
+After installing Sample-Factory, download the model with:
+```
+python -m sample_factory.huggingface.load_from_hub -r c-bone/rl_course_vizdoom_health_gathering_supreme
+```
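+
+Alternatively, the raw repository files can be fetched with the plain Hugging Face CLI (a minimal sketch, assuming the `--local-dir` target matches the `./train_dir` layout that the scripts below expect):
+```
+huggingface-cli download c-bone/rl_course_vizdoom_health_gathering_supreme --local-dir ./train_dir/rl_course_vizdoom_health_gathering_supreme
+```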
+
+## Using the model
+
+To run the model after download, use the `enjoy` script corresponding to this environment:
+```
+python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
+```
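+
+For VizDoom environments the enjoy module is presumably `sf_examples.vizdoom.enjoy_vizdoom` from the standard Sample-Factory examples layout (an assumption; substitute your own module if it differs), e.g.:
+```
+python -m sf_examples.vizdoom.enjoy_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
+```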
+
+You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+
+## Training with this model
+
+To continue training with this model, use the `train` script corresponding to this environment:
+```
+python -m <path.to.train.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme --restart_behavior=resume --train_for_env_steps=10000000000
+```
+
+Note that you may have to adjust `--train_for_env_steps` to a suitably high number, as the experiment will resume from the step count at which it previously concluded.
+
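+Again, the concrete module for VizDoom is presumably `sf_examples.vizdoom.train_vizdoom` (the same assumption as above), so a resumed run would look like:
+```
+python -m sf_examples.vizdoom.train_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme --restart_behavior=resume --train_for_env_steps=10000000000
+```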
checkpoint_p0/checkpoint_000000000_0.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d00fb1a2bb3abae2a4e3257a57e24bbcdcadd1ae852097add4e09c4f6be78ade
+size 11939285
checkpoint_p0/checkpoint_000000001_4096.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25bd7d6dca2576306bf7b451fb8263f4f3722b27e0840e4a5dcb4913aac874d7
+size 34929349
config.json ADDED
@@ -0,0 +1,142 @@
+{
+  "help": false,
+  "algo": "APPO",
+  "env": "doom_health_gathering_supreme",
+  "experiment": "default_experiment",
+  "train_dir": "/home/uccacbo/Deep-RL-HF/train_dir",
+  "restart_behavior": "resume",
+  "device": "gpu",
+  "seed": null,
+  "num_policies": 1,
+  "async_rl": true,
+  "serial_mode": false,
+  "batched_sampling": false,
+  "num_batches_to_accumulate": 2,
+  "worker_num_splits": 2,
+  "policy_workers_per_policy": 1,
+  "max_policy_lag": 1000,
+  "num_workers": 1,
+  "num_envs_per_worker": 2,
+  "batch_size": 1024,
+  "num_batches_per_epoch": 1,
+  "num_epochs": 1,
+  "rollout": 32,
+  "recurrence": 32,
+  "shuffle_minibatches": false,
+  "gamma": 0.99,
+  "reward_scale": 1.0,
+  "reward_clip": 1000.0,
+  "value_bootstrap": false,
+  "normalize_returns": true,
+  "exploration_loss_coeff": 0.001,
+  "value_loss_coeff": 0.5,
+  "kl_loss_coeff": 0.0,
+  "exploration_loss": "symmetric_kl",
+  "gae_lambda": 0.95,
+  "ppo_clip_ratio": 0.1,
+  "ppo_clip_value": 0.2,
+  "with_vtrace": false,
+  "vtrace_rho": 1.0,
+  "vtrace_c": 1.0,
+  "optimizer": "adam",
+  "adam_eps": 1e-06,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.999,
+  "max_grad_norm": 4.0,
+  "learning_rate": 0.0001,
+  "lr_schedule": "constant",
+  "lr_schedule_kl_threshold": 0.008,
+  "lr_adaptive_min": 1e-06,
+  "lr_adaptive_max": 0.01,
+  "obs_subtract_mean": 0.0,
+  "obs_scale": 255.0,
+  "normalize_input": true,
+  "normalize_input_keys": null,
+  "decorrelate_experience_max_seconds": 0,
+  "decorrelate_envs_on_one_worker": true,
+  "actor_worker_gpus": [],
+  "set_workers_cpu_affinity": true,
+  "force_envs_single_thread": false,
+  "default_niceness": 0,
+  "log_to_file": true,
+  "experiment_summaries_interval": 10,
+  "flush_summaries_interval": 30,
+  "stats_avg": 100,
+  "summaries_use_frameskip": true,
+  "heartbeat_interval": 20,
+  "heartbeat_reporting_interval": 600,
+  "train_for_env_steps": 4000,
+  "train_for_seconds": 10000000000,
+  "save_every_sec": 120,
+  "keep_checkpoints": 2,
+  "load_checkpoint_kind": "latest",
+  "save_milestones_sec": -1,
+  "save_best_every_sec": 5,
+  "save_best_metric": "reward",
+  "save_best_after": 100000,
+  "benchmark": false,
+  "encoder_mlp_layers": [
+    512,
+    512
+  ],
+  "encoder_conv_architecture": "convnet_simple",
+  "encoder_conv_mlp_layers": [
+    512
+  ],
+  "use_rnn": true,
+  "rnn_size": 512,
+  "rnn_type": "gru",
+  "rnn_num_layers": 1,
+  "decoder_mlp_layers": [],
+  "nonlinearity": "elu",
+  "policy_initialization": "orthogonal",
+  "policy_init_gain": 1.0,
+  "actor_critic_share_weights": true,
+  "adaptive_stddev": true,
+  "continuous_tanh_scale": 0.0,
+  "initial_stddev": 1.0,
+  "use_env_info_cache": false,
+  "env_gpu_actions": false,
+  "env_gpu_observations": true,
+  "env_frameskip": 4,
+  "env_framestack": 1,
+  "pixel_format": "CHW",
+  "use_record_episode_statistics": false,
+  "with_wandb": false,
+  "wandb_user": null,
+  "wandb_project": "sample_factory",
+  "wandb_group": null,
+  "wandb_job_type": "SF",
+  "wandb_tags": [],
+  "with_pbt": false,
+  "pbt_mix_policies_in_one_env": true,
+  "pbt_period_env_steps": 5000000,
+  "pbt_start_mutation": 20000000,
+  "pbt_replace_fraction": 0.3,
+  "pbt_mutation_rate": 0.15,
+  "pbt_replace_reward_gap": 0.1,
+  "pbt_replace_reward_gap_absolute": 1e-06,
+  "pbt_optimize_gamma": false,
+  "pbt_target_objective": "true_objective",
+  "pbt_perturb_min": 1.1,
+  "pbt_perturb_max": 1.5,
+  "num_agents": -1,
+  "num_humans": 0,
+  "num_bots": -1,
+  "start_bot_difficulty": null,
+  "timelimit": null,
+  "res_w": 128,
+  "res_h": 72,
+  "wide_aspect_ratio": false,
+  "eval_env_frameskip": 1,
+  "fps": 35,
+  "command_line": "--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=400000",
+  "cli_args": {
+    "env": "doom_health_gathering_supreme",
+    "num_workers": 8,
+    "num_envs_per_worker": 4,
+    "train_for_env_steps": 400000
+  },
+  "git_hash": "unknown",
+  "git_repo_name": "not a git repository"
+}
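The config captures the complete APPO hyperparameter set for this run, including the command-line overrides that produced it. A quick way to pull out the key training knobs (a sketch using `jq`, assuming it is installed):

```
jq '{algo, env, learning_rate, gamma, batch_size, rollout, num_workers, train_for_env_steps}' config.json
```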
replay.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ccefb1f584447e4c49efdc2d81038a36c6797a4a6b48a5831e78d7b85ae7580
+size 5945675
sf_log.txt ADDED
@@ -0,0 +1,658 @@
+[2025-04-17 15:51:14,150][38462] Saving configuration to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json...
+[2025-04-17 15:51:14,151][38462] Rollout worker 0 uses device cpu
+[2025-04-17 15:51:14,152][38462] Rollout worker 1 uses device cpu
+[2025-04-17 15:51:14,152][38462] Rollout worker 2 uses device cpu
+[2025-04-17 15:51:14,153][38462] Rollout worker 3 uses device cpu
+[2025-04-17 15:51:14,154][38462] Rollout worker 4 uses device cpu
+[2025-04-17 15:51:14,155][38462] Rollout worker 5 uses device cpu
+[2025-04-17 15:51:14,156][38462] Rollout worker 6 uses device cpu
+[2025-04-17 15:51:14,157][38462] Rollout worker 7 uses device cpu
+[2025-04-17 15:51:14,281][38462] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:51:14,281][38462] InferenceWorker_p0-w0: min num requests: 2
+[2025-04-17 15:51:14,301][38462] Starting all processes...
+[2025-04-17 15:51:14,302][38462] Starting process learner_proc0
+[2025-04-17 15:51:14,355][38462] Starting all processes...
+[2025-04-17 15:51:14,362][38462] Starting process inference_proc0-0
+[2025-04-17 15:51:14,362][38462] Starting process rollout_proc0
+[2025-04-17 15:51:14,363][38462] Starting process rollout_proc1
+[2025-04-17 15:51:14,364][38462] Starting process rollout_proc2
+[2025-04-17 15:51:14,364][38462] Starting process rollout_proc3
+[2025-04-17 15:51:14,365][38462] Starting process rollout_proc4
+[2025-04-17 15:51:14,366][38462] Starting process rollout_proc5
+[2025-04-17 15:51:14,366][38462] Starting process rollout_proc6
+[2025-04-17 15:51:14,369][38462] Starting process rollout_proc7
+[2025-04-17 15:51:20,105][48477] Worker 4 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48473] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48479] Worker 6 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48474] Worker 1 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48480] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48475] Worker 2 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48478] Worker 5 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,105][48476] Worker 3 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:51:20,106][48472] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:51:20,106][48459] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:51:20,106][48472] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2025-04-17 15:51:20,106][48459] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2025-04-17 15:51:20,204][48472] Num visible devices: 1
+[2025-04-17 15:51:20,205][48459] Num visible devices: 1
+[2025-04-17 15:51:20,206][48459] Starting seed is not provided
+[2025-04-17 15:51:20,207][48459] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:51:20,207][48459] Initializing actor-critic model on device cuda:0
+[2025-04-17 15:51:20,210][48459] RunningMeanStd input shape: (3, 72, 128)
+[2025-04-17 15:51:20,226][48459] RunningMeanStd input shape: (1,)
+[2025-04-17 15:51:20,270][48459] ConvEncoder: input_channels=3
+[2025-04-17 15:51:20,552][48459] Conv encoder output size: 512
+[2025-04-17 15:51:20,554][48459] Policy head output size: 512
+[2025-04-17 15:51:20,632][48459] Created Actor Critic model with architecture:
+[2025-04-17 15:51:20,636][48459] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2025-04-17 15:51:34,270][38462] Heartbeat connected on Batcher_0
+[2025-04-17 15:51:34,463][38462] Heartbeat connected on RolloutWorker_w2
+[2025-04-17 15:51:34,917][38462] Heartbeat connected on RolloutWorker_w1
+[2025-04-17 15:51:35,512][38462] Heartbeat connected on RolloutWorker_w4
+[2025-04-17 15:51:35,962][38462] Heartbeat connected on RolloutWorker_w3
+[2025-04-17 15:51:36,514][38462] Heartbeat connected on RolloutWorker_w5
+[2025-04-17 15:51:37,019][38462] Heartbeat connected on RolloutWorker_w0
+[2025-04-17 15:51:37,729][38462] Heartbeat connected on InferenceWorker_p0-w0
+[2025-04-17 15:51:38,109][38462] Heartbeat connected on RolloutWorker_w6
+[2025-04-17 15:51:38,468][38462] Heartbeat connected on RolloutWorker_w7
+[2025-04-17 15:52:29,514][38462] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 38462], exiting...
+[2025-04-17 15:52:29,567][48479] Stopping RolloutWorker_w6...
+[2025-04-17 15:52:29,567][48475] Stopping RolloutWorker_w2...
+[2025-04-17 15:52:29,566][48478] Stopping RolloutWorker_w5...
+[2025-04-17 15:52:29,567][48474] Stopping RolloutWorker_w1...
+[2025-04-17 15:52:29,567][48476] Stopping RolloutWorker_w3...
+[2025-04-17 15:52:29,566][48477] Stopping RolloutWorker_w4...
+[2025-04-17 15:52:29,567][48475] Loop rollout_proc2_evt_loop terminating...
+[2025-04-17 15:52:29,567][48479] Loop rollout_proc6_evt_loop terminating...
+[2025-04-17 15:52:29,568][48478] Loop rollout_proc5_evt_loop terminating...
+[2025-04-17 15:52:29,568][48476] Loop rollout_proc3_evt_loop terminating...
+[2025-04-17 15:52:29,568][48474] Loop rollout_proc1_evt_loop terminating...
+[2025-04-17 15:52:29,567][48480] Stopping RolloutWorker_w7...
+[2025-04-17 15:52:29,568][48477] Loop rollout_proc4_evt_loop terminating...
+[2025-04-17 15:52:29,567][48472] Stopping InferenceWorker_p0-w0...
+[2025-04-17 15:52:29,568][48473] Stopping RolloutWorker_w0...
+[2025-04-17 15:52:29,568][48480] Loop rollout_proc7_evt_loop terminating...
+[2025-04-17 15:52:29,569][48472] Loop inference_proc0-0_evt_loop terminating...
+[2025-04-17 15:52:29,569][48473] Loop rollout_proc0_evt_loop terminating...
+[2025-04-17 15:52:29,568][48459] Stopping Batcher_0...
+[2025-04-17 15:52:29,570][48459] Loop batcher_evt_loop terminating...
+[2025-04-17 15:52:29,566][38462] Runner profile tree view:
+main_loop: 75.2660
+[2025-04-17 15:52:29,576][38462] Collected {}, FPS: 0.0
+[2025-04-17 15:52:31,947][48459] Using optimizer <class 'torch.optim.adam.Adam'>
+[2025-04-17 15:52:33,028][48459] No checkpoints found
+[2025-04-17 15:52:33,028][48459] Did not load from checkpoint, starting from scratch!
+[2025-04-17 15:52:33,029][48459] Initialized policy 0 weights for model version 0
+[2025-04-17 15:52:33,037][48459] LearnerWorker_p0 finished initialization!
+[2025-04-17 15:52:33,037][48459] Saving /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2025-04-17 15:52:33,058][48459] Stopping LearnerWorker_p0...
+[2025-04-17 15:52:33,058][48459] Loop learner_proc0_evt_loop terminating...
+[2025-04-17 15:58:07,874][38462] Environment doom_basic already registered, overwriting...
+[2025-04-17 15:58:07,878][38462] Environment doom_two_colors_easy already registered, overwriting...
+[2025-04-17 15:58:07,878][38462] Environment doom_two_colors_hard already registered, overwriting...
+[2025-04-17 15:58:07,879][38462] Environment doom_dm already registered, overwriting...
+[2025-04-17 15:58:07,880][38462] Environment doom_dwango5 already registered, overwriting...
+[2025-04-17 15:58:07,881][38462] Environment doom_my_way_home_flat_actions already registered, overwriting...
+[2025-04-17 15:58:07,882][38462] Environment doom_defend_the_center_flat_actions already registered, overwriting...
+[2025-04-17 15:58:07,883][38462] Environment doom_my_way_home already registered, overwriting...
+[2025-04-17 15:58:07,885][38462] Environment doom_deadly_corridor already registered, overwriting...
+[2025-04-17 15:58:07,886][38462] Environment doom_defend_the_center already registered, overwriting...
+[2025-04-17 15:58:07,887][38462] Environment doom_defend_the_line already registered, overwriting...
+[2025-04-17 15:58:07,888][38462] Environment doom_health_gathering already registered, overwriting...
+[2025-04-17 15:58:07,889][38462] Environment doom_health_gathering_supreme already registered, overwriting...
+[2025-04-17 15:58:07,890][38462] Environment doom_battle already registered, overwriting...
+[2025-04-17 15:58:07,891][38462] Environment doom_battle2 already registered, overwriting...
+[2025-04-17 15:58:07,892][38462] Environment doom_duel_bots already registered, overwriting...
+[2025-04-17 15:58:07,892][38462] Environment doom_deathmatch_bots already registered, overwriting...
+[2025-04-17 15:58:07,894][38462] Environment doom_duel already registered, overwriting...
+[2025-04-17 15:58:07,894][38462] Environment doom_deathmatch_full already registered, overwriting...
+[2025-04-17 15:58:07,895][38462] Environment doom_benchmark already registered, overwriting...
+[2025-04-17 15:58:07,896][38462] register_encoder_factory: <function make_vizdoom_encoder at 0x7fd13a31b250>
+[2025-04-17 15:58:07,909][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
+[2025-04-17 15:58:07,911][38462] Overriding arg 'num_workers' with value 1 passed from command line
+[2025-04-17 15:58:07,913][38462] Overriding arg 'num_envs_per_worker' with value 1 passed from command line
+[2025-04-17 15:58:07,913][38462] Overriding arg 'train_for_env_steps' with value 4000 passed from command line
+[2025-04-17 15:58:07,921][38462] Experiment dir /home/uccacbo/Deep-RL-HF/train_dir/default_experiment already exists!
+[2025-04-17 15:58:07,922][38462] Resuming existing experiment from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment...
+[2025-04-17 15:58:07,924][38462] Weights and Biases integration disabled
+[2025-04-17 15:58:07,933][38462] Environment var CUDA_VISIBLE_DEVICES is 0
+
+[2025-04-17 15:58:10,470][38462] cfg.num_envs_per_worker=1 must be a multiple of cfg.worker_num_splits=2 (for double-buffered sampling you need to use even number of envs per worker)
+[2025-04-17 15:58:21,350][38462] Environment doom_basic already registered, overwriting...
+[2025-04-17 15:58:21,351][38462] Environment doom_two_colors_easy already registered, overwriting...
+[2025-04-17 15:58:21,352][38462] Environment doom_two_colors_hard already registered, overwriting...
+[2025-04-17 15:58:21,353][38462] Environment doom_dm already registered, overwriting...
+[2025-04-17 15:58:21,354][38462] Environment doom_dwango5 already registered, overwriting...
+[2025-04-17 15:58:21,354][38462] Environment doom_my_way_home_flat_actions already registered, overwriting...
+[2025-04-17 15:58:21,355][38462] Environment doom_defend_the_center_flat_actions already registered, overwriting...
+[2025-04-17 15:58:21,355][38462] Environment doom_my_way_home already registered, overwriting...
+[2025-04-17 15:58:21,356][38462] Environment doom_deadly_corridor already registered, overwriting...
+[2025-04-17 15:58:21,356][38462] Environment doom_defend_the_center already registered, overwriting...
+[2025-04-17 15:58:21,357][38462] Environment doom_defend_the_line already registered, overwriting...
+[2025-04-17 15:58:21,358][38462] Environment doom_health_gathering already registered, overwriting...
+[2025-04-17 15:58:21,359][38462] Environment doom_health_gathering_supreme already registered, overwriting...
+[2025-04-17 15:58:21,359][38462] Environment doom_battle already registered, overwriting...
+[2025-04-17 15:58:21,360][38462] Environment doom_battle2 already registered, overwriting...
+[2025-04-17 15:58:21,361][38462] Environment doom_duel_bots already registered, overwriting...
+[2025-04-17 15:58:21,361][38462] Environment doom_deathmatch_bots already registered, overwriting...
+[2025-04-17 15:58:21,362][38462] Environment doom_duel already registered, overwriting...
+[2025-04-17 15:58:21,363][38462] Environment doom_deathmatch_full already registered, overwriting...
+[2025-04-17 15:58:21,364][38462] Environment doom_benchmark already registered, overwriting...
+[2025-04-17 15:58:21,365][38462] register_encoder_factory: <function make_vizdoom_encoder at 0x7fd13a31b250>
+[2025-04-17 15:58:21,371][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
+[2025-04-17 15:58:21,372][38462] Overriding arg 'num_workers' with value 1 passed from command line
+[2025-04-17 15:58:21,373][38462] Overriding arg 'num_envs_per_worker' with value 2 passed from command line
+[2025-04-17 15:58:21,373][38462] Overriding arg 'train_for_env_steps' with value 4000 passed from command line
+[2025-04-17 15:58:21,378][38462] Experiment dir /home/uccacbo/Deep-RL-HF/train_dir/default_experiment already exists!
+[2025-04-17 15:58:21,379][38462] Resuming existing experiment from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment...
+[2025-04-17 15:58:21,380][38462] Weights and Biases integration disabled
+[2025-04-17 15:58:21,382][38462] Environment var CUDA_VISIBLE_DEVICES is 0
+
+[2025-04-17 15:58:22,980][38462] Starting experiment with the following configuration:
+help=False
+algo=APPO
+env=doom_health_gathering_supreme
+experiment=default_experiment
+train_dir=/home/uccacbo/Deep-RL-HF/train_dir
+restart_behavior=resume
+device=gpu
+seed=None
+num_policies=1
+async_rl=True
+serial_mode=False
+batched_sampling=False
+num_batches_to_accumulate=2
+worker_num_splits=2
+policy_workers_per_policy=1
+max_policy_lag=1000
+num_workers=1
+num_envs_per_worker=2
+batch_size=1024
+num_batches_per_epoch=1
+num_epochs=1
+rollout=32
+recurrence=32
+shuffle_minibatches=False
+gamma=0.99
+reward_scale=1.0
+reward_clip=1000.0
+value_bootstrap=False
+normalize_returns=True
+exploration_loss_coeff=0.001
+value_loss_coeff=0.5
+kl_loss_coeff=0.0
+exploration_loss=symmetric_kl
+gae_lambda=0.95
+ppo_clip_ratio=0.1
+ppo_clip_value=0.2
+with_vtrace=False
+vtrace_rho=1.0
+vtrace_c=1.0
+optimizer=adam
+adam_eps=1e-06
+adam_beta1=0.9
+adam_beta2=0.999
+max_grad_norm=4.0
+learning_rate=0.0001
+lr_schedule=constant
+lr_schedule_kl_threshold=0.008
+lr_adaptive_min=1e-06
+lr_adaptive_max=0.01
+obs_subtract_mean=0.0
+obs_scale=255.0
+normalize_input=True
+normalize_input_keys=None
+decorrelate_experience_max_seconds=0
+decorrelate_envs_on_one_worker=True
+actor_worker_gpus=[]
+set_workers_cpu_affinity=True
+force_envs_single_thread=False
+default_niceness=0
+log_to_file=True
+experiment_summaries_interval=10
+flush_summaries_interval=30
+stats_avg=100
+summaries_use_frameskip=True
+heartbeat_interval=20
+heartbeat_reporting_interval=600
+train_for_env_steps=4000
+train_for_seconds=10000000000
+save_every_sec=120
+keep_checkpoints=2
+load_checkpoint_kind=latest
+save_milestones_sec=-1
+save_best_every_sec=5
+save_best_metric=reward
+save_best_after=100000
+benchmark=False
+encoder_mlp_layers=[512, 512]
+encoder_conv_architecture=convnet_simple
+encoder_conv_mlp_layers=[512]
+use_rnn=True
+rnn_size=512
+rnn_type=gru
+rnn_num_layers=1
+decoder_mlp_layers=[]
+nonlinearity=elu
+policy_initialization=orthogonal
+policy_init_gain=1.0
+actor_critic_share_weights=True
+adaptive_stddev=True
+continuous_tanh_scale=0.0
+initial_stddev=1.0
+use_env_info_cache=False
+env_gpu_actions=False
+env_gpu_observations=True
+env_frameskip=4
+env_framestack=1
+pixel_format=CHW
+use_record_episode_statistics=False
+with_wandb=False
+wandb_user=None
+wandb_project=sample_factory
+wandb_group=None
+wandb_job_type=SF
+wandb_tags=[]
+with_pbt=False
+pbt_mix_policies_in_one_env=True
+pbt_period_env_steps=5000000
+pbt_start_mutation=20000000
+pbt_replace_fraction=0.3
+pbt_mutation_rate=0.15
+pbt_replace_reward_gap=0.1
+pbt_replace_reward_gap_absolute=1e-06
+pbt_optimize_gamma=False
+pbt_target_objective=true_objective
+pbt_perturb_min=1.1
+pbt_perturb_max=1.5
+num_agents=-1
+num_humans=0
+num_bots=-1
+start_bot_difficulty=None
+timelimit=None
+res_w=128
+res_h=72
+wide_aspect_ratio=False
+eval_env_frameskip=1
+fps=35
+command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=400000
+cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 400000}
+git_hash=unknown
+git_repo_name=not a git repository
+[2025-04-17 15:58:22,981][38462] Saving configuration to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json...
+[2025-04-17 15:58:22,982][38462] Rollout worker 0 uses device cpu
+[2025-04-17 15:58:23,030][38462] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:58:23,031][38462] InferenceWorker_p0-w0: min num requests: 1
+[2025-04-17 15:58:23,035][38462] Starting all processes...
+[2025-04-17 15:58:23,035][38462] Starting process learner_proc0
+[2025-04-17 15:58:23,085][38462] Starting all processes...
+[2025-04-17 15:58:23,088][38462] Starting process inference_proc0-0
+[2025-04-17 15:58:23,089][38462] Starting process rollout_proc0
+[2025-04-17 15:58:24,649][51423] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:58:24,649][51423] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2025-04-17 15:58:24,656][51429] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+[2025-04-17 15:58:24,671][51430] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:58:24,671][51430] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2025-04-17 15:58:24,718][51430] Num visible devices: 1
+[2025-04-17 15:58:24,718][51423] Num visible devices: 1
+[2025-04-17 15:58:24,719][51423] Starting seed is not provided
+[2025-04-17 15:58:24,720][51423] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:58:24,720][51423] Initializing actor-critic model on device cuda:0
+[2025-04-17 15:58:24,720][51423] RunningMeanStd input shape: (3, 72, 128)
+[2025-04-17 15:58:24,721][51423] RunningMeanStd input shape: (1,)
+[2025-04-17 15:58:24,728][51423] ConvEncoder: input_channels=3
+[2025-04-17 15:58:24,838][51423] Conv encoder output size: 512
+[2025-04-17 15:58:24,839][51423] Policy head output size: 512
+[2025-04-17 15:58:24,856][51423] Created Actor Critic model with architecture:
+[2025-04-17 15:58:24,856][51423] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2025-04-17 15:58:25,270][51423] Using optimizer <class 'torch.optim.adam.Adam'>
+[2025-04-17 15:58:26,272][51423] Loading state from checkpoint /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2025-04-17 15:58:26,309][51423] Loading model from checkpoint
+[2025-04-17 15:58:26,310][51423] Loaded experiment state at self.train_step=0, self.env_steps=0
+[2025-04-17 15:58:26,310][51423] Initialized policy 0 weights for model version 0
+[2025-04-17 15:58:26,315][51423] LearnerWorker_p0 finished initialization!
+[2025-04-17 15:58:26,316][51423] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2025-04-17 15:58:26,382][38462] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:26,480][51430] RunningMeanStd input shape: (3, 72, 128)
+[2025-04-17 15:58:26,481][51430] RunningMeanStd input shape: (1,)
+[2025-04-17 15:58:26,488][51430] ConvEncoder: input_channels=3
+[2025-04-17 15:58:26,556][51430] Conv encoder output size: 512
+[2025-04-17 15:58:26,556][51430] Policy head output size: 512
+[2025-04-17 15:58:26,595][38462] Inference worker 0-0 is ready!
+[2025-04-17 15:58:26,596][38462] All inference workers are ready! Signal rollout workers to start!
+[2025-04-17 15:58:26,692][51429] Doom resolution: 160x120, resize resolution: (128, 72)
+[2025-04-17 15:58:26,978][51429] Decorrelating experience for 0 frames...
+[2025-04-17 15:58:27,129][51429] Decorrelating experience for 32 frames...
+[2025-04-17 15:58:31,382][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 72.2. Samples: 361. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:31,385][38462] Avg episode reward: [(0, '4.080')]
+[2025-04-17 15:58:36,385][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 167.4. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:36,420][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:58:41,389][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 111.6. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:41,405][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:58:43,078][38462] Heartbeat connected on Batcher_0
+[2025-04-17 15:58:43,145][38462] Heartbeat connected on RolloutWorker_w0
+[2025-04-17 15:58:46,385][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 83.7. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:46,453][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:58:51,549][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 66.6. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:52,038][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:58:56,514][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 55.6. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:58:56,762][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:01,432][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 47.8. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:01,678][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:06,401][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 41.8. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:06,881][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:11,636][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 37.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:12,123][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:16,672][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 29.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:17,010][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:23,230][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:23,341][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:26,435][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:26,561][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:31,394][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:31,468][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:36,412][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:36,464][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:41,432][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:41,842][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:46,467][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:46,605][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:51,396][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:51,426][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 15:59:57,740][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 15:59:58,246][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:01,482][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:01,776][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:06,505][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:06,724][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:11,616][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:11,940][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:16,522][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:16,973][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:21,574][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:21,990][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:26,518][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:26,894][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:32,477][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:33,016][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:36,512][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:36,813][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:41,673][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:42,174][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:46,558][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:46,922][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:51,544][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:51,982][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:00:56,573][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:00:56,913][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:01,408][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:01,494][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:06,768][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:07,035][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:11,481][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:11,860][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:16,526][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:17,049][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:21,568][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:22,025][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:26,577][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:27,041][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:32,480][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2025-04-17 16:01:37,774][38462] Avg episode reward: [(0, '4.191')]
+[2025-04-17 16:01:41,829][38462] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 38462], exiting...
+[2025-04-17 16:01:41,835][51423] Stopping Batcher_0...
+[2025-04-17 16:01:41,836][51423] Loop batcher_evt_loop terminating...
+[2025-04-17 16:01:41,835][38462] Runner profile tree view:
+main_loop: 198.8007
+[2025-04-17 16:01:41,838][38462] Collected {0: 0}, FPS: 0.0
+[2025-04-17 16:01:41,897][51429] Stopping RolloutWorker_w0...
+[2025-04-17 16:01:41,900][51429] Loop rollout_proc0_evt_loop terminating...
+[2025-04-17 16:01:42,285][51430] Weights refcount: 2 0
+[2025-04-17 16:01:42,292][51430] Stopping InferenceWorker_p0-w0...
+[2025-04-17 16:01:42,293][51430] Loop inference_proc0-0_evt_loop terminating...
+[2025-04-17 16:01:42,343][51423] Saving /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
+[2025-04-17 16:01:42,607][51423] Saving /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
+[2025-04-17 16:01:42,772][51423] Stopping LearnerWorker_p0...
+[2025-04-17 16:01:42,772][51423] Loop learner_proc0_evt_loop terminating...
+[2025-04-17 16:02:52,360][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
+[2025-04-17 16:02:52,360][38462] Adding new argument 'no_render'=True that is not in the saved config file!
+[2025-04-17 16:02:52,361][38462] Adding new argument 'save_video'=True that is not in the saved config file!
+[2025-04-17 16:02:52,362][38462] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2025-04-17 16:02:52,362][38462] Adding new argument 'video_name'=None that is not in the saved config file!
+[2025-04-17 16:02:52,362][38462] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2025-04-17 16:02:52,363][38462] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2025-04-17 16:02:52,364][38462] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2025-04-17 16:02:52,365][38462] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2025-04-17 16:02:52,365][38462] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2025-04-17 16:02:52,366][38462] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2025-04-17 16:02:52,367][38462] Adding new argument 'train_script'=None that is not in the saved config file!
+[2025-04-17 16:02:52,368][38462] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2025-04-17 16:02:52,369][38462] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2025-04-17 16:02:52,401][38462] Doom resolution: 160x120, resize resolution: (128, 72)
+[2025-04-17 16:02:52,406][38462] RunningMeanStd input shape: (3, 72, 128)
+[2025-04-17 16:02:52,409][38462] RunningMeanStd input shape: (1,)
+[2025-04-17 16:02:52,439][38462] ConvEncoder: input_channels=3
+[2025-04-17 16:02:52,562][38462] Conv encoder output size: 512
+[2025-04-17 16:02:52,562][38462] Policy head output size: 512
+[2025-04-17 16:02:53,101][38462] Loading state from checkpoint /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
+[2025-04-17 16:02:53,978][38462] Num frames 100...
+[2025-04-17 16:02:54,078][38462] Num frames 200...
+[2025-04-17 16:02:54,181][38462] Num frames 300...
+[2025-04-17 16:02:54,318][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-04-17 16:02:54,319][38462] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-04-17 16:02:54,342][38462] Num frames 400...
+[2025-04-17 16:02:54,452][38462] Num frames 500...
+[2025-04-17 16:02:54,546][38462] Num frames 600...
+[2025-04-17 16:02:54,647][38462] Num frames 700...
+[2025-04-17 16:02:54,754][38462] Num frames 800...
+[2025-04-17 16:02:54,846][38462] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+[2025-04-17 16:02:54,847][38462] Avg episode reward: 4.660, avg true_objective: 4.160
+[2025-04-17 16:02:54,921][38462] Num frames 900...
+[2025-04-17 16:02:55,019][38462] Num frames 1000...
+[2025-04-17 16:02:55,120][38462] Num frames 1100...
+[2025-04-17 16:02:55,219][38462] Num frames 1200...
+[2025-04-17 16:02:55,293][38462] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
+[2025-04-17 16:02:55,295][38462] Avg episode reward: 4.387, avg true_objective: 4.053
+[2025-04-17 16:02:55,393][38462] Num frames 1300...
+[2025-04-17 16:02:55,488][38462] Num frames 1400...
+[2025-04-17 16:02:55,594][38462] Num frames 1500...
+[2025-04-17 16:02:55,695][38462] Num frames 1600...
+[2025-04-17 16:02:55,780][38462] Avg episode rewards: #0: 4.580, true rewards: #0: 4.080
+[2025-04-17 16:02:55,781][38462] Avg episode reward: 4.580, avg true_objective: 4.080
+[2025-04-17 16:02:55,847][38462] Num frames 1700...
+[2025-04-17 16:02:55,945][38462] Num frames 1800...
+[2025-04-17 16:02:56,038][38462] Num frames 1900...
+[2025-04-17 16:02:56,132][38462] Num frames 2000...
+[2025-04-17 16:02:56,203][38462] Avg episode rewards: #0: 4.432, true rewards: #0: 4.032
+[2025-04-17 16:02:56,204][38462] Avg episode reward: 4.432, avg true_objective: 4.032
+[2025-04-17 16:02:56,288][38462] Num frames 2100...
+[2025-04-17 16:02:56,378][38462] Num frames 2200...
+[2025-04-17 16:02:56,480][38462] Num frames 2300...
+[2025-04-17 16:02:56,581][38462] Num frames 2400...
+[2025-04-17 16:02:56,703][38462] Avg episode rewards: #0: 4.607, true rewards: #0: 4.107
+[2025-04-17 16:02:56,704][38462] Avg episode reward: 4.607, avg true_objective: 4.107
+[2025-04-17 16:02:56,756][38462] Num frames 2500...
+[2025-04-17 16:02:56,866][38462] Num frames 2600...
+[2025-04-17 16:02:56,971][38462] Num frames 2700...
+[2025-04-17 16:02:57,079][38462] Num frames 2800...
+[2025-04-17 16:02:57,183][38462] Avg episode rewards: #0: 4.497, true rewards: #0: 4.069
+[2025-04-17 16:02:57,184][38462] Avg episode reward: 4.497, avg true_objective: 4.069
+[2025-04-17 16:02:57,247][38462] Num frames 2900...
+[2025-04-17 16:02:57,379][38462] Num frames 3000...
+[2025-04-17 16:02:57,473][38462] Num frames 3100...
+[2025-04-17 16:02:57,573][38462] Num frames 3200...
+[2025-04-17 16:02:57,667][38462] Avg episode rewards: #0: 4.415, true rewards: #0: 4.040
+[2025-04-17 16:02:57,668][38462] Avg episode reward: 4.415, avg true_objective: 4.040
+[2025-04-17 16:02:57,740][38462] Num frames 3300...
+[2025-04-17 16:02:57,834][38462] Num frames 3400...
+[2025-04-17 16:02:57,924][38462] Num frames 3500...
+[2025-04-17 16:02:58,021][38462] Num frames 3600...
+[2025-04-17 16:02:58,089][38462] Avg episode rewards: #0: 4.351, true rewards: #0: 4.018
+[2025-04-17 16:02:58,090][38462] Avg episode reward: 4.351, avg true_objective: 4.018
+[2025-04-17 16:02:58,178][38462] Num frames 3700...
+[2025-04-17 16:02:58,280][38462] Num frames 3800...
+[2025-04-17 16:02:58,385][38462] Num frames 3900...
+[2025-04-17 16:02:58,479][38462] Num frames 4000...
+[2025-04-17 16:02:58,531][38462] Avg episode rewards: #0: 4.300, true rewards: #0: 4.000
+[2025-04-17 16:02:58,532][38462] Avg episode reward: 4.300, avg true_objective: 4.000
+[2025-04-17 16:03:03,627][38462] Replay video saved to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/replay.mp4!
+[2025-04-17 16:04:25,402][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
+[2025-04-17 16:04:25,403][38462] Adding new argument 'no_render'=True that is not in the saved config file!
+[2025-04-17 16:04:25,404][38462] Adding new argument 'save_video'=True that is not in the saved config file!
+[2025-04-17 16:04:25,405][38462] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2025-04-17 16:04:25,406][38462] Adding new argument 'video_name'=None that is not in the saved config file!
+[2025-04-17 16:04:25,406][38462] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2025-04-17 16:04:25,407][38462] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2025-04-17 16:04:25,408][38462] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2025-04-17 16:04:25,409][38462] Adding new argument 'hf_repository'='c-bone/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2025-04-17 16:04:25,410][38462] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2025-04-17 16:04:25,410][38462] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2025-04-17 16:04:25,411][38462] Adding new argument 'train_script'=None that is not in the saved config file!
+[2025-04-17 16:04:25,412][38462] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2025-04-17 16:04:25,413][38462] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2025-04-17 16:04:25,462][38462] RunningMeanStd input shape: (3, 72, 128)
+[2025-04-17 16:04:25,467][38462] RunningMeanStd input shape: (1,)
+[2025-04-17 16:04:25,484][38462] ConvEncoder: input_channels=3
+[2025-04-17 16:04:25,521][38462] Conv encoder output size: 512
+[2025-04-17 16:04:25,522][38462] Policy head output size: 512
+[2025-04-17 16:04:25,546][38462] Loading state from checkpoint /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
+[2025-04-17 16:04:26,051][38462] Num frames 100...
+[2025-04-17 16:04:26,217][38462] Num frames 200...
+[2025-04-17 16:04:26,369][38462] Num frames 300...
+[2025-04-17 16:04:26,574][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-04-17 16:04:26,575][38462] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-04-17 16:04:26,605][38462] Num frames 400...
+[2025-04-17 16:04:26,774][38462] Num frames 500...
+[2025-04-17 16:04:26,945][38462] Num frames 600...
+[2025-04-17 16:04:27,102][38462] Num frames 700...
+[2025-04-17 16:04:27,229][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-04-17 16:04:27,230][38462] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-04-17 16:04:27,286][38462] Num frames 800...
+[2025-04-17 16:04:27,450][38462] Num frames 900...
+[2025-04-17 16:04:27,620][38462] Num frames 1000...
+[2025-04-17 16:04:27,798][38462] Num frames 1100...
+[2025-04-17 16:04:27,952][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-04-17 16:04:27,953][38462] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-04-17 16:04:28,021][38462] Num frames 1200...
+[2025-04-17 16:04:28,161][38462] Num frames 1300...
+[2025-04-17 16:04:28,333][38462] Num frames 1400...
+[2025-04-17 16:04:28,505][38462] Num frames 1500...
+[2025-04-17 16:04:28,620][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2025-04-17 16:04:28,621][38462] Avg episode reward: 3.840, avg true_objective: 3.840
+[2025-04-17 16:04:28,729][38462] Num frames 1600...
+[2025-04-17 16:04:28,895][38462] Num frames 1700...
+[2025-04-17 16:04:29,022][38462] Num frames 1800...
+[2025-04-17 16:04:29,187][38462] Num frames 1900...
+[2025-04-17 16:04:29,376][38462] Avg episode rewards: #0: 4.168, true rewards: #0: 3.968
+[2025-04-17 16:04:29,378][38462] Avg episode reward: 4.168, avg true_objective: 3.968
+[2025-04-17 16:04:29,407][38462] Num frames 2000...
+[2025-04-17 16:04:29,574][38462] Num frames 2100...
+[2025-04-17 16:04:29,755][38462] Num frames 2200...
+[2025-04-17 16:04:29,866][38462] Avg episode rewards: #0: 3.900, true rewards: #0: 3.733
+[2025-04-17 16:04:29,867][38462] Avg episode reward: 3.900, avg true_objective: 3.733
+[2025-04-17 16:04:29,984][38462] Num frames 2300...
+[2025-04-17 16:04:30,150][38462] Num frames 2400...
+[2025-04-17 16:04:30,311][38462] Num frames 2500...
+[2025-04-17 16:04:30,476][38462] Num frames 2600...
+[2025-04-17 16:04:30,565][38462] Avg episode rewards: #0: 3.891, true rewards: #0: 3.749
+[2025-04-17 16:04:30,566][38462] Avg episode reward: 3.891, avg true_objective: 3.749
+[2025-04-17 16:04:30,690][38462] Num frames 2700...
+[2025-04-17 16:04:30,816][38462] Num frames 2800...
+[2025-04-17 16:04:30,980][38462] Num frames 2900...
+[2025-04-17 16:04:31,143][38462] Num frames 3000...
+[2025-04-17 16:04:31,311][38462] Avg episode rewards: #0: 4.090, true rewards: #0: 3.840
+[2025-04-17 16:04:31,312][38462] Avg episode reward: 4.090, avg true_objective: 3.840
+[2025-04-17 16:04:31,360][38462] Num frames 3100...
+[2025-04-17 16:04:33,597][38462] Num frames 3200...
+[2025-04-17 16:04:33,721][38462] Num frames 3300...
+[2025-04-17 16:04:33,886][38462] Num frames 3400...
+[2025-04-17 16:04:34,033][38462] Avg episode rewards: #0: 4.062, true rewards: #0: 3.840
+[2025-04-17 16:04:34,034][38462] Avg episode reward: 4.062, avg true_objective: 3.840
+[2025-04-17 16:04:34,108][38462] Num frames 3500...
+[2025-04-17 16:04:34,275][38462] Num frames 3600...
+[2025-04-17 16:04:34,440][38462] Num frames 3700...
+[2025-04-17 16:04:34,579][38462] Num frames 3800...
+[2025-04-17 16:04:34,732][38462] Num frames 3900...
+[2025-04-17 16:04:34,846][38462] Avg episode rewards: #0: 4.336, true rewards: #0: 3.936
+[2025-04-17 16:04:34,847][38462] Avg episode reward: 4.336, avg true_objective: 3.936
+[2025-04-17 16:04:40,292][38462] Replay video saved to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/replay.mp4!
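
Taken end to end, the log records the workflow behind this upload: a first launch aborted by keyboard interrupt, a config error (`num_envs_per_worker=1` is rejected because double-buffered sampling needs a multiple of `worker_num_splits=2`), a short resumed run to 4096 env steps, a local evaluation that wrote `replay.mp4`, and a final evaluation with `push_to_hub` enabled. A condensed sketch of that sequence, using only flags that appear in the log (the `sf_examples.vizdoom.*` module paths are an assumption based on the standard Sample-Factory examples layout):

```
python -m sf_examples.vizdoom.train_vizdoom --env=doom_health_gathering_supreme --num_workers=1 --num_envs_per_worker=2 --train_for_env_steps=4000
python -m sf_examples.vizdoom.enjoy_vizdoom --env=doom_health_gathering_supreme --no_render --save_video --max_num_episodes=10
python -m sf_examples.vizdoom.enjoy_vizdoom --env=doom_health_gathering_supreme --no_render --max_num_episodes=10 --push_to_hub --hf_repository=c-bone/rl_course_vizdoom_health_gathering_supreme
```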