Upload folder using huggingface_hub
- .gitattributes +1 -0
- .summary/0/events.out.tfevents.1744901470.Cyprien-Bone-UCL +3 -0
- .summary/0/events.out.tfevents.1744901887.Cyprien-Bone-UCL +0 -0
- .summary/0/events.out.tfevents.1744901901.Cyprien-Bone-UCL +3 -0
- README.md +56 -0
- checkpoint_p0/checkpoint_000000000_0.pth +3 -0
- checkpoint_p0/checkpoint_000000001_4096.pth +3 -0
- config.json +142 -0
- replay.mp4 +3 -0
- sf_log.txt +658 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1744901470.Cyprien-Bone-UCL
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcf232213ef535583a6ed8e1e0e2287069aff9c97a9aa2aa8b7fc653261c5a7b
+size 40
.summary/0/events.out.tfevents.1744901887.Cyprien-Bone-UCL
ADDED
File without changes
.summary/0/events.out.tfevents.1744901901.Cyprien-Bone-UCL
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f19c3acdd07b3b2a42954ee9ba686d82a4a1409b8af806ff47ccb690e066da8a
+size 7582
README.md
ADDED
@@ -0,0 +1,56 @@
+---
+library_name: sample-factory
+tags:
+- deep-reinforcement-learning
+- reinforcement-learning
+- sample-factory
+model-index:
+- name: APPO
+  results:
+  - task:
+      type: reinforcement-learning
+      name: reinforcement-learning
+    dataset:
+      name: doom_health_gathering_supreme
+      type: doom_health_gathering_supreme
+    metrics:
+    - type: mean_reward
+      value: 3.94 +/- 0.57
+      name: mean_reward
+      verified: false
+---
+
+An **APPO** model trained on the **doom_health_gathering_supreme** environment.
+
+This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
+
+
+## Downloading the model
+
+After installing Sample-Factory, download the model with:
+```
+python -m sample_factory.huggingface.load_from_hub -r c-bone/rl_course_vizdoom_health_gathering_supreme
+```
+
+
+## Using the model
+
+To run the model after download, use the `enjoy` script corresponding to this environment:
+```
+python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
+```
+
+
+You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
+
+## Training with this model
+
+To continue training with this model, use the `train` script corresponding to this environment:
+```
+python -m <path.to.train.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme --restart_behavior=resume --train_for_env_steps=10000000000
+```
+
+Note: you may have to adjust `--train_for_env_steps` to a suitably high number, as the experiment will resume from the number of steps at which it previously concluded.
+
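A note on the README above: the `<path.to.enjoy.module>` and `<path.to.train.module>` placeholders are left generic by the Sample-Factory README template; for ViZDoom environments in recent Sample-Factory releases these are typically `sf_examples.vizdoom.enjoy_vizdoom` and `sf_examples.vizdoom.train_vizdoom`, but verify against your installed version. The download step can also be done directly from Python; a minimal sketch, assuming only that the `huggingface_hub` package is installed:

```python
# Minimal sketch: fetch this repository (checkpoints, config.json, replay.mp4)
# without going through the sample_factory CLI. Requires `pip install huggingface_hub`.
from huggingface_hub import snapshot_download

# Downloads the full repo snapshot to the local HF cache and returns its path.
local_dir = snapshot_download(repo_id="c-bone/rl_course_vizdoom_health_gathering_supreme")
print("Files downloaded to:", local_dir)
```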
checkpoint_p0/checkpoint_000000000_0.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d00fb1a2bb3abae2a4e3257a57e24bbcdcadd1ae852097add4e09c4f6be78ade
+size 11939285
checkpoint_p0/checkpoint_000000001_4096.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25bd7d6dca2576306bf7b451fb8263f4f3722b27e0840e4a5dcb4913aac874d7
+size 34929349
config.json
ADDED
@@ -0,0 +1,142 @@
+{
+  "help": false,
+  "algo": "APPO",
+  "env": "doom_health_gathering_supreme",
+  "experiment": "default_experiment",
+  "train_dir": "/home/uccacbo/Deep-RL-HF/train_dir",
+  "restart_behavior": "resume",
+  "device": "gpu",
+  "seed": null,
+  "num_policies": 1,
+  "async_rl": true,
+  "serial_mode": false,
+  "batched_sampling": false,
+  "num_batches_to_accumulate": 2,
+  "worker_num_splits": 2,
+  "policy_workers_per_policy": 1,
+  "max_policy_lag": 1000,
+  "num_workers": 1,
+  "num_envs_per_worker": 2,
+  "batch_size": 1024,
+  "num_batches_per_epoch": 1,
+  "num_epochs": 1,
+  "rollout": 32,
+  "recurrence": 32,
+  "shuffle_minibatches": false,
+  "gamma": 0.99,
+  "reward_scale": 1.0,
+  "reward_clip": 1000.0,
+  "value_bootstrap": false,
+  "normalize_returns": true,
+  "exploration_loss_coeff": 0.001,
+  "value_loss_coeff": 0.5,
+  "kl_loss_coeff": 0.0,
+  "exploration_loss": "symmetric_kl",
+  "gae_lambda": 0.95,
+  "ppo_clip_ratio": 0.1,
+  "ppo_clip_value": 0.2,
+  "with_vtrace": false,
+  "vtrace_rho": 1.0,
+  "vtrace_c": 1.0,
+  "optimizer": "adam",
+  "adam_eps": 1e-06,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.999,
+  "max_grad_norm": 4.0,
+  "learning_rate": 0.0001,
+  "lr_schedule": "constant",
+  "lr_schedule_kl_threshold": 0.008,
+  "lr_adaptive_min": 1e-06,
+  "lr_adaptive_max": 0.01,
+  "obs_subtract_mean": 0.0,
+  "obs_scale": 255.0,
+  "normalize_input": true,
+  "normalize_input_keys": null,
+  "decorrelate_experience_max_seconds": 0,
+  "decorrelate_envs_on_one_worker": true,
+  "actor_worker_gpus": [],
+  "set_workers_cpu_affinity": true,
+  "force_envs_single_thread": false,
+  "default_niceness": 0,
+  "log_to_file": true,
+  "experiment_summaries_interval": 10,
+  "flush_summaries_interval": 30,
+  "stats_avg": 100,
+  "summaries_use_frameskip": true,
+  "heartbeat_interval": 20,
+  "heartbeat_reporting_interval": 600,
+  "train_for_env_steps": 4000,
+  "train_for_seconds": 10000000000,
+  "save_every_sec": 120,
+  "keep_checkpoints": 2,
+  "load_checkpoint_kind": "latest",
+  "save_milestones_sec": -1,
+  "save_best_every_sec": 5,
+  "save_best_metric": "reward",
+  "save_best_after": 100000,
+  "benchmark": false,
+  "encoder_mlp_layers": [
+    512,
+    512
+  ],
+  "encoder_conv_architecture": "convnet_simple",
+  "encoder_conv_mlp_layers": [
+    512
+  ],
+  "use_rnn": true,
+  "rnn_size": 512,
+  "rnn_type": "gru",
+  "rnn_num_layers": 1,
+  "decoder_mlp_layers": [],
+  "nonlinearity": "elu",
+  "policy_initialization": "orthogonal",
+  "policy_init_gain": 1.0,
+  "actor_critic_share_weights": true,
+  "adaptive_stddev": true,
+  "continuous_tanh_scale": 0.0,
+  "initial_stddev": 1.0,
+  "use_env_info_cache": false,
+  "env_gpu_actions": false,
+  "env_gpu_observations": true,
+  "env_frameskip": 4,
+  "env_framestack": 1,
+  "pixel_format": "CHW",
+  "use_record_episode_statistics": false,
+  "with_wandb": false,
+  "wandb_user": null,
+  "wandb_project": "sample_factory",
+  "wandb_group": null,
+  "wandb_job_type": "SF",
+  "wandb_tags": [],
+  "with_pbt": false,
+  "pbt_mix_policies_in_one_env": true,
+  "pbt_period_env_steps": 5000000,
+  "pbt_start_mutation": 20000000,
+  "pbt_replace_fraction": 0.3,
+  "pbt_mutation_rate": 0.15,
+  "pbt_replace_reward_gap": 0.1,
+  "pbt_replace_reward_gap_absolute": 1e-06,
+  "pbt_optimize_gamma": false,
+  "pbt_target_objective": "true_objective",
+  "pbt_perturb_min": 1.1,
+  "pbt_perturb_max": 1.5,
+  "num_agents": -1,
+  "num_humans": 0,
+  "num_bots": -1,
+  "start_bot_difficulty": null,
+  "timelimit": null,
+  "res_w": 128,
+  "res_h": 72,
+  "wide_aspect_ratio": false,
+  "eval_env_frameskip": 1,
+  "fps": 35,
+  "command_line": "--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=400000",
+  "cli_args": {
+    "env": "doom_health_gathering_supreme",
+    "num_workers": 8,
+    "num_envs_per_worker": 4,
+    "train_for_env_steps": 400000
+  },
+  "git_hash": "unknown",
+  "git_repo_name": "not a git repository"
+}
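Since config.json drives checkpoint resumption (the log below shows it being reloaded and selectively overridden from the command line), it can be inspected directly; a minimal sketch, assuming the file has been downloaded to the working directory:

```python
import json

# Load the training configuration added above and print a few key
# hyperparameters. The relative path is an assumption; adjust as needed.
with open("config.json") as f:
    cfg = json.load(f)

print(cfg["algo"], "on", cfg["env"])            # APPO on doom_health_gathering_supreme
print("learning_rate:", cfg["learning_rate"])   # 0.0001
print("rollout:", cfg["rollout"])               # 32
print("batch_size:", cfg["batch_size"])         # 1024
```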
replay.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ccefb1f584447e4c49efdc2d81038a36c6797a4a6b48a5831e78d7b85ae7580
+size 5945675
sf_log.txt
ADDED
@@ -0,0 +1,658 @@
1 |
+
[2025-04-17 15:51:14,150][38462] Saving configuration to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json...
|
2 |
+
[2025-04-17 15:51:14,151][38462] Rollout worker 0 uses device cpu
|
3 |
+
[2025-04-17 15:51:14,152][38462] Rollout worker 1 uses device cpu
|
4 |
+
[2025-04-17 15:51:14,152][38462] Rollout worker 2 uses device cpu
|
5 |
+
[2025-04-17 15:51:14,153][38462] Rollout worker 3 uses device cpu
|
6 |
+
[2025-04-17 15:51:14,154][38462] Rollout worker 4 uses device cpu
|
7 |
+
[2025-04-17 15:51:14,155][38462] Rollout worker 5 uses device cpu
|
8 |
+
[2025-04-17 15:51:14,156][38462] Rollout worker 6 uses device cpu
|
9 |
+
[2025-04-17 15:51:14,157][38462] Rollout worker 7 uses device cpu
|
10 |
+
[2025-04-17 15:51:14,281][38462] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
11 |
+
[2025-04-17 15:51:14,281][38462] InferenceWorker_p0-w0: min num requests: 2
|
12 |
+
[2025-04-17 15:51:14,301][38462] Starting all processes...
|
13 |
+
[2025-04-17 15:51:14,302][38462] Starting process learner_proc0
|
14 |
+
[2025-04-17 15:51:14,355][38462] Starting all processes...
|
15 |
+
[2025-04-17 15:51:14,362][38462] Starting process inference_proc0-0
|
16 |
+
[2025-04-17 15:51:14,362][38462] Starting process rollout_proc0
|
17 |
+
[2025-04-17 15:51:14,363][38462] Starting process rollout_proc1
|
18 |
+
[2025-04-17 15:51:14,364][38462] Starting process rollout_proc2
|
19 |
+
[2025-04-17 15:51:14,364][38462] Starting process rollout_proc3
|
20 |
+
[2025-04-17 15:51:14,365][38462] Starting process rollout_proc4
|
21 |
+
[2025-04-17 15:51:14,366][38462] Starting process rollout_proc5
|
22 |
+
[2025-04-17 15:51:14,366][38462] Starting process rollout_proc6
|
23 |
+
[2025-04-17 15:51:14,369][38462] Starting process rollout_proc7
|
24 |
+
[2025-04-17 15:51:20,105][48477] Worker 4 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
25 |
+
[2025-04-17 15:51:20,105][48473] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
26 |
+
[2025-04-17 15:51:20,105][48479] Worker 6 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
27 |
+
[2025-04-17 15:51:20,105][48474] Worker 1 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
28 |
+
[2025-04-17 15:51:20,105][48480] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
29 |
+
[2025-04-17 15:51:20,105][48475] Worker 2 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
30 |
+
[2025-04-17 15:51:20,105][48478] Worker 5 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
31 |
+
[2025-04-17 15:51:20,105][48476] Worker 3 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
32 |
+
[2025-04-17 15:51:20,106][48472] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
33 |
+
[2025-04-17 15:51:20,106][48459] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
34 |
+
[2025-04-17 15:51:20,106][48472] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
35 |
+
[2025-04-17 15:51:20,106][48459] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
36 |
+
[2025-04-17 15:51:20,204][48472] Num visible devices: 1
|
37 |
+
[2025-04-17 15:51:20,205][48459] Num visible devices: 1
|
38 |
+
[2025-04-17 15:51:20,206][48459] Starting seed is not provided
|
39 |
+
[2025-04-17 15:51:20,207][48459] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
40 |
+
[2025-04-17 15:51:20,207][48459] Initializing actor-critic model on device cuda:0
|
41 |
+
[2025-04-17 15:51:20,210][48459] RunningMeanStd input shape: (3, 72, 128)
|
42 |
+
[2025-04-17 15:51:20,226][48459] RunningMeanStd input shape: (1,)
|
43 |
+
[2025-04-17 15:51:20,270][48459] ConvEncoder: input_channels=3
|
44 |
+
[2025-04-17 15:51:20,552][48459] Conv encoder output size: 512
|
45 |
+
[2025-04-17 15:51:20,554][48459] Policy head output size: 512
|
46 |
+
[2025-04-17 15:51:20,632][48459] Created Actor Critic model with architecture:
|
47 |
+
[2025-04-17 15:51:20,636][48459] ActorCriticSharedWeights(
|
48 |
+
(obs_normalizer): ObservationNormalizer(
|
49 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
50 |
+
(running_mean_std): ModuleDict(
|
51 |
+
(obs): RunningMeanStdInPlace()
|
52 |
+
)
|
53 |
+
)
|
54 |
+
)
|
55 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
56 |
+
(encoder): VizdoomEncoder(
|
57 |
+
(basic_encoder): ConvEncoder(
|
58 |
+
(enc): RecursiveScriptModule(
|
59 |
+
original_name=ConvEncoderImpl
|
60 |
+
(conv_head): RecursiveScriptModule(
|
61 |
+
original_name=Sequential
|
62 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
63 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
64 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
65 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
66 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
67 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
68 |
+
)
|
69 |
+
(mlp_layers): RecursiveScriptModule(
|
70 |
+
original_name=Sequential
|
71 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
72 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
73 |
+
)
|
74 |
+
)
|
75 |
+
)
|
76 |
+
)
|
77 |
+
(core): ModelCoreRNN(
|
78 |
+
(core): GRU(512, 512)
|
79 |
+
)
|
80 |
+
(decoder): MlpDecoder(
|
81 |
+
(mlp): Identity()
|
82 |
+
)
|
83 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
84 |
+
(action_parameterization): ActionParameterizationDefault(
|
85 |
+
(distribution_linear): Linear(in_features=512, out_features=5, bias=True)
|
86 |
+
)
|
87 |
+
)
|
88 |
+
[2025-04-17 15:51:34,270][38462] Heartbeat connected on Batcher_0
|
89 |
+
[2025-04-17 15:51:34,463][38462] Heartbeat connected on RolloutWorker_w2
|
90 |
+
[2025-04-17 15:51:34,917][38462] Heartbeat connected on RolloutWorker_w1
|
91 |
+
[2025-04-17 15:51:35,512][38462] Heartbeat connected on RolloutWorker_w4
|
92 |
+
[2025-04-17 15:51:35,962][38462] Heartbeat connected on RolloutWorker_w3
|
93 |
+
[2025-04-17 15:51:36,514][38462] Heartbeat connected on RolloutWorker_w5
|
94 |
+
[2025-04-17 15:51:37,019][38462] Heartbeat connected on RolloutWorker_w0
|
95 |
+
[2025-04-17 15:51:37,729][38462] Heartbeat connected on InferenceWorker_p0-w0
|
96 |
+
[2025-04-17 15:51:38,109][38462] Heartbeat connected on RolloutWorker_w6
|
97 |
+
[2025-04-17 15:51:38,468][38462] Heartbeat connected on RolloutWorker_w7
|
98 |
+
[2025-04-17 15:52:29,514][38462] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 38462], exiting...
|
99 |
+
[2025-04-17 15:52:29,567][48479] Stopping RolloutWorker_w6...
|
100 |
+
[2025-04-17 15:52:29,567][48475] Stopping RolloutWorker_w2...
|
101 |
+
[2025-04-17 15:52:29,566][48478] Stopping RolloutWorker_w5...
|
102 |
+
[2025-04-17 15:52:29,567][48474] Stopping RolloutWorker_w1...
|
103 |
+
[2025-04-17 15:52:29,567][48476] Stopping RolloutWorker_w3...
|
104 |
+
[2025-04-17 15:52:29,566][48477] Stopping RolloutWorker_w4...
|
105 |
+
[2025-04-17 15:52:29,567][48475] Loop rollout_proc2_evt_loop terminating...
|
106 |
+
[2025-04-17 15:52:29,567][48479] Loop rollout_proc6_evt_loop terminating...
|
107 |
+
[2025-04-17 15:52:29,568][48478] Loop rollout_proc5_evt_loop terminating...
|
108 |
+
[2025-04-17 15:52:29,568][48476] Loop rollout_proc3_evt_loop terminating...
|
109 |
+
[2025-04-17 15:52:29,568][48474] Loop rollout_proc1_evt_loop terminating...
|
110 |
+
[2025-04-17 15:52:29,567][48480] Stopping RolloutWorker_w7...
|
111 |
+
[2025-04-17 15:52:29,568][48477] Loop rollout_proc4_evt_loop terminating...
|
112 |
+
[2025-04-17 15:52:29,567][48472] Stopping InferenceWorker_p0-w0...
|
113 |
+
[2025-04-17 15:52:29,568][48473] Stopping RolloutWorker_w0...
|
114 |
+
[2025-04-17 15:52:29,568][48480] Loop rollout_proc7_evt_loop terminating...
|
115 |
+
[2025-04-17 15:52:29,569][48472] Loop inference_proc0-0_evt_loop terminating...
|
116 |
+
[2025-04-17 15:52:29,569][48473] Loop rollout_proc0_evt_loop terminating...
|
117 |
+
[2025-04-17 15:52:29,568][48459] Stopping Batcher_0...
|
118 |
+
[2025-04-17 15:52:29,570][48459] Loop batcher_evt_loop terminating...
|
119 |
+
[2025-04-17 15:52:29,566][38462] Runner profile tree view:
|
120 |
+
main_loop: 75.2660
|
121 |
+
[2025-04-17 15:52:29,576][38462] Collected {}, FPS: 0.0
|
122 |
+
[2025-04-17 15:52:31,947][48459] Using optimizer <class 'torch.optim.adam.Adam'>
|
123 |
+
[2025-04-17 15:52:33,028][48459] No checkpoints found
|
124 |
+
[2025-04-17 15:52:33,028][48459] Did not load from checkpoint, starting from scratch!
|
125 |
+
[2025-04-17 15:52:33,029][48459] Initialized policy 0 weights for model version 0
|
126 |
+
[2025-04-17 15:52:33,037][48459] LearnerWorker_p0 finished initialization!
|
127 |
+
[2025-04-17 15:52:33,037][48459] Saving /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
|
128 |
+
[2025-04-17 15:52:33,058][48459] Stopping LearnerWorker_p0...
|
129 |
+
[2025-04-17 15:52:33,058][48459] Loop learner_proc0_evt_loop terminating...
|
130 |
+
[2025-04-17 15:58:07,874][38462] Environment doom_basic already registered, overwriting...
|
131 |
+
[2025-04-17 15:58:07,878][38462] Environment doom_two_colors_easy already registered, overwriting...
|
132 |
+
[2025-04-17 15:58:07,878][38462] Environment doom_two_colors_hard already registered, overwriting...
|
133 |
+
[2025-04-17 15:58:07,879][38462] Environment doom_dm already registered, overwriting...
|
134 |
+
[2025-04-17 15:58:07,880][38462] Environment doom_dwango5 already registered, overwriting...
|
135 |
+
[2025-04-17 15:58:07,881][38462] Environment doom_my_way_home_flat_actions already registered, overwriting...
|
136 |
+
[2025-04-17 15:58:07,882][38462] Environment doom_defend_the_center_flat_actions already registered, overwriting...
|
137 |
+
[2025-04-17 15:58:07,883][38462] Environment doom_my_way_home already registered, overwriting...
|
138 |
+
[2025-04-17 15:58:07,885][38462] Environment doom_deadly_corridor already registered, overwriting...
|
139 |
+
[2025-04-17 15:58:07,886][38462] Environment doom_defend_the_center already registered, overwriting...
|
140 |
+
[2025-04-17 15:58:07,887][38462] Environment doom_defend_the_line already registered, overwriting...
|
141 |
+
[2025-04-17 15:58:07,888][38462] Environment doom_health_gathering already registered, overwriting...
|
142 |
+
[2025-04-17 15:58:07,889][38462] Environment doom_health_gathering_supreme already registered, overwriting...
|
143 |
+
[2025-04-17 15:58:07,890][38462] Environment doom_battle already registered, overwriting...
|
144 |
+
[2025-04-17 15:58:07,891][38462] Environment doom_battle2 already registered, overwriting...
|
145 |
+
[2025-04-17 15:58:07,892][38462] Environment doom_duel_bots already registered, overwriting...
|
146 |
+
[2025-04-17 15:58:07,892][38462] Environment doom_deathmatch_bots already registered, overwriting...
|
147 |
+
[2025-04-17 15:58:07,894][38462] Environment doom_duel already registered, overwriting...
|
148 |
+
[2025-04-17 15:58:07,894][38462] Environment doom_deathmatch_full already registered, overwriting...
|
149 |
+
[2025-04-17 15:58:07,895][38462] Environment doom_benchmark already registered, overwriting...
|
150 |
+
[2025-04-17 15:58:07,896][38462] register_encoder_factory: <function make_vizdoom_encoder at 0x7fd13a31b250>
|
151 |
+
[2025-04-17 15:58:07,909][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
|
152 |
+
[2025-04-17 15:58:07,911][38462] Overriding arg 'num_workers' with value 1 passed from command line
|
153 |
+
[2025-04-17 15:58:07,913][38462] Overriding arg 'num_envs_per_worker' with value 1 passed from command line
|
154 |
+
[2025-04-17 15:58:07,913][38462] Overriding arg 'train_for_env_steps' with value 4000 passed from command line
|
155 |
+
[2025-04-17 15:58:07,921][38462] Experiment dir /home/uccacbo/Deep-RL-HF/train_dir/default_experiment already exists!
|
156 |
+
[2025-04-17 15:58:07,922][38462] Resuming existing experiment from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment...
|
157 |
+
[2025-04-17 15:58:07,924][38462] Weights and Biases integration disabled
|
158 |
+
[2025-04-17 15:58:07,933][38462] Environment var CUDA_VISIBLE_DEVICES is 0
|
159 |
+
|
160 |
+
[2025-04-17 15:58:10,470][38462] cfg.num_envs_per_worker=1 must be a multiple of cfg.worker_num_splits=2 (for double-buffered sampling you need to use even number of envs per worker)
|
161 |
+
[2025-04-17 15:58:21,350][38462] Environment doom_basic already registered, overwriting...
|
162 |
+
[2025-04-17 15:58:21,351][38462] Environment doom_two_colors_easy already registered, overwriting...
|
163 |
+
[2025-04-17 15:58:21,352][38462] Environment doom_two_colors_hard already registered, overwriting...
|
164 |
+
[2025-04-17 15:58:21,353][38462] Environment doom_dm already registered, overwriting...
|
165 |
+
[2025-04-17 15:58:21,354][38462] Environment doom_dwango5 already registered, overwriting...
|
166 |
+
[2025-04-17 15:58:21,354][38462] Environment doom_my_way_home_flat_actions already registered, overwriting...
|
167 |
+
[2025-04-17 15:58:21,355][38462] Environment doom_defend_the_center_flat_actions already registered, overwriting...
|
168 |
+
[2025-04-17 15:58:21,355][38462] Environment doom_my_way_home already registered, overwriting...
|
169 |
+
[2025-04-17 15:58:21,356][38462] Environment doom_deadly_corridor already registered, overwriting...
|
170 |
+
[2025-04-17 15:58:21,356][38462] Environment doom_defend_the_center already registered, overwriting...
|
171 |
+
[2025-04-17 15:58:21,357][38462] Environment doom_defend_the_line already registered, overwriting...
|
172 |
+
[2025-04-17 15:58:21,358][38462] Environment doom_health_gathering already registered, overwriting...
|
173 |
+
[2025-04-17 15:58:21,359][38462] Environment doom_health_gathering_supreme already registered, overwriting...
|
174 |
+
[2025-04-17 15:58:21,359][38462] Environment doom_battle already registered, overwriting...
|
175 |
+
[2025-04-17 15:58:21,360][38462] Environment doom_battle2 already registered, overwriting...
|
176 |
+
[2025-04-17 15:58:21,361][38462] Environment doom_duel_bots already registered, overwriting...
|
177 |
+
[2025-04-17 15:58:21,361][38462] Environment doom_deathmatch_bots already registered, overwriting...
|
178 |
+
[2025-04-17 15:58:21,362][38462] Environment doom_duel already registered, overwriting...
|
179 |
+
[2025-04-17 15:58:21,363][38462] Environment doom_deathmatch_full already registered, overwriting...
|
180 |
+
[2025-04-17 15:58:21,364][38462] Environment doom_benchmark already registered, overwriting...
|
181 |
+
[2025-04-17 15:58:21,365][38462] register_encoder_factory: <function make_vizdoom_encoder at 0x7fd13a31b250>
|
182 |
+
[2025-04-17 15:58:21,371][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
|
183 |
+
[2025-04-17 15:58:21,372][38462] Overriding arg 'num_workers' with value 1 passed from command line
|
184 |
+
[2025-04-17 15:58:21,373][38462] Overriding arg 'num_envs_per_worker' with value 2 passed from command line
|
185 |
+
[2025-04-17 15:58:21,373][38462] Overriding arg 'train_for_env_steps' with value 4000 passed from command line
|
186 |
+
[2025-04-17 15:58:21,378][38462] Experiment dir /home/uccacbo/Deep-RL-HF/train_dir/default_experiment already exists!
|
187 |
+
[2025-04-17 15:58:21,379][38462] Resuming existing experiment from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment...
|
188 |
+
[2025-04-17 15:58:21,380][38462] Weights and Biases integration disabled
|
189 |
+
[2025-04-17 15:58:21,382][38462] Environment var CUDA_VISIBLE_DEVICES is 0
|
190 |
+
|
191 |
+
[2025-04-17 15:58:22,980][38462] Starting experiment with the following configuration:
|
192 |
+
help=False
|
193 |
+
algo=APPO
|
194 |
+
env=doom_health_gathering_supreme
|
195 |
+
experiment=default_experiment
|
196 |
+
train_dir=/home/uccacbo/Deep-RL-HF/train_dir
|
197 |
+
restart_behavior=resume
|
198 |
+
device=gpu
|
199 |
+
seed=None
|
200 |
+
num_policies=1
|
201 |
+
async_rl=True
|
202 |
+
serial_mode=False
|
203 |
+
batched_sampling=False
|
204 |
+
num_batches_to_accumulate=2
|
205 |
+
worker_num_splits=2
|
206 |
+
policy_workers_per_policy=1
|
207 |
+
max_policy_lag=1000
|
208 |
+
num_workers=1
|
209 |
+
num_envs_per_worker=2
|
210 |
+
batch_size=1024
|
211 |
+
num_batches_per_epoch=1
|
212 |
+
num_epochs=1
|
213 |
+
rollout=32
|
214 |
+
recurrence=32
|
215 |
+
shuffle_minibatches=False
|
216 |
+
gamma=0.99
|
217 |
+
reward_scale=1.0
|
218 |
+
reward_clip=1000.0
|
219 |
+
value_bootstrap=False
|
220 |
+
normalize_returns=True
|
221 |
+
exploration_loss_coeff=0.001
|
222 |
+
value_loss_coeff=0.5
|
223 |
+
kl_loss_coeff=0.0
|
224 |
+
exploration_loss=symmetric_kl
|
225 |
+
gae_lambda=0.95
|
226 |
+
ppo_clip_ratio=0.1
|
227 |
+
ppo_clip_value=0.2
|
228 |
+
with_vtrace=False
|
229 |
+
vtrace_rho=1.0
|
230 |
+
vtrace_c=1.0
|
231 |
+
optimizer=adam
|
232 |
+
adam_eps=1e-06
|
233 |
+
adam_beta1=0.9
|
234 |
+
adam_beta2=0.999
|
235 |
+
max_grad_norm=4.0
|
236 |
+
learning_rate=0.0001
|
237 |
+
lr_schedule=constant
|
238 |
+
lr_schedule_kl_threshold=0.008
|
239 |
+
lr_adaptive_min=1e-06
|
240 |
+
lr_adaptive_max=0.01
|
241 |
+
obs_subtract_mean=0.0
|
242 |
+
obs_scale=255.0
|
243 |
+
normalize_input=True
|
244 |
+
normalize_input_keys=None
|
245 |
+
decorrelate_experience_max_seconds=0
|
246 |
+
decorrelate_envs_on_one_worker=True
|
247 |
+
actor_worker_gpus=[]
|
248 |
+
set_workers_cpu_affinity=True
|
249 |
+
force_envs_single_thread=False
|
250 |
+
default_niceness=0
|
251 |
+
log_to_file=True
|
252 |
+
experiment_summaries_interval=10
|
253 |
+
flush_summaries_interval=30
|
254 |
+
stats_avg=100
|
255 |
+
summaries_use_frameskip=True
|
256 |
+
heartbeat_interval=20
|
257 |
+
heartbeat_reporting_interval=600
|
258 |
+
train_for_env_steps=4000
|
259 |
+
train_for_seconds=10000000000
|
260 |
+
save_every_sec=120
|
261 |
+
keep_checkpoints=2
|
262 |
+
load_checkpoint_kind=latest
|
263 |
+
save_milestones_sec=-1
|
264 |
+
save_best_every_sec=5
|
265 |
+
save_best_metric=reward
|
266 |
+
save_best_after=100000
|
267 |
+
benchmark=False
|
268 |
+
encoder_mlp_layers=[512, 512]
|
269 |
+
encoder_conv_architecture=convnet_simple
|
270 |
+
encoder_conv_mlp_layers=[512]
|
271 |
+
use_rnn=True
|
272 |
+
rnn_size=512
|
273 |
+
rnn_type=gru
|
274 |
+
rnn_num_layers=1
|
275 |
+
decoder_mlp_layers=[]
|
276 |
+
nonlinearity=elu
|
277 |
+
policy_initialization=orthogonal
|
278 |
+
policy_init_gain=1.0
|
279 |
+
actor_critic_share_weights=True
|
280 |
+
adaptive_stddev=True
|
281 |
+
continuous_tanh_scale=0.0
|
282 |
+
initial_stddev=1.0
|
283 |
+
use_env_info_cache=False
|
284 |
+
env_gpu_actions=False
|
285 |
+
env_gpu_observations=True
|
286 |
+
env_frameskip=4
|
287 |
+
env_framestack=1
|
288 |
+
pixel_format=CHW
|
289 |
+
use_record_episode_statistics=False
|
290 |
+
with_wandb=False
|
291 |
+
wandb_user=None
|
292 |
+
wandb_project=sample_factory
|
293 |
+
wandb_group=None
|
294 |
+
wandb_job_type=SF
|
295 |
+
wandb_tags=[]
|
296 |
+
with_pbt=False
|
297 |
+
pbt_mix_policies_in_one_env=True
|
298 |
+
pbt_period_env_steps=5000000
|
299 |
+
pbt_start_mutation=20000000
|
300 |
+
pbt_replace_fraction=0.3
|
301 |
+
pbt_mutation_rate=0.15
|
302 |
+
pbt_replace_reward_gap=0.1
|
303 |
+
pbt_replace_reward_gap_absolute=1e-06
|
304 |
+
pbt_optimize_gamma=False
|
305 |
+
pbt_target_objective=true_objective
|
306 |
+
pbt_perturb_min=1.1
|
307 |
+
pbt_perturb_max=1.5
|
308 |
+
num_agents=-1
|
309 |
+
num_humans=0
|
310 |
+
num_bots=-1
|
311 |
+
start_bot_difficulty=None
|
312 |
+
timelimit=None
|
313 |
+
res_w=128
|
314 |
+
res_h=72
|
315 |
+
wide_aspect_ratio=False
|
316 |
+
eval_env_frameskip=1
|
317 |
+
fps=35
|
318 |
+
command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=400000
|
319 |
+
cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 400000}
|
320 |
+
git_hash=unknown
|
321 |
+
git_repo_name=not a git repository
|
322 |
+
[2025-04-17 15:58:22,981][38462] Saving configuration to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json...
|
323 |
+
[2025-04-17 15:58:22,982][38462] Rollout worker 0 uses device cpu
|
324 |
+
[2025-04-17 15:58:23,030][38462] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
325 |
+
[2025-04-17 15:58:23,031][38462] InferenceWorker_p0-w0: min num requests: 1
|
326 |
+
[2025-04-17 15:58:23,035][38462] Starting all processes...
|
327 |
+
[2025-04-17 15:58:23,035][38462] Starting process learner_proc0
|
328 |
+
[2025-04-17 15:58:23,085][38462] Starting all processes...
|
329 |
+
[2025-04-17 15:58:23,088][38462] Starting process inference_proc0-0
|
330 |
+
[2025-04-17 15:58:23,089][38462] Starting process rollout_proc0
|
331 |
+
[2025-04-17 15:58:24,649][51423] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
332 |
+
[2025-04-17 15:58:24,649][51423] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
333 |
+
[2025-04-17 15:58:24,656][51429] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
334 |
+
[2025-04-17 15:58:24,671][51430] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
335 |
+
[2025-04-17 15:58:24,671][51430] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
336 |
+
[2025-04-17 15:58:24,718][51430] Num visible devices: 1
|
337 |
+
[2025-04-17 15:58:24,718][51423] Num visible devices: 1
|
338 |
+
[2025-04-17 15:58:24,719][51423] Starting seed is not provided
|
339 |
+
[2025-04-17 15:58:24,720][51423] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
340 |
+
[2025-04-17 15:58:24,720][51423] Initializing actor-critic model on device cuda:0
|
341 |
+
[2025-04-17 15:58:24,720][51423] RunningMeanStd input shape: (3, 72, 128)
|
342 |
+
[2025-04-17 15:58:24,721][51423] RunningMeanStd input shape: (1,)
|
343 |
+
[2025-04-17 15:58:24,728][51423] ConvEncoder: input_channels=3
|
344 |
+
[2025-04-17 15:58:24,838][51423] Conv encoder output size: 512
|
345 |
+
[2025-04-17 15:58:24,839][51423] Policy head output size: 512
|
346 |
+
[2025-04-17 15:58:24,856][51423] Created Actor Critic model with architecture:
|
347 |
+
[2025-04-17 15:58:24,856][51423] ActorCriticSharedWeights(
|
348 |
+
(obs_normalizer): ObservationNormalizer(
|
349 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
350 |
+
(running_mean_std): ModuleDict(
|
351 |
+
(obs): RunningMeanStdInPlace()
|
352 |
+
)
|
353 |
+
)
|
354 |
+
)
|
355 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
356 |
+
(encoder): VizdoomEncoder(
|
357 |
+
(basic_encoder): ConvEncoder(
|
358 |
+
(enc): RecursiveScriptModule(
|
359 |
+
original_name=ConvEncoderImpl
|
360 |
+
(conv_head): RecursiveScriptModule(
|
361 |
+
original_name=Sequential
|
362 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
363 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
364 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
365 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
366 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
367 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
368 |
+
)
|
369 |
+
(mlp_layers): RecursiveScriptModule(
|
370 |
+
original_name=Sequential
|
371 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
372 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
373 |
+
)
|
374 |
+
)
|
375 |
+
)
|
376 |
+
)
|
377 |
+
(core): ModelCoreRNN(
|
378 |
+
(core): GRU(512, 512)
|
379 |
+
)
|
380 |
+
(decoder): MlpDecoder(
|
381 |
+
(mlp): Identity()
|
382 |
+
)
|
383 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
384 |
+
(action_parameterization): ActionParameterizationDefault(
|
385 |
+
(distribution_linear): Linear(in_features=512, out_features=5, bias=True)
|
386 |
+
)
|
387 |
+
)
|
388 |
+
[2025-04-17 15:58:25,270][51423] Using optimizer <class 'torch.optim.adam.Adam'>
|
389 |
+
[2025-04-17 15:58:26,272][51423] Loading state from checkpoint /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
|
390 |
+
[2025-04-17 15:58:26,309][51423] Loading model from checkpoint
|
391 |
+
[2025-04-17 15:58:26,310][51423] Loaded experiment state at self.train_step=0, self.env_steps=0
|
392 |
+
[2025-04-17 15:58:26,310][51423] Initialized policy 0 weights for model version 0
|
393 |
+
[2025-04-17 15:58:26,315][51423] LearnerWorker_p0 finished initialization!
|
394 |
+
[2025-04-17 15:58:26,316][51423] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
395 |
+
[2025-04-17 15:58:26,382][38462] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
396 |
+
[2025-04-17 15:58:26,480][51430] RunningMeanStd input shape: (3, 72, 128)
|
397 |
+
[2025-04-17 15:58:26,481][51430] RunningMeanStd input shape: (1,)
|
398 |
+
[2025-04-17 15:58:26,488][51430] ConvEncoder: input_channels=3
|
399 |
+
[2025-04-17 15:58:26,556][51430] Conv encoder output size: 512
|
400 |
+
[2025-04-17 15:58:26,556][51430] Policy head output size: 512
|
401 |
+
[2025-04-17 15:58:26,595][38462] Inference worker 0-0 is ready!
|
402 |
+
[2025-04-17 15:58:26,596][38462] All inference workers are ready! Signal rollout workers to start!
|
403 |
+
[2025-04-17 15:58:26,692][51429] Doom resolution: 160x120, resize resolution: (128, 72)
|
404 |
+
[2025-04-17 15:58:26,978][51429] Decorrelating experience for 0 frames...
|
405 |
+
[2025-04-17 15:58:27,129][51429] Decorrelating experience for 32 frames...
|
406 |
+
[2025-04-17 15:58:31,382][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 72.2. Samples: 361. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
407 |
+
[2025-04-17 15:58:31,385][38462] Avg episode reward: [(0, '4.080')]
|
408 |
+
[2025-04-17 15:58:36,385][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 167.4. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
409 |
+
[2025-04-17 15:58:36,420][38462] Avg episode reward: [(0, '4.191')]
|
410 |
+
[2025-04-17 15:58:41,389][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 111.6. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
411 |
+
[2025-04-17 15:58:41,405][38462] Avg episode reward: [(0, '4.191')]
|
412 |
+
[2025-04-17 15:58:43,078][38462] Heartbeat connected on Batcher_0
|
413 |
+
[2025-04-17 15:58:43,145][38462] Heartbeat connected on RolloutWorker_w0
|
414 |
+
[2025-04-17 15:58:46,385][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 83.7. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
415 |
+
[2025-04-17 15:58:46,453][38462] Avg episode reward: [(0, '4.191')]
|
416 |
+
[2025-04-17 15:58:51,549][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 66.6. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
417 |
+
[2025-04-17 15:58:52,038][38462] Avg episode reward: [(0, '4.191')]
|
418 |
+
[2025-04-17 15:58:56,514][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 55.6. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
419 |
+
[2025-04-17 15:58:56,762][38462] Avg episode reward: [(0, '4.191')]
|
420 |
+
[2025-04-17 15:59:01,432][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 47.8. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
421 |
+
[2025-04-17 15:59:01,678][38462] Avg episode reward: [(0, '4.191')]
|
422 |
+
[2025-04-17 15:59:06,401][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 41.8. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
423 |
+
[2025-04-17 15:59:06,881][38462] Avg episode reward: [(0, '4.191')]
|
424 |
+
[2025-04-17 15:59:11,636][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 37.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
425 |
+
[2025-04-17 15:59:12,123][38462] Avg episode reward: [(0, '4.191')]
|
426 |
+
[2025-04-17 15:59:16,672][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 29.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
427 |
+
[2025-04-17 15:59:17,010][38462] Avg episode reward: [(0, '4.191')]
|
428 |
+
[2025-04-17 15:59:23,230][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
429 |
+
[2025-04-17 15:59:23,341][38462] Avg episode reward: [(0, '4.191')]
|
430 |
+
[2025-04-17 15:59:26,435][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
431 |
+
[2025-04-17 15:59:26,561][38462] Avg episode reward: [(0, '4.191')]
|
432 |
+
[2025-04-17 15:59:31,394][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
433 |
+
[2025-04-17 15:59:31,468][38462] Avg episode reward: [(0, '4.191')]
|
434 |
+
[2025-04-17 15:59:36,412][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
435 |
+
[2025-04-17 15:59:36,464][38462] Avg episode reward: [(0, '4.191')]
|
436 |
+
[2025-04-17 15:59:41,432][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
437 |
+
[2025-04-17 15:59:41,842][38462] Avg episode reward: [(0, '4.191')]
|
438 |
+
[2025-04-17 15:59:46,467][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
439 |
+
[2025-04-17 15:59:46,605][38462] Avg episode reward: [(0, '4.191')]
|
440 |
+
[2025-04-17 15:59:51,396][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
441 |
+
[2025-04-17 15:59:51,426][38462] Avg episode reward: [(0, '4.191')]
|
442 |
+
[2025-04-17 15:59:57,740][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
443 |
+
[2025-04-17 15:59:58,246][38462] Avg episode reward: [(0, '4.191')]
|
444 |
+
[2025-04-17 16:00:01,482][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
445 |
+
[2025-04-17 16:00:01,776][38462] Avg episode reward: [(0, '4.191')]
|
446 |
+
[2025-04-17 16:00:06,505][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
447 |
+
[2025-04-17 16:00:06,724][38462] Avg episode reward: [(0, '4.191')]
|
448 |
+
[2025-04-17 16:00:11,616][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
449 |
+
[2025-04-17 16:00:11,940][38462] Avg episode reward: [(0, '4.191')]
|
450 |
+
[2025-04-17 16:00:16,522][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
451 |
+
[2025-04-17 16:00:16,973][38462] Avg episode reward: [(0, '4.191')]
|
452 |
+
[2025-04-17 16:00:21,574][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
453 |
+
[2025-04-17 16:00:21,990][38462] Avg episode reward: [(0, '4.191')]
|
454 |
+
[2025-04-17 16:00:26,518][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
455 |
+
[2025-04-17 16:00:26,894][38462] Avg episode reward: [(0, '4.191')]
|
456 |
+
[2025-04-17 16:00:32,477][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
457 |
+
[2025-04-17 16:00:33,016][38462] Avg episode reward: [(0, '4.191')]
|
458 |
+
[2025-04-17 16:00:36,512][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
459 |
+
[2025-04-17 16:00:36,813][38462] Avg episode reward: [(0, '4.191')]
|
460 |
+
[2025-04-17 16:00:41,673][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
461 |
+
[2025-04-17 16:00:42,174][38462] Avg episode reward: [(0, '4.191')]
|
462 |
+
[2025-04-17 16:00:46,558][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
463 |
+
[2025-04-17 16:00:46,922][38462] Avg episode reward: [(0, '4.191')]
|
464 |
+
[2025-04-17 16:00:51,544][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
465 |
+
[2025-04-17 16:00:51,982][38462] Avg episode reward: [(0, '4.191')]
|
466 |
+
[2025-04-17 16:00:56,573][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
467 |
+
[2025-04-17 16:00:56,913][38462] Avg episode reward: [(0, '4.191')]
|
468 |
+
[2025-04-17 16:01:01,408][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
469 |
+
[2025-04-17 16:01:01,494][38462] Avg episode reward: [(0, '4.191')]
|
470 |
+
[2025-04-17 16:01:06,768][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
471 |
+
[2025-04-17 16:01:07,035][38462] Avg episode reward: [(0, '4.191')]
|
472 |
+
[2025-04-17 16:01:11,481][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
473 |
+
[2025-04-17 16:01:11,860][38462] Avg episode reward: [(0, '4.191')]
|
474 |
+
[2025-04-17 16:01:16,526][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
475 |
+
[2025-04-17 16:01:17,049][38462] Avg episode reward: [(0, '4.191')]
|
476 |
+
[2025-04-17 16:01:21,568][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
477 |
+
[2025-04-17 16:01:22,025][38462] Avg episode reward: [(0, '4.191')]
|
478 |
+
[2025-04-17 16:01:26,577][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
479 |
+
[2025-04-17 16:01:27,041][38462] Avg episode reward: [(0, '4.191')]
|
480 |
+
[2025-04-17 16:01:32,480][38462] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 1674. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
481 |
+
[2025-04-17 16:01:37,774][38462] Avg episode reward: [(0, '4.191')]
|
482 |
+
[2025-04-17 16:01:41,829][38462] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 38462], exiting...
|
483 |
+
[2025-04-17 16:01:41,835][51423] Stopping Batcher_0...
|
484 |
+
[2025-04-17 16:01:41,836][51423] Loop batcher_evt_loop terminating...
|
485 |
+
[2025-04-17 16:01:41,835][38462] Runner profile tree view:
|
486 |
+
main_loop: 198.8007
|
487 |
+
[2025-04-17 16:01:41,838][38462] Collected {0: 0}, FPS: 0.0
|
488 |
+
[2025-04-17 16:01:41,897][51429] Stopping RolloutWorker_w0...
|
489 |
+
[2025-04-17 16:01:41,900][51429] Loop rollout_proc0_evt_loop terminating...
|
490 |
+
[2025-04-17 16:01:42,285][51430] Weights refcount: 2 0
|
491 |
+
[2025-04-17 16:01:42,292][51430] Stopping InferenceWorker_p0-w0...
|
492 |
+
[2025-04-17 16:01:42,293][51430] Loop inference_proc0-0_evt_loop terminating...
|
493 |
+
[2025-04-17 16:01:42,343][51423] Saving /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
|
494 |
+
[2025-04-17 16:01:42,607][51423] Saving /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
|
495 |
+
[2025-04-17 16:01:42,772][51423] Stopping LearnerWorker_p0...
|
496 |
+
[2025-04-17 16:01:42,772][51423] Loop learner_proc0_evt_loop terminating...
|
497 |
+
[2025-04-17 16:02:52,360][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
|
498 |
+
[2025-04-17 16:02:52,360][38462] Adding new argument 'no_render'=True that is not in the saved config file!
|
499 |
+
[2025-04-17 16:02:52,361][38462] Adding new argument 'save_video'=True that is not in the saved config file!
|
500 |
+
[2025-04-17 16:02:52,362][38462] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
|
501 |
+
[2025-04-17 16:02:52,362][38462] Adding new argument 'video_name'=None that is not in the saved config file!
|
502 |
+
[2025-04-17 16:02:52,362][38462] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
|
503 |
+
[2025-04-17 16:02:52,363][38462] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
|
504 |
+
[2025-04-17 16:02:52,364][38462] Adding new argument 'push_to_hub'=False that is not in the saved config file!
|
505 |
+
[2025-04-17 16:02:52,365][38462] Adding new argument 'hf_repository'=None that is not in the saved config file!
|
506 |
+
[2025-04-17 16:02:52,365][38462] Adding new argument 'policy_index'=0 that is not in the saved config file!
|
507 |
+
[2025-04-17 16:02:52,366][38462] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
|
508 |
+
[2025-04-17 16:02:52,367][38462] Adding new argument 'train_script'=None that is not in the saved config file!
|
509 |
+
[2025-04-17 16:02:52,368][38462] Adding new argument 'enjoy_script'=None that is not in the saved config file!
|
510 |
+
[2025-04-17 16:02:52,369][38462] Using frameskip 1 and render_action_repeat=4 for evaluation
|
511 |
+
[2025-04-17 16:02:52,401][38462] Doom resolution: 160x120, resize resolution: (128, 72)
|
512 |
+
[2025-04-17 16:02:52,406][38462] RunningMeanStd input shape: (3, 72, 128)
|
513 |
+
[2025-04-17 16:02:52,409][38462] RunningMeanStd input shape: (1,)
|
514 |
+
[2025-04-17 16:02:52,439][38462] ConvEncoder: input_channels=3
|
515 |
+
[2025-04-17 16:02:52,562][38462] Conv encoder output size: 512
|
516 |
+
[2025-04-17 16:02:52,562][38462] Policy head output size: 512
|
517 |
+
[2025-04-17 16:02:53,101][38462] Loading state from checkpoint /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
|
518 |
+
[2025-04-17 16:02:53,978][38462] Num frames 100...
|
519 |
+
[2025-04-17 16:02:54,078][38462] Num frames 200...
|
520 |
+
[2025-04-17 16:02:54,181][38462] Num frames 300...
|
521 |
+
[2025-04-17 16:02:54,318][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
|
522 |
+
[2025-04-17 16:02:54,319][38462] Avg episode reward: 3.840, avg true_objective: 3.840
|
523 |
+
[2025-04-17 16:02:54,342][38462] Num frames 400...
[2025-04-17 16:02:54,452][38462] Num frames 500...
[2025-04-17 16:02:54,546][38462] Num frames 600...
[2025-04-17 16:02:54,647][38462] Num frames 700...
[2025-04-17 16:02:54,754][38462] Num frames 800...
[2025-04-17 16:02:54,846][38462] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
[2025-04-17 16:02:54,847][38462] Avg episode reward: 4.660, avg true_objective: 4.160
[2025-04-17 16:02:54,921][38462] Num frames 900...
[2025-04-17 16:02:55,019][38462] Num frames 1000...
[2025-04-17 16:02:55,120][38462] Num frames 1100...
[2025-04-17 16:02:55,219][38462] Num frames 1200...
[2025-04-17 16:02:55,293][38462] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
[2025-04-17 16:02:55,295][38462] Avg episode reward: 4.387, avg true_objective: 4.053
[2025-04-17 16:02:55,393][38462] Num frames 1300...
[2025-04-17 16:02:55,488][38462] Num frames 1400...
[2025-04-17 16:02:55,594][38462] Num frames 1500...
[2025-04-17 16:02:55,695][38462] Num frames 1600...
[2025-04-17 16:02:55,780][38462] Avg episode rewards: #0: 4.580, true rewards: #0: 4.080
[2025-04-17 16:02:55,781][38462] Avg episode reward: 4.580, avg true_objective: 4.080
[2025-04-17 16:02:55,847][38462] Num frames 1700...
[2025-04-17 16:02:55,945][38462] Num frames 1800...
[2025-04-17 16:02:56,038][38462] Num frames 1900...
[2025-04-17 16:02:56,132][38462] Num frames 2000...
[2025-04-17 16:02:56,203][38462] Avg episode rewards: #0: 4.432, true rewards: #0: 4.032
[2025-04-17 16:02:56,204][38462] Avg episode reward: 4.432, avg true_objective: 4.032
[2025-04-17 16:02:56,288][38462] Num frames 2100...
[2025-04-17 16:02:56,378][38462] Num frames 2200...
[2025-04-17 16:02:56,480][38462] Num frames 2300...
[2025-04-17 16:02:56,581][38462] Num frames 2400...
[2025-04-17 16:02:56,703][38462] Avg episode rewards: #0: 4.607, true rewards: #0: 4.107
[2025-04-17 16:02:56,704][38462] Avg episode reward: 4.607, avg true_objective: 4.107
[2025-04-17 16:02:56,756][38462] Num frames 2500...
[2025-04-17 16:02:56,866][38462] Num frames 2600...
[2025-04-17 16:02:56,971][38462] Num frames 2700...
[2025-04-17 16:02:57,079][38462] Num frames 2800...
[2025-04-17 16:02:57,183][38462] Avg episode rewards: #0: 4.497, true rewards: #0: 4.069
[2025-04-17 16:02:57,184][38462] Avg episode reward: 4.497, avg true_objective: 4.069
[2025-04-17 16:02:57,247][38462] Num frames 2900...
[2025-04-17 16:02:57,379][38462] Num frames 3000...
[2025-04-17 16:02:57,473][38462] Num frames 3100...
[2025-04-17 16:02:57,573][38462] Num frames 3200...
[2025-04-17 16:02:57,667][38462] Avg episode rewards: #0: 4.415, true rewards: #0: 4.040
[2025-04-17 16:02:57,668][38462] Avg episode reward: 4.415, avg true_objective: 4.040
[2025-04-17 16:02:57,740][38462] Num frames 3300...
[2025-04-17 16:02:57,834][38462] Num frames 3400...
[2025-04-17 16:02:57,924][38462] Num frames 3500...
[2025-04-17 16:02:58,021][38462] Num frames 3600...
[2025-04-17 16:02:58,089][38462] Avg episode rewards: #0: 4.351, true rewards: #0: 4.018
[2025-04-17 16:02:58,090][38462] Avg episode reward: 4.351, avg true_objective: 4.018
[2025-04-17 16:02:58,178][38462] Num frames 3700...
[2025-04-17 16:02:58,280][38462] Num frames 3800...
[2025-04-17 16:02:58,385][38462] Num frames 3900...
[2025-04-17 16:02:58,479][38462] Num frames 4000...
[2025-04-17 16:02:58,531][38462] Avg episode rewards: #0: 4.300, true rewards: #0: 4.000
[2025-04-17 16:02:58,532][38462] Avg episode reward: 4.300, avg true_objective: 4.000
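The "Avg episode rewards" entries above are running means over the episodes completed so far in this pass. Assuming the pass ran exactly ten episodes (consistent with the max_num_episodes=10 argument logged below), the final episode's true return can be recovered from the last two running means:

$$r_{10} = 10\,\bar{r}_{10} - 9\,\bar{r}_{9} = 10 \times 4.000 - 9 \times 4.018 \approx 3.84$$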
[2025-04-17 16:03:03,627][38462] Replay video saved to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/replay.mp4!
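The entries that follow show the enjoy script being re-run with push-to-Hub options layered on top of the saved config.json. A plausible reconstruction of that invocation, assuming the standard `sf_examples.vizdoom.enjoy_vizdoom` entry point and flag spellings matching the argument names logged below:

```
python -m sf_examples.vizdoom.enjoy_vizdoom --env=doom_health_gathering_supreme \
    --train_dir=/home/uccacbo/Deep-RL-HF/train_dir --experiment=default_experiment \
    --no_render --save_video --max_num_frames=100000 --max_num_episodes=10 \
    --push_to_hub --hf_repository=c-bone/rl_course_vizdoom_health_gathering_supreme
```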
[2025-04-17 16:04:25,402][38462] Loading existing experiment configuration from /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/config.json
[2025-04-17 16:04:25,403][38462] Adding new argument 'no_render'=True that is not in the saved config file!
[2025-04-17 16:04:25,404][38462] Adding new argument 'save_video'=True that is not in the saved config file!
[2025-04-17 16:04:25,405][38462] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2025-04-17 16:04:25,406][38462] Adding new argument 'video_name'=None that is not in the saved config file!
[2025-04-17 16:04:25,406][38462] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
[2025-04-17 16:04:25,407][38462] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
[2025-04-17 16:04:25,408][38462] Adding new argument 'push_to_hub'=True that is not in the saved config file!
[2025-04-17 16:04:25,409][38462] Adding new argument 'hf_repository'='c-bone/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
[2025-04-17 16:04:25,410][38462] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2025-04-17 16:04:25,410][38462] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2025-04-17 16:04:25,411][38462] Adding new argument 'train_script'=None that is not in the saved config file!
[2025-04-17 16:04:25,412][38462] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2025-04-17 16:04:25,413][38462] Using frameskip 1 and render_action_repeat=4 for evaluation
[2025-04-17 16:04:25,462][38462] RunningMeanStd input shape: (3, 72, 128)
[2025-04-17 16:04:25,467][38462] RunningMeanStd input shape: (1,)
[2025-04-17 16:04:25,484][38462] ConvEncoder: input_channels=3
[2025-04-17 16:04:25,521][38462] Conv encoder output size: 512
[2025-04-17 16:04:25,522][38462] Policy head output size: 512
[2025-04-17 16:04:25,546][38462] Loading state from checkpoint /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/checkpoint_p0/checkpoint_000000001_4096.pth...
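The two "RunningMeanStd input shape" entries above correspond to normalizers for the (3, 72, 128) image observations and the scalar returns. A minimal sketch of how such a running normalizer merges batch statistics (standard parallel mean/variance update; an illustration, not Sample-Factory's exact implementation):

```python
import numpy as np

class RunningMeanStd:
    """Running mean/std tracker, e.g. for (3, 72, 128) image observations."""

    def __init__(self, shape, eps=1e-4):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = eps  # small prior count avoids division by zero

    def update(self, batch):
        # batch has shape (N, *shape); merge its statistics into the running ones
        batch_mean = batch.mean(axis=0)
        batch_var = batch.var(axis=0)
        batch_count = batch.shape[0]
        delta = batch_mean - self.mean
        total = self.count + batch_count
        self.mean = self.mean + delta * batch_count / total
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        self.var = (m_a + m_b + delta**2 * self.count * batch_count / total) / total
        self.count = total

    def normalize(self, x):
        # zero-mean, unit-variance inputs stabilize both training and evaluation
        return (x - self.mean) / np.sqrt(self.var + 1e-8)
```

Typical use: `rms = RunningMeanStd((3, 72, 128))`, then `rms.update(obs_batch)` as observations arrive and `rms.normalize(obs)` before feeding the encoder.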
[2025-04-17 16:04:26,051][38462] Num frames 100...
[2025-04-17 16:04:26,217][38462] Num frames 200...
[2025-04-17 16:04:26,369][38462] Num frames 300...
[2025-04-17 16:04:26,574][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2025-04-17 16:04:26,575][38462] Avg episode reward: 3.840, avg true_objective: 3.840
[2025-04-17 16:04:26,605][38462] Num frames 400...
[2025-04-17 16:04:26,774][38462] Num frames 500...
[2025-04-17 16:04:26,945][38462] Num frames 600...
[2025-04-17 16:04:27,102][38462] Num frames 700...
[2025-04-17 16:04:27,229][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2025-04-17 16:04:27,230][38462] Avg episode reward: 3.840, avg true_objective: 3.840
[2025-04-17 16:04:27,286][38462] Num frames 800...
[2025-04-17 16:04:27,450][38462] Num frames 900...
[2025-04-17 16:04:27,620][38462] Num frames 1000...
[2025-04-17 16:04:27,798][38462] Num frames 1100...
[2025-04-17 16:04:27,952][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2025-04-17 16:04:27,953][38462] Avg episode reward: 3.840, avg true_objective: 3.840
[2025-04-17 16:04:28,021][38462] Num frames 1200...
[2025-04-17 16:04:28,161][38462] Num frames 1300...
[2025-04-17 16:04:28,333][38462] Num frames 1400...
[2025-04-17 16:04:28,505][38462] Num frames 1500...
[2025-04-17 16:04:28,620][38462] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2025-04-17 16:04:28,621][38462] Avg episode reward: 3.840, avg true_objective: 3.840
[2025-04-17 16:04:28,729][38462] Num frames 1600...
[2025-04-17 16:04:28,895][38462] Num frames 1700...
[2025-04-17 16:04:29,022][38462] Num frames 1800...
[2025-04-17 16:04:29,187][38462] Num frames 1900...
[2025-04-17 16:04:29,376][38462] Avg episode rewards: #0: 4.168, true rewards: #0: 3.968
[2025-04-17 16:04:29,378][38462] Avg episode reward: 4.168, avg true_objective: 3.968
[2025-04-17 16:04:29,407][38462] Num frames 2000...
[2025-04-17 16:04:29,574][38462] Num frames 2100...
[2025-04-17 16:04:29,755][38462] Num frames 2200...
[2025-04-17 16:04:29,866][38462] Avg episode rewards: #0: 3.900, true rewards: #0: 3.733
[2025-04-17 16:04:29,867][38462] Avg episode reward: 3.900, avg true_objective: 3.733
[2025-04-17 16:04:29,984][38462] Num frames 2300...
[2025-04-17 16:04:30,150][38462] Num frames 2400...
[2025-04-17 16:04:30,311][38462] Num frames 2500...
[2025-04-17 16:04:30,476][38462] Num frames 2600...
[2025-04-17 16:04:30,565][38462] Avg episode rewards: #0: 3.891, true rewards: #0: 3.749
[2025-04-17 16:04:30,566][38462] Avg episode reward: 3.891, avg true_objective: 3.749
[2025-04-17 16:04:30,690][38462] Num frames 2700...
[2025-04-17 16:04:30,816][38462] Num frames 2800...
[2025-04-17 16:04:30,980][38462] Num frames 2900...
[2025-04-17 16:04:31,143][38462] Num frames 3000...
[2025-04-17 16:04:31,311][38462] Avg episode rewards: #0: 4.090, true rewards: #0: 3.840
[2025-04-17 16:04:31,312][38462] Avg episode reward: 4.090, avg true_objective: 3.840
[2025-04-17 16:04:31,360][38462] Num frames 3100...
[2025-04-17 16:04:33,597][38462] Num frames 3200...
[2025-04-17 16:04:33,721][38462] Num frames 3300...
[2025-04-17 16:04:33,886][38462] Num frames 3400...
[2025-04-17 16:04:34,033][38462] Avg episode rewards: #0: 4.062, true rewards: #0: 3.840
[2025-04-17 16:04:34,034][38462] Avg episode reward: 4.062, avg true_objective: 3.840
[2025-04-17 16:04:34,108][38462] Num frames 3500...
[2025-04-17 16:04:34,275][38462] Num frames 3600...
[2025-04-17 16:04:34,440][38462] Num frames 3700...
[2025-04-17 16:04:34,579][38462] Num frames 3800...
[2025-04-17 16:04:34,732][38462] Num frames 3900...
[2025-04-17 16:04:34,846][38462] Avg episode rewards: #0: 4.336, true rewards: #0: 3.936
[2025-04-17 16:04:34,847][38462] Avg episode reward: 4.336, avg true_objective: 3.936
[2025-04-17 16:04:40,292][38462] Replay video saved to /home/uccacbo/Deep-RL-HF/train_dir/default_experiment/replay.mp4!