{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.3137513995170593,
"min": 0.2869039475917816,
"max": 1.602157711982727,
"count": 236
},
"Pyramids.Policy.Entropy.sum": {
"value": 3268.03466796875,
"min": 2579.84033203125,
"max": 25634.5234375,
"count": 236
},
"Pyramids.Step.mean": {
"value": 2359943.0,
"min": 9656.0,
"max": 2359943.0,
"count": 236
},
"Pyramids.Step.sum": {
"value": 2359943.0,
"min": 9656.0,
"max": 2359943.0,
"count": 236
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.7108887434005737,
"min": -0.11550861597061157,
"max": 0.7489367723464966,
"count": 236
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 30.56821632385254,
"min": -2.1946637630462646,
"max": 36.7609748840332,
"count": 236
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.011286488734185696,
"min": -0.09177713841199875,
"max": 0.3759949207305908,
"count": 236
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 0.48531901836395264,
"min": -1.7437655925750732,
"max": 7.519898414611816,
"count": 236
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.04953947659505145,
"min": 0.04103936346124707,
"max": 0.06559274251965151,
"count": 236
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.0990789531901029,
"min": 0.04532025255927389,
"max": 0.17413268685156397,
"count": 236
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.013870685773915337,
"min": 0.0001758237912478459,
"max": 0.020322077112117164,
"count": 236
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.027741371547830674,
"min": 0.0001758237912478459,
"max": 0.056052702401454255,
"count": 236
},
"Pyramids.Policy.LearningRate.mean": {
"value": 6.450972849678333e-05,
"min": 6.450972849678333e-05,
"max": 0.00029918080027306664,
"count": 236
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00012901945699356665,
"min": 0.00012901945699356665,
"max": 0.0008054863315045666,
"count": 236
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.12150321666666666,
"min": 0.12150321666666666,
"max": 0.19972693333333336,
"count": 236
},
"Pyramids.Policy.Epsilon.sum": {
"value": 0.24300643333333333,
"min": 0.18914756666666668,
"max": 0.5684954333333333,
"count": 236
},
"Pyramids.Policy.Beta.mean": {
"value": 0.002158171345,
"min": 0.002158171345,
"max": 0.00997272064,
"count": 236
},
"Pyramids.Policy.Beta.sum": {
"value": 0.00431634269,
"min": 0.00431634269,
"max": 0.02685269379,
"count": 236
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.0087517024949193,
"min": 0.00788116455078125,
"max": 0.887866199016571,
"count": 236
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.0175034049898386,
"min": 0.0157623291015625,
"max": 0.887866199016571,
"count": 236
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 265.38461538461536,
"min": 199.64,
"max": 999.0,
"count": 233
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 10350.0,
"min": 999.0,
"max": 15984.0,
"count": 233
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.7422307424056225,
"min": -1.0000000596046448,
"max": 1.799679981470108,
"count": 236
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 67.94699895381927,
"min": -16.000000953674316,
"max": 89.9839990735054,
"count": 236
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.7422307424056225,
"min": -1.0000000596046448,
"max": 1.799679981470108,
"count": 236
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 67.94699895381927,
"min": -16.000000953674316,
"max": 89.9839990735054,
"count": 236
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.023170538909303453,
"min": 0.01913863663900305,
"max": 11.338852478907658,
"count": 236
},
"Pyramids.Policy.RndReward.sum": {
"value": 0.9036510174628347,
"min": 0.6428126245737076,
"max": 147.40508222579956,
"count": 236
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 236
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 236
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1708894521",
"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics --force",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1708899642"
},
"total": 5121.063279811,
"count": 1,
"self": 0.5806893689996286,
"children": {
"run_training.setup": {
"total": 0.04869531500025914,
"count": 1,
"self": 0.04869531500025914
},
"TrainerController.start_learning": {
"total": 5120.433895127,
"count": 1,
"self": 3.0514399051926375,
"children": {
"TrainerController._reset_env": {
"total": 2.512414589999935,
"count": 1,
"self": 2.512414589999935
},
"TrainerController.advance": {
"total": 5114.870038967807,
"count": 153082,
"self": 3.185298800975943,
"children": {
"env_step": {
"total": 3942.8121294630373,
"count": 153082,
"self": 3637.8237524569386,
"children": {
"SubprocessEnvManager._take_step": {
"total": 303.0628151972214,
"count": 153082,
"self": 10.899881569044737,
"children": {
"TorchPolicy.evaluate": {
"total": 292.1629336281767,
"count": 148268,
"self": 292.1629336281767
}
}
},
"workers": {
"total": 1.925561808877319,
"count": 153082,
"self": 0.0,
"children": {
"worker_root": {
"total": 5107.091545389932,
"count": 153082,
"is_parallel": true,
"self": 1747.1022461868743,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0021746020001955912,
"count": 1,
"is_parallel": true,
"self": 0.0006800889996156911,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014945130005799,
"count": 8,
"is_parallel": true,
"self": 0.0014945130005799
}
}
},
"UnityEnvironment.step": {
"total": 0.051919086999987485,
"count": 1,
"is_parallel": true,
"self": 0.0006314569991445751,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000514745000145922,
"count": 1,
"is_parallel": true,
"self": 0.000514745000145922
},
"communicator.exchange": {
"total": 0.04898608700023033,
"count": 1,
"is_parallel": true,
"self": 0.04898608700023033
},
"steps_from_proto": {
"total": 0.0017867980004666606,
"count": 1,
"is_parallel": true,
"self": 0.000379430999601027,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014073670008656336,
"count": 8,
"is_parallel": true,
"self": 0.0014073670008656336
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 3359.989299203058,
"count": 153081,
"is_parallel": true,
"self": 82.08729729548031,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 56.47116115370682,
"count": 153081,
"is_parallel": true,
"self": 56.47116115370682
},
"communicator.exchange": {
"total": 2990.37006701228,
"count": 153081,
"is_parallel": true,
"self": 2990.37006701228
},
"steps_from_proto": {
"total": 231.06077374159122,
"count": 153081,
"is_parallel": true,
"self": 45.47935210940341,
"children": {
"_process_rank_one_or_two_observation": {
"total": 185.5814216321878,
"count": 1224648,
"is_parallel": true,
"self": 185.5814216321878
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 1168.8726107037937,
"count": 153082,
"self": 6.951279846911348,
"children": {
"process_trajectory": {
"total": 256.3582355698745,
"count": 153082,
"self": 255.9915498828732,
"children": {
"RLTrainer._checkpoint": {
"total": 0.3666856870013362,
"count": 4,
"self": 0.3666856870013362
}
}
},
"_update_policy": {
"total": 905.5630952870079,
"count": 517,
"self": 633.440543862157,
"children": {
"TorchPPOOptimizer.update": {
"total": 272.1225514248508,
"count": 27420,
"self": 272.1225514248508
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.6640005924273282e-06,
"count": 1,
"self": 1.6640005924273282e-06
}
}
}
}
}